{"id":24946392,"url":"https://github.com/machinezone/sparkml-par","last_synced_at":"2025-03-28T19:14:32.937Z","repository":{"id":76807829,"uuid":"139511281","full_name":"machinezone/SparkML-Par","owner":"machinezone","description":null,"archived":false,"fork":false,"pushed_at":"2020-10-13T08:54:10.000Z","size":24,"stargazers_count":3,"open_issues_count":1,"forks_count":1,"subscribers_count":3,"default_branch":"master","last_synced_at":"2025-02-02T20:28:18.707Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":null,"language":"Scala","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"bsd-3-clause","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/machinezone.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2018-07-03T01:10:24.000Z","updated_at":"2020-04-24T03:52:52.000Z","dependencies_parsed_at":"2023-07-08T00:32:12.582Z","dependency_job_id":null,"html_url":"https://github.com/machinezone/SparkML-Par","commit_stats":null,"previous_names":[],"tags_count":1,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/machinezone%2FSparkML-Par","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/machinezone%2FSparkML-Par/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/machinezone%2FSparkML-Par/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/machinezone%2FSparkML-Par/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/machinezone","download_url":"https://codeload.github.com/machinezone/SparkML-Par/tar.gz/refs/heads/master","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":246085638,"owners_count":20721212,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2025-02-02T20:24:20.017Z","updated_at":"2025-03-28T19:14:32.932Z","avatar_url":"https://github.com/machinezone.png","language":"Scala","readme":"\n# SparkML-Par\n\nParallel implementation of SparkML transformers and estimators.\n\n# Motivation\n\nThis library extends SparkML to allow for parallel transformation of input datasets.\nThat is to transform multiple columns in parallel using the same set of transformations\none would normally need to apply in sequence.\n\n# Development\n\nClone this repository and run `mvn clean test`\n\nTo build for a custom version of Spark/Scala, run \n`mvn clean compile \\\n-Dscala.major.version=\u003cSCALA_MAJOR\u003e \\\n-Dscala.minor.version=\u003cSCALA_MINOR\u003e \\\n-Dspark.version=\u003cSPARK_VERSION\u003e`\n\ne.g. \n```bash\nmvn clean package \\\n-Dscala.major.version=2.11 \\\n-Dscala.minor.version=2.11.8 \\\n-Dspark.version=2.3.0\n```\n\n## build profiles\n\nAlternatively one can build against a limited number of pre-defined profiles.\nSee the [pom](pom.xml) for a list of the profiles.\n\nExample build with profiles: \n\n`mvn clean package -Pspark_2.3,scala_2.11`\n\n`mvn clean package -Pspark_2.0,scala_2.10`\n\n# Support\n\nHere is a handy table of supported build version combinations:\n\n| Apache Spark | Scala |\n|:------------:|:-----:|\n| 2.0.x        | 2.10  |\n| 2.0.x        | 2.11  | \n| 2.1.x        | 2.10  |\n| 2.1.x        | 2.11  |\n| 2.2.x        | 2.10  |\n| 2.2.x        | 2.11  |\n| 2.3.x        | 2.11  |\n\n# License\n\nsee the [license](LICENSE) for license information.\n","funding_links":[],"categories":[],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmachinezone%2Fsparkml-par","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fmachinezone%2Fsparkml-par","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmachinezone%2Fsparkml-par/lists"}