{"id":18106907,"url":"https://github.com/georgegkonis/spark-decentralized-query-processing","last_synced_at":"2026-05-10T03:55:19.610Z","repository":{"id":259518200,"uuid":"861222447","full_name":"georgegkonis/spark-decentralized-query-processing","owner":"georgegkonis","description":"Project for the academic course \"Decentralized Data Technologies\"","archived":false,"fork":false,"pushed_at":"2024-10-25T09:16:25.000Z","size":251,"stargazers_count":1,"open_issues_count":0,"forks_count":0,"subscribers_count":1,"default_branch":"master","last_synced_at":"2025-02-12T12:27:19.951Z","etag":null,"topics":["big-data","decentralized-data","jupyter","python","query-optimization","spark"],"latest_commit_sha":null,"homepage":"https://www.ceid.upatras.gr/en/course/decentralized-data-technologies/","language":"Jupyter Notebook","has_issues":false,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/georgegkonis.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2024-09-22T10:39:31.000Z","updated_at":"2024-10-25T09:16:28.000Z","dependencies_parsed_at":"2024-10-26T06:23:22.728Z","dependency_job_id":"0847b5ec-f0da-47dc-8145-6c5514888710","html_url":"https://github.com/georgegkonis/spark-decentralized-query-processing","commit_stats":null,"previous_names":["georgegkonis/spark-decentralized-query-processing"],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/georgegkonis%2Fspark-decentralized-query-processing","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/georgegkonis%2Fspark-decentralized-query-processing/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/georgegkonis%2Fspark-decentralized-query-processing/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/georgegkonis%2Fspark-decentralized-query-processing/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/georgegkonis","download_url":"https://codeload.github.com/georgegkonis/spark-decentralized-query-processing/tar.gz/refs/heads/master","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":247445649,"owners_count":20939953,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["big-data","decentralized-data","jupyter","python","query-optimization","spark"],"created_at":"2024-10-31T23:08:29.230Z","updated_at":"2026-05-10T03:55:14.563Z","avatar_url":"https://github.com/georgegkonis.png","language":"Jupyter Notebook","funding_links":[],"categories":[],"sub_categories":[],"readme":"# Spark Decentralized Query Processing \u0026 Optimization\n\nThis project is part of the [Decentralized Data Technologies](https://www.ceid.upatras.gr/en/course/decentralized-data-technologies/) course offered by\nthe [Department of Computer Engineering \u0026 Informatics](https://www.ceid.upatras.gr/en/) at\nthe [University of Patras](https://www.upatras.gr/en/). The objective of the project is to design and implement a\ndecentralized query processing and optimization system built on top of Apache Spark. The system will efficiently\ndistribute query plans across multiple nodes, enabling parallel execution and improving performance in a decentralized\nmanner. Additionally, the system will include query optimization capabilities by pushing down filters and projections to\ndata sources, thereby minimizing data transfer and enhancing overall query efficiency.\n\n## Installation\n\nTo set up the environment for this project, follow these steps:\n\n1. Ensure you have `conda` installed. If not, you can download and install it\n   from [here](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html).\n\n2. Navigate to the project directory.\n\n3. Create the environment using the `environment.yml` file:\n    ```sh\n    conda env create -f environment.yml\n    ```\n\n4. Activate the environment:\n    ```sh\n    conda activate spark-env\n    ```\n\n5. Start Jupyter Notebook:\n    ```sh\n    jupyter notebook\n    ```\n\nYou should now be able to run the notebooks and scripts in this project.\n\n## Contributing\n\nDue to the academic nature of this project, contributions are not accepted.\n\n## License\n\nThis project is licensed under the MIT License - see the `LICENSE` file for details.\n\n## Authors\n\n- [George Gkonis](https://github.com/georgegkonis)\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fgeorgegkonis%2Fspark-decentralized-query-processing","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fgeorgegkonis%2Fspark-decentralized-query-processing","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fgeorgegkonis%2Fspark-decentralized-query-processing/lists"}