{"id":24901378,"url":"https://github.com/rhecosystemappeng/populate-vectors-pipeline","last_synced_at":"2025-03-27T18:18:48.611Z","repository":{"id":274613597,"uuid":"923464620","full_name":"RHEcosystemAppEng/populate-vectors-pipeline","owner":"RHEcosystemAppEng","description":"Populate vectors to Vector DB from 3 different sources, S3 bucket, code repository, and list of URLs","archived":false,"fork":false,"pushed_at":"2025-01-28T10:38:51.000Z","size":7,"stargazers_count":0,"open_issues_count":0,"forks_count":0,"subscribers_count":4,"default_branch":"main","last_synced_at":"2025-01-28T11:33:47.743Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":null,"language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/RHEcosystemAppEng.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2025-01-28T09:46:10.000Z","updated_at":"2025-01-28T10:38:54.000Z","dependencies_parsed_at":"2025-01-28T11:43:53.011Z","dependency_job_id":null,"html_url":"https://github.com/RHEcosystemAppEng/populate-vectors-pipeline","commit_stats":null,"previous_names":["rhecosystemappeng/populate-vectors-pipeline"],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/RHEcosystemAppEng%2Fpopulate-vectors-pipeline","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/RHEcosystemAppEng%2Fpopulate-vectors-pipeline/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/RHEcosystemAppEng%2Fpopulate-vectors-pipeline/releases","manifests_url":
"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/RHEcosystemAppEng%2Fpopulate-vectors-pipeline/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/RHEcosystemAppEng","download_url":"https://codeload.github.com/RHEcosystemAppEng/populate-vectors-pipeline/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":245898318,"owners_count":20690466,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2025-02-01T21:16:01.487Z","updated_at":"2025-03-27T18:18:48.583Z","avatar_url":"https://github.com/RHEcosystemAppEng.png","language":"Python","funding_links":[],"categories":[],"sub_categories":[],"readme":"# Populate Vectors Pipeline\n\nThis repo compiled a pipeline.yaml that populates vectors from 3 different sources by the user choice:\n* S3 Bucket\n* Code Repository\n* List of URLs\n\nCurrently, the repository only supports processing PDFs. 
However, it can be extended to handle other data types as needed.\n\n\n### Upload the pipeline as a job\nIf you want to upload the compiled pipeline using a job, this can be done using this repo: [ml-pipeline-importer-runner](https://github.com/RHEcosystemAppEng/ml-pipeline-importer-runner)\n\n### How to execute\n`pip install -r requirements.txt`\n`python3 ./main.py`\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Frhecosystemappeng%2Fpopulate-vectors-pipeline","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Frhecosystemappeng%2Fpopulate-vectors-pipeline","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Frhecosystemappeng%2Fpopulate-vectors-pipeline/lists"}