{"id":22703777,"url":"https://github.com/ul-mds/gecko-data","last_synced_at":"2026-04-16T08:31:10.799Z","repository":{"id":227424591,"uuid":"770933219","full_name":"ul-mds/gecko-data","owner":"ul-mds","description":"Example data sources as a starting point for working with Gecko","archived":false,"fork":false,"pushed_at":"2024-03-14T14:10:48.000Z","size":4884,"stargazers_count":1,"open_issues_count":0,"forks_count":0,"subscribers_count":0,"default_branch":"main","last_synced_at":"2025-03-29T20:11:32.014Z","etag":null,"topics":["data-science","numpy","pandas","python","record-linkage"],"latest_commit_sha":null,"homepage":"https://ul-mds.github.io/gecko/","language":"Jupyter Notebook","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/ul-mds.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2024-03-12T12:13:25.000Z","updated_at":"2024-03-22T13:32:21.000Z","dependencies_parsed_at":"2025-02-04T20:41:36.616Z","dependency_job_id":"96f0e048-df1c-48a6-a653-ba43173735b6","html_url":"https://github.com/ul-mds/gecko-data","commit_stats":null,"previous_names":["ul-mds/gecko-data"],"tags_count":0,"template":false,"template_full_name":null,"purl":"pkg:github/ul-mds/gecko-data","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ul-mds%2Fgecko-data","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ul-mds%2Fgecko-data/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ul-mds%2Fgecko-data/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ul-mds%2Fgecko-data/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/ul-mds","download_url":"https://codeload.github.com/ul-mds/gecko-data/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ul-mds%2Fgecko-data/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":31877426,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-04-16T07:36:03.521Z","status":"ssl_error","status_checked_at":"2026-04-16T07:35:53.576Z","response_time":69,"last_error":"SSL_connect returned=1 errno=0 peeraddr=140.82.121.5:443 state=error: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["data-science","numpy","pandas","python","record-linkage"],"created_at":"2024-12-10T08:12:35.215Z","updated_at":"2026-04-16T08:31:10.783Z","avatar_url":"https://github.com/ul-mds.png","language":"Jupyter Notebook","funding_links":[],"categories":[],"sub_categories":[],"readme":"This repository contains example data for use with [Gecko](https://github.com/ul-mds/gecko) \u0026mdash; a Python library for generation and mutation of realistic data at scale.\nAll data present in this repository is collected from public data sources.\nFeel free to use it to test Gecko's capabilities.\n\n# Usage\n\nClone this repository to an easy-to-find location.\n\n```bash\ngit clone https://github.com/ul-mds/gecko-data.git\n```\n\nYou can now use the data in this repository in your Gecko scripts.\n\n# Structure\n\nSubdirectories are locales which identify the language and country that the data contained within pertains to.\nCurrently, there exists data for the following locales:\n\n- [English (United States)](./en_US/)\n- [German (Germany)](./de_DE/)\n\nThere is also a [directory called \"common\"](./common/) which contains data independent of language and country.\n\nEach subdirectory contains a README file which lists the sources and additional pre-processing steps to make the data easier to work with.\nIf you intend to publish data generated with data from this repository, please take care to cite the correct sources from these README files.\n\n# License\n\nGecko is released under the MIT License.\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Ful-mds%2Fgecko-data","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Ful-mds%2Fgecko-data","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Ful-mds%2Fgecko-data/lists"}