{"id":24551971,"url":"https://github.com/mlabs-haskell/wikipedia_parser","last_synced_at":"2025-03-16T13:41:34.072Z","repository":{"id":221465652,"uuid":"725422023","full_name":"mlabs-haskell/wikipedia_parser","owner":"mlabs-haskell","description":null,"archived":false,"fork":false,"pushed_at":"2025-02-03T01:51:10.000Z","size":625,"stargazers_count":0,"open_issues_count":0,"forks_count":0,"subscribers_count":4,"default_branch":"staging","last_synced_at":"2025-02-03T02:30:07.077Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":null,"language":"Rust","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/mlabs-haskell.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2023-11-30T05:23:51.000Z","updated_at":"2025-02-03T01:51:13.000Z","dependencies_parsed_at":"2024-03-07T05:28:31.691Z","dependency_job_id":"06673e40-0cc5-4c27-8626-b9e56a710910","html_url":"https://github.com/mlabs-haskell/wikipedia_parser","commit_stats":null,"previous_names":["mlabs-haskell/wikipedia_parser"],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/mlabs-haskell%2Fwikipedia_parser","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/mlabs-haskell%2Fwikipedia_parser/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/mlabs-haskell%2Fwikipedia_parser/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/mlabs-haskell%2Fwikipedia_parser/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/mlabs-haskell","download_url":"https://codeload.github.com/mlabs-haskell/wikipedia_parser/tar.gz/refs/heads/staging","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":243878408,"owners_count":20362431,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2025-01-23T01:19:36.409Z","updated_at":"2025-03-16T13:41:34.067Z","avatar_url":"https://github.com/mlabs-haskell.png","language":"Rust","funding_links":[],"categories":[],"sub_categories":[],"readme":"# Wikipedia Parser\nThis is a tool for processing Wikipedia articles and extracting important information from them as plaintext.\n\n## Instructions for use\n1. Run `./download.sh` to download all of Wikipedia as a single xml file. This will likely take a long time.\n2. Install Rust on your system via [these instructions](https://rustup.rs/).\n3. Install `just` with `cargo install just`.\n4. Run `just extract-links` to create the graph of Wikipedia.\n5. Run `just extract-contents` to parse the contents of the Wikipedia articles. This will also take a long time\n6. Run `just extract-subgraph \u003croot article\u003e \u003cdegrees of separation\u003e` to produce the list of all articles within `degrees of separation` of the root. For instance, if I wanted to find all articles within 5 degrees of separation from the article for RNA, I would run `just extract-subgraph RNA 5`.\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmlabs-haskell%2Fwikipedia_parser","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fmlabs-haskell%2Fwikipedia_parser","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fmlabs-haskell%2Fwikipedia_parser/lists"}