{"id":21532916,"url":"https://github.com/githubtoolbox/github-extractor-package","last_synced_at":"2025-07-21T21:32:23.567Z","repository":{"id":241804815,"uuid":"805023522","full_name":"GitHubToolbox/github-extractor-package","owner":"GitHubToolbox","description":"Extract various information from the GitHub API.","archived":false,"fork":false,"pushed_at":"2025-06-23T05:48:22.000Z","size":232,"stargazers_count":1,"open_issues_count":4,"forks_count":0,"subscribers_count":0,"default_branch":"master","last_synced_at":"2025-06-23T06:32:08.531Z","etag":null,"topics":["github","pypi","pypi-package","python","python-package","wolfsoftware"],"latest_commit_sha":null,"homepage":"","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/GitHubToolbox.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":".github/CONTRIBUTING.md","funding":".github/FUNDING.yml","license":"LICENSE.md","code_of_conduct":".github/CODE_OF_CONDUCT.md","threat_model":null,"audit":null,"citation":"CITATION.cff","codeowners":".github/CODEOWNERS","security":".github/SECURITY.md","support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null},"funding":{"github":["WolfSoftware","TGWolf"]}},"created_at":"2024-05-23T18:23:51.000Z","updated_at":"2025-06-23T05:47:17.000Z","dependencies_parsed_at":"2024-06-03T06:23:05.776Z","dependency_job_id":"93494fb2-2acb-4558-a368-5cbcbfa0d31a","html_url":"https://github.com/GitHubToolbox/github-extractor-package","commit_stats":null,"previous_names":["githubtoolbox/github-extractor-package"],"tags_count":3,"template":false,"template_full_name":null,"purl":"pkg:github/GitHubToolbox/github-extractor-package","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GitHubToolbox%2Fgithub-extractor-package","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GitHubToolbox%2Fgithub-extractor-package/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GitHubToolbox%2Fgithub-extractor-package/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GitHubToolbox%2Fgithub-extractor-package/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/GitHubToolbox","download_url":"https://codeload.github.com/GitHubToolbox/github-extractor-package/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GitHubToolbox%2Fgithub-extractor-package/sbom","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":266382558,"owners_count":23920732,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-07-21T11:47:31.412Z","response_time":64,"last_error":null,"robots_txt_status":null,"robots_txt_updated_at":null,"robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["github","pypi","pypi-package","python","python-package","wolfsoftware"],"created_at":"2024-11-24T02:22:50.765Z","updated_at":"2025-07-21T21:32:23.535Z","avatar_url":"https://github.com/GitHubToolbox.png","language":"Python","readme":"\u003c!-- markdownlint-disable --\u003e\n\u003cp align=\"center\"\u003e\n    \u003ca href=\"https://github.com/GitHubToolbox/\"\u003e\n        \u003cimg src=\"https://cdn.wolfsoftware.com/assets/images/github/organisations/githubtoolbox/black-and-white-circle-256.png\" alt=\"GitHubToolbox logo\" /\u003e\n    \u003c/a\u003e\n    \u003cbr /\u003e\n    \u003ca href=\"https://github.com/GitHubToolbox/github-extractor-package/actions/workflows/cicd.yml\"\u003e\n        \u003cimg src=\"https://img.shields.io/github/actions/workflow/status/GitHubToolbox/github-extractor-package/cicd.yml?branch=master\u0026label=build%20status\u0026style=for-the-badge\" alt=\"Github Build Status\" /\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/GitHubToolbox/github-extractor-package/blob/master/LICENSE.md\"\u003e\n        \u003cimg src=\"https://img.shields.io/github/license/GitHubToolbox/github-extractor-package?color=blue\u0026label=License\u0026style=for-the-badge\" alt=\"License\"\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/GitHubToolbox/github-extractor-package\"\u003e\n        \u003cimg src=\"https://img.shields.io/github/created-at/GitHubToolbox/github-extractor-package?color=blue\u0026label=Created\u0026style=for-the-badge\" alt=\"Created\"\u003e\n    \u003c/a\u003e\n    \u003cbr /\u003e\n    \u003ca href=\"https://github.com/GitHubToolbox/github-extractor-package/releases/latest\"\u003e\n        \u003cimg src=\"https://img.shields.io/github/v/release/GitHubToolbox/github-extractor-package?color=blue\u0026label=Latest%20Release\u0026style=for-the-badge\" alt=\"Release\"\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/GitHubToolbox/github-extractor-package/releases/latest\"\u003e\n        \u003cimg src=\"https://img.shields.io/github/release-date/GitHubToolbox/github-extractor-package?color=blue\u0026label=Released\u0026style=for-the-badge\" alt=\"Released\"\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/GitHubToolbox/github-extractor-package/releases/latest\"\u003e\n        \u003cimg src=\"https://img.shields.io/github/commits-since/GitHubToolbox/github-extractor-package/latest.svg?color=blue\u0026style=for-the-badge\" alt=\"Commits since release\"\u003e\n    \u003c/a\u003e\n    \u003cbr /\u003e\n    \u003ca href=\"https://github.com/GitHubToolbox/github-extractor-package/blob/master/.github/CODE_OF_CONDUCT.md\"\u003e\n        \u003cimg src=\"https://img.shields.io/badge/Code%20of%20Conduct-blue?style=for-the-badge\" /\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/GitHubToolbox/github-extractor-package/blob/master/.github/CONTRIBUTING.md\"\u003e\n        \u003cimg src=\"https://img.shields.io/badge/Contributing-blue?style=for-the-badge\" /\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/GitHubToolbox/github-extractor-package/blob/master/.github/SECURITY.md\"\u003e\n        \u003cimg src=\"https://img.shields.io/badge/Report%20Security%20Concern-blue?style=for-the-badge\" /\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/GitHubToolbox/github-extractor-package/issues\"\u003e\n        \u003cimg src=\"https://img.shields.io/badge/Get%20Support-blue?style=for-the-badge\" /\u003e\n    \u003c/a\u003e\n\u003c/p\u003e\n\n## Overview\n\nThe GitHub Extractor package is a Python library designed to facilitate the extraction of data from GitHub.\n\nThis package provides functions to fetch information about repositories, including languages used, releases, contributors, topics, workflows,\nand more with robust error handling and configuration support.\n\n## Features\n\n- List organizations for a user from GitHub.\n- List repositories for a user from GitHub.\n- List repositories for a specified organization from GitHub.\n- Support for authentication using GitHub API tokens.\n- Filtering of organizations and repositories based on given patterns.\n- Pagination handling for API requests.\n\n## Installation\n\nYou can install GitHub Extractor via pip:\n\n```bash\npip install wolfsoftware.github-extractor\n```\n\n## Usage\n\n### Getting Token information\n\nYou an get basic information relating to the given token.\n\nThere is also a specific command line tool for this [Github Token Validator](https://github.com/GitHubToolbox/github-token-validator).\n\n```python\nfrom wolfsoftware.github_extractor import get_token_information\n\nconfig = {\n    \"token\": \"your_github_token\",\n}\n```\n\n\u003cdetails\u003e\n\n\u003csummary\u003eParameters\u003c/summary\u003e\n\n| Name    | Required | Purpose                                                                    |\n| :------ | :------: | :------------------------------------------------------------------------- |\n| token   | Yes      | Authentication for the GitHub API.                                         |\n| timeout | No       | The timeout to use when talking to the GitHub API (default is 10 seconds). |\n| slugs   | No       | Should we return the results as slugs. (List of names and nothing else).   |\n\n\u003c/details\u003e\n\n### Getting User Information\n\nYou an get basic information relating to the authenticated user (owner of the token). The information will be limited by the scope\nof the token.\n\n```python\nfrom wolfsoftware.github_extractor import get_authenticated_user\n\nconfig = {\n    \"token\": \"your_github_token\",\n}\n```\n\n\u003cdetails\u003e\n\n\u003csummary\u003eParameters\u003c/summary\u003e\n\n| Name    | Required | Purpose                                                                    |\n| :------ | :------: | :------------------------------------------------------------------------- |\n| token   | Yes      | Authentication for the GitHub API.                                         |\n| timeout | No       | The timeout to use when talking to the GitHub API (default is 10 seconds). |\n| slugs   | No       | Should we return the results as slugs. (List of names and nothing else).   |\n\n\u003c/details\u003e\n\n### Listing Organizations\n\nYou can list organizations that you are a member of using British or American English spelling.\n\n```python\nfrom wolfsoftware.github_extractor import list_organisations, list_organizations\n\nconfig = {\n    \"token\": \"your_github_token\",\n    \"ignore_orgs\": [\"Test*\"]\n}\n\n# Using British English spelling\norganisations = list_organisations(config)\n\n# Using American English spelling\norganisations_us = list_organizations(config)\n```\n\n\u003cdetails\u003e\n\n\u003csummary\u003eParameters\u003c/summary\u003e\n\n| Name    | Required | Purpose                                                                    |\n| :------ | :------: | :------------------------------------------------------------------------- |\n| token   | Yes      | Authentication for the GitHub API.                                         |\n| timeout | No       | The timeout to use when talking to the GitHub API (default is 10 seconds). |\n| slugs   | No       | Should we return the results as slugs. (List of names and nothing else).   |\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\n\u003csummary\u003eFiltering Parameters\u003c/summary\u003e\n\n| Name         | Required | Purpose                                                   |\n| :----------- | :------: | :-------------------------------------------------------- |\n| include_orgs | No       | A list of organisation names to include in the results.   |\n| ignore_orgs  | No       | A list of organisation names to exclude from the results. |\n| get_members  | No       | Should we include organisation members in the results.    |\n\n\u003c/details\u003e\n\n### Listing User Repositories\n\nYou can list repositories for a user with optional filters:\n\n```python\nfrom wolfsoftware.github_extractor import list_user_repositories\n\nconfig = {\n    \"token\": \"your_github_token\",\n    \"ignore_repos\": [\"Test*\"],\n    \"include_repos\": [\"Project*\"]\n}\n\nrepositories = list_user_repositories(config)\n```\n\n\u003cdetails\u003e\n\n\u003csummary\u003eParameters\u003c/summary\u003e\n\n| Name          | Required | Purpose                                                                                                  |\n| :------------ | :------: | :------------------------------------------------------------------------------------------------------- |\n| token         | No       | Authentication for the GitHub API.                                                                       |\n| timeout       | No       | The timeout to use when talking to the GitHub API (default is 10 seconds).                               |\n| slugs         | No       | Should we return the results as slugs. (List of names and nothing else).                                 |\n| username      | No       | The GitHub username to list repositories for. (Authenticated user will be used is this is not supplied). |\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\n\u003csummary\u003eAdditional Data Parameter\u003c/summary\u003e\n\n| Name             | Required | Purpose                                                   |\n| :--------------- | :------: | :-------------------------------------------------------- |\n| get_branches     | No       | Add details about all branches to each repository.        |\n| get_contributors | No       | Add details about all contributors to each repository.    |\n| get_languages    | No       | Add the list of identified languages for each repository. |\n| get_releases     | No       | Add details about all releases to each repository.        |\n| get_tags         | No       | Add details about all tags to each  repository.           |\n| get_topics       | No       | Add the list of defined topics to each repository.        |\n| get_workflows    | No       | Add details about all workflows to each repository.       |\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\n\u003csummary\u003eFiltering Parameter\u003c/summary\u003e\n\n| Name          | Required | Purpose                                                                       |\n| :------------ | :------: | :---------------------------------------------------------------------------- |\n| include_names | No       | A list of repository names to include in the results.                         |\n| ignore_names  | No       | A list of repository names to exclude from the results.                       |\n| include_repos | No       | A list of organisation names/repository names to include in the results.      |\n| ignore_repos  | No       | A list of organisation names/repository names to exclude from the results.    |\n| skip_private  | No       | Do not include private repositories, this is for the authenticated user only. |\n\n\u003e ignore and include names use the full name of the repository, which is the organisation name / repository name E.g. GitHubToolbox/github-extractor-package\n\n\u003c/details\u003e\n\n### Listing Repositories by Organization\n\nYou can list repositories for a specific organization with optional filters:\n\n```python\nfrom wolfsoftware.github_extractor import list_repositories_by_org\n\nconfig = {\n    \"token\": \"your_github_token\",\n    \"org_name\": \"your_organization\",\n    \"ignore_repos\": [\"Test*\"],\n    \"include_repos\": [\"Project*\"]\n}\n\nrepositories = list_repositories_by_org(config)\n```\n\n\u003cdetails\u003e\n\n\u003csummary\u003eParameters\u003c/summary\u003e\n\n| Name     | Required | Purpose                                                                    |\n| :------- | :------: | :------------------------------------------------------------------------- |\n| token    | No       | Authentication for the GitHub API.                                         |\n| timeout  | No       | The timeout to use when talking to the GitHub API (default is 10 seconds). |\n| slugs    | No       | Should we return the results as slugs. (List of names and nothing else).   |\n| org_name | No       | The GitHub organisation to list repositories for.                          |\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\n\u003csummary\u003eAdditional Data Parameter\u003c/summary\u003e\n\n| Name             | Required | Purpose                                                   |\n| :--------------- | :------: | :-------------------------------------------------------- |\n| get_branches     | No       | Add details about all branches to each repository.        |\n| get_contributors | No       | Add details about all contributors to each repository.    |\n| get_languages    | No       | Add the list of identified languages for each repository. |\n| get_releases     | No       | Add details about all releases to each repository.        |\n| get_tags         | No       | Add details about all tags to each  repository.           |\n| get_topics       | No       | Add the list of defined topics to each repository.        |\n| get_workflows    | No       | Add details about all workflows to each repository.       |\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\n\u003csummary\u003eFiltering Parameter\u003c/summary\u003e\n\n| Name          | Required | Purpose                                                                       |\n| :------------ | :------: | :---------------------------------------------------------------------------- |\n| include_names | No       | A list of repository names to include in the results.                         |\n| ignore_names  | No       | A list of repository names to exclude from the results.                       |\n| include_repos | No       | A list of organisation names/repository names to include in the results.      |\n| ignore_repos  | No       | A list of organisation names/repository names to exclude from the results.    |\n| skip_private  | No       | Do not include private repositories, this is for the authenticated user only. |\n\n\u003e ignore and include names use the full name of the repository, which is the organisation name / repository name E.g. GitHubToolbox/github-extractor-package\n\n\u003c/details\u003e\n\n### Listing all Organisation Repositories\n\nYou can list all repositories for all organisations you're a member of.\n\n```python\nfrom wolfsoftware.github_extractor import list_all_org_repositories\n\nconfig = {\n    \"token\": \"your_github_token\",\n    \"ignore_repos\": [\"Test*\"],\n    \"include_repos\": [\"Project*\"]\n}\n\nrepositories = list_all_org_repositories(config)\n```\n\n\u003cdetails\u003e\n\n\u003csummary\u003eParameters\u003c/summary\u003e\n\n| Name          | Required | Purpose                                                                                                  |\n| :------------ | :------: | :------------------------------------------------------------------------------------------------------- |\n| token         | Yes      | Authentication for the GitHub API.                                                                       |\n| timeout       | No       | The timeout to use when talking to the GitHub API (default is 10 seconds).                               |\n| slugs         | No       | Should we return the results as slugs. (List of names and nothing else).                                 |\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\n\u003csummary\u003eAdditional Data Parameter\u003c/summary\u003e\n\n| Name             | Required | Purpose                                                   |\n| :--------------- | :------: | :-------------------------------------------------------- |\n| get_branches     | No       | Add details about all branches to each repository.        |\n| get_contributors | No       | Add details about all contributors to each repository.    |\n| get_languages    | No       | Add the list of identified languages for each repository. |\n| get_releases     | No       | Add details about all releases to each repository.        |\n| get_tags         | No       | Add details about all tags to each  repository.           |\n| get_topics       | No       | Add the list of defined topics to each repository.        |\n| get_workflows    | No       | Add details about all workflows to each repository.       |\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\n\u003csummary\u003eFiltering Parameter\u003c/summary\u003e\n\n| Name          | Required | Purpose                                                                       |\n| :------------ | :------: | :---------------------------------------------------------------------------- |\n| include_names | No       | A list of repository names to include in the results.                         |\n| ignore_names  | No       | A list of repository names to exclude from the results.                       |\n| include_repos | No       | A list of organisation names/repository names to include in the results.      |\n| ignore_repos  | No       | A list of organisation names/repository names to exclude from the results.    |\n| skip_private  | No       | Do not include private repositories, this is for the authenticated user only. |\n\n\u003e ignore and include names use the full name of the repository, which is the organisation name / repository name E.g. GitHubToolbox/github-extractor-package\n\n\u003c/details\u003e\n\n### Listing all Visible Repositories\n\nYou can list repositories that you are able to access.\n\n```python\nfrom wolfsoftware.github_extractor import list_all_visible_repositories\n\nconfig = {\n    \"token\": \"your_github_token\",\n    \"ignore_repos\": [\"Test*\"],\n    \"include_repos\": [\"Project*\"]\n}\n\nrepositories = list_all_visible_repositories(config)\n```\n\n\u003cdetails\u003e\n\n\u003csummary\u003eParameters\u003c/summary\u003e\n\n| Name          | Required | Purpose                                                                                                  |\n| :------------ | :------: | :------------------------------------------------------------------------------------------------------- |\n| token         | Yes      | Authentication for the GitHub API.                                                                       |\n| timeout       | No       | The timeout to use when talking to the GitHub API (default is 10 seconds).                               |\n| slugs         | No       | Should we return the results as slugs. (List of names and nothing else).                                 |\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\n\u003csummary\u003eAdditional Data Parameter\u003c/summary\u003e\n\n| Name             | Required | Purpose                                                   |\n| :--------------- | :------: | :-------------------------------------------------------- |\n| get_branches     | No       | Add details about all branches to each repository.        |\n| get_contributors | No       | Add details about all contributors to each repository.    |\n| get_languages    | No       | Add the list of identified languages for each repository. |\n| get_releases     | No       | Add details about all releases to each repository.        |\n| get_tags         | No       | Add details about all tags to each  repository.           |\n| get_topics       | No       | Add the list of defined topics to each repository.        |\n| get_workflows    | No       | Add details about all workflows to each repository.       |\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\n\u003csummary\u003eFiltering Parameter\u003c/summary\u003e\n\n| Name          | Required | Purpose                                                                       |\n| :------------ | :------: | :---------------------------------------------------------------------------- |\n| include_names | No       | A list of repository names to include in the results.                         |\n| ignore_names  | No       | A list of repository names to exclude from the results.                       |\n| include_repos | No       | A list of organisation names/repository names to include in the results.      |\n| ignore_repos  | No       | A list of organisation names/repository names to exclude from the results.    |\n| skip_private  | No       | Do not include private repositories, this is for the authenticated user only. |\n\n\u003e ignore and include names use the full name of the repository, which is the organisation name / repository name E.g. GitHubToolbox/github-extractor-package\n\n\u003c/details\u003e\n\n### Exceptions\n\nThe following custom exceptions are used:\n\n| Name                   | Purpose                                                                                        |\n| :--------------------- | :--------------------------------------------------------------------------------------------- |\n| AuthenticationError    | Raised when authentication fails. This is caused by an invalid token.                          |\n| MissingOrgNameError    | Raised when the organization name is missing.                                                  |\n| MissingTokenError      | Raised when the GitHub API token is missing but is required.                                   |\n| NotFoundError          | Raised when a requested resource is not found. This is caused by incorrect scope of the token. |\n| RateLimitExceededError | Raised when the GitHub API rate limit is exceeded.                                             |\n| RequestError           | Raised for general request errors.                                                             |\n| RequestTimeoutError    | Raised when a request times out.                                                               |\n\n\u003cbr /\u003e\n\u003cp align=\"right\"\u003e\u003ca href=\"https://wolfsoftware.com/\"\u003e\u003cimg src=\"https://img.shields.io/badge/Created%20by%20Wolf%20on%20behalf%20of%20Wolf%20Software-blue?style=for-the-badge\" /\u003e\u003c/a\u003e\u003c/p\u003e\n","funding_links":["https://github.com/sponsors/WolfSoftware","https://github.com/sponsors/TGWolf"],"categories":[],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fgithubtoolbox%2Fgithub-extractor-package","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fgithubtoolbox%2Fgithub-extractor-package","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fgithubtoolbox%2Fgithub-extractor-package/lists"}