{"id":13704163,"url":"https://github.com/openvax/pyensembl","last_synced_at":"2025-12-27T00:06:07.425Z","repository":{"id":16021990,"uuid":"18765629","full_name":"openvax/pyensembl","owner":"openvax","description":"Python interface to access reference genome features (such as genes, transcripts, and exons) from Ensembl ","archived":false,"fork":false,"pushed_at":"2025-08-13T10:55:38.000Z","size":1005,"stargazers_count":397,"open_issues_count":75,"forks_count":70,"subscribers_count":20,"default_branch":"main","last_synced_at":"2025-11-27T09:16:23.890Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":"","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/openvax.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":"CONTRIBUTING.md","funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2014-04-14T15:23:02.000Z","updated_at":"2025-11-25T18:03:46.000Z","dependencies_parsed_at":"2023-01-13T18:40:06.255Z","dependency_job_id":"6c48b3f1-1cdd-4103-aee2-da5092bac9bf","html_url":"https://github.com/openvax/pyensembl","commit_stats":{"total_commits":467,"total_committers":22,"mean_commits":"21.227272727272727","dds":"0.19700214132762317","last_synced_commit":"82a8bdbb248efe78591605b4e14282e293f4ce7c"},"previous_names":[],"tags_count":13,"template":false,"template_full_name":null,"purl":"pkg:github/openvax/pyensembl","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/openvax%2Fpyensembl","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/openvax%2Fpyensembl/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repo
sitories/openvax%2Fpyensembl/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/openvax%2Fpyensembl/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/openvax","download_url":"https://codeload.github.com/openvax/pyensembl/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/openvax%2Fpyensembl/sbom","scorecard":{"id":710708,"data":{"date":"2025-08-11","repo":{"name":"github.com/openvax/pyensembl","commit":"e5ba40daaf370fc6ce8c440c071ceecbcfc36f60"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":4.2,"checks":[{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Code-Review","score":1,"reason":"Found 3/30 approved changesets -- score normalized to 1","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Maintained","score":3,"reason":"4 commit(s) and 0 issue activity found in the last 90 days -- score normalized to 3","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"Token-Permissions","score":0,"reason":"detected GitHub workflow tokens with excessive permissions","details":["Warn: no topLevel permission defined: .github/workflows/tests.yml:1","Info: no jobLevel write 
permissions found"],"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"Dangerous-Workflow","score":10,"reason":"no dangerous workflow patterns detected","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"Pinned-Dependencies","score":0,"reason":"dependency not pinned by hash detected -- score normalized to 0","details":["Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/tests.yml:22: update your workflow using https://app.stepsecurity.io/secureworkflow/openvax/pyensembl/tests.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/tests.yml:24: update your workflow using https://app.stepsecurity.io/secureworkflow/openvax/pyensembl/tests.yml/main?enable=pin","Warn: third-party GitHubAction not pinned by hash: .github/workflows/tests.yml:54: update your workflow using 
https://app.stepsecurity.io/secureworkflow/openvax/pyensembl/tests.yml/main?enable=pin","Warn: pipCommand not pinned by hash: deploy.sh:3","Warn: pipCommand not pinned by hash: deploy.sh:4","Warn: pipCommand not pinned by hash: develop.sh:3","Warn: pipCommand not pinned by hash: .github/workflows/tests.yml:30","Warn: pipCommand not pinned by hash: .github/workflows/tests.yml:31","Warn: pipCommand not pinned by hash: .github/workflows/tests.yml:32","Warn: pipCommand not pinned by hash: .github/workflows/tests.yml:33","Info:   0 out of   2 GitHub-owned GitHubAction dependencies pinned","Info:   0 out of   1 third-party GitHubAction dependencies pinned","Info:   0 out of   7 pipCommand dependencies pinned"],"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: LICENSE:0","Info: FSF or OSI recognized license: Apache License 2.0: LICENSE:0"],"documentation":{"short":"Determines if the project has defined a 
license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Vulnerabilities","score":10,"reason":"0 existing vulnerabilities detected","details":null,"documentation":{"short":"Determines if the project has open, known unfixed vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Branch-Protection","score":-1,"reason":"internal error: error during branchesHandler.setup: internal error: githubv4.Query: Resource not accessible by integration","details":null,"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"SAST","score":0,"reason":"SAST tool is not run on all commits -- score normalized to 0","details":["Warn: 0 commits out of 5 are checked with a SAST tool"],"documentation":{"short":"Determines if the project uses static code 
analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}}]},"last_synced_at":"2025-08-22T08:08:44.283Z","repository_id":16021990,"created_at":"2025-08-22T08:08:44.283Z","updated_at":"2025-08-22T08:08:44.283Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":28065691,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-12-26T02:00:06.189Z","response_time":55,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2024-08-02T21:01:05.070Z","updated_at":"2025-12-27T00:06:07.403Z","avatar_url":"https://github.com/openvax.png","language":"Python","funding_links":[],"categories":["Next Generation Sequencing","زیست شناسی و بیوتکنولوژی"],"sub_categories":["Python Modules","کار با زمان و تقویم"],"readme":"[![Tests](https://github.com/openvax/pyensembl/actions/workflows/tests.yml/badge.svg)](https://github.com/openvax/pyensembl/actions/workflows/tests.yml)\n[![Coverage Status](https://coveralls.io/repos/github/openvax/pyensembl/badge.svg?branch=main)](https://coveralls.io/github/openvax/pyensembl?branch=main)\n\u003ca href=\"https://pypi.python.org/pypi/pyensembl/\"\u003e\n\u003cimg src=\"https://img.shields.io/pypi/v/pyensembl.svg?maxAge=1000\" alt=\"PyPI\" /\u003e\n\u003c/a\u003e\n\n# PyEnsembl\n\nPyEnsembl is a Python interface to [Ensembl](http://www.ensembl.org) reference 
genome metadata such as exons and transcripts. PyEnsembl downloads [GTF](https://en.wikipedia.org/wiki/Gene_transfer_format) and [FASTA](https://en.wikipedia.org/wiki/FASTA_format) files from the [Ensembl FTP server](ftp://ftp.ensembl.org) and loads them into a local database. PyEnsembl can also work with custom reference data specified using user-supplied GTF and FASTA files.\n\n# Example Usage\n\n```python\nfrom pyensembl import EnsemblRelease\n\n# release 77 uses human reference genome GRCh38\ndata = EnsemblRelease(77)\n\n# will return ['HLA-A']\ngene_names = data.gene_names_at_locus(contig=6, position=29945884)\n\n# get all exons associated with HLA-A\nexon_ids  = data.exon_ids_of_gene_name('HLA-A')\n```\n\n# Installation\n\nYou can install PyEnsembl using [pip](https://pip.pypa.io/en/latest/quickstart.html):\n\n```sh\npip install pyensembl\n```\n\nThis should also install any required packages such as [datacache](https://github.com/openvax/datacache).\n\nBefore using PyEnsembl, run the following command to download and install\nEnsembl data:\n\n```\npyensembl install --release \u003clist of Ensembl release numbers\u003e --species \u003cspecies-name\u003e\n```\n\nFor example, `pyensembl install --release 75 76 --species human` will download and install all\nhuman reference data from Ensembl releases 75 and 76.\n\nAlternatively, you can create the `EnsemblRelease` object from inside a Python\nprocess and call `ensembl_object.download()` followed by `ensembl_object.index()`.\n\n## Cache Location\n\nBy default, PyEnsembl uses the platform-specific `Cache` folder\nand caches the files into the `pyensembl` sub-directory.\nYou can override this default by setting the environment key `PYENSEMBL_CACHE_DIR`\nas your preferred location for caching:\n\n```sh\nexport PYENSEMBL_CACHE_DIR=/custom/cache/dir\n```\n\nor\n\n```python\nimport os\n\nos.environ['PYENSEMBL_CACHE_DIR'] = '/custom/cache/dir'\n# ... 
PyEnsembl API usage\n```\n\n# Usage tips\n\n## List installed genomes\n\nTo see the genomes for which PyEnsembl has already downloaded and indexed metadata you can run:\n\n```sh\npyensembl list\n```\n\nOr equivalently do this in Python:\n\n```python\nfrom pyensembl.shell import collect_all_installed_ensembl_releases\ncollect_all_installed_ensembl_releases()\n```\n\n## Load genome in Python\n\nHere's an example Python snippet that loads fly genome data from Ensembl release v100:\n\n```python\nfrom pyensembl import EnsemblRelease\ndata = EnsemblRelease(release=100, species='drosophila_melanogaster')\n```\n\n## Data structures\n\n### Gene\n\n```python\ngene = genome.gene_by_id(gene_id='FBgn0011747')\n```\n\n### Transcript\n\n```python\ntranscript = gene.transcripts[0]\n```\n\n### Protein information\n\n```python\ntranscript.protein_id\ntranscript.protein_sequence\n```\n\n# Non-Ensembl Data\n\nPyEnsembl also allows arbitrary genomes via the specification\nof local file paths or remote URLs to both Ensembl and non-Ensembl GTF\nand FASTA files. 
(Warning: GTF formats can vary, and handling of\nnon-Ensembl data is still very much in development.)\n\nFor example:\n\n```python\nfrom pyensembl import Genome\ndata = Genome(\n    reference_name='GRCh38',\n    annotation_name='my_genome_features',\n    # annotation_version=None,\n    gtf_path_or_url='/My/local/gtf/path_to_my_genome_features.gtf', # Path or URL of GTF file\n    # transcript_fasta_paths_or_urls=None, # List of paths or URLs of FASTA files containing transcript sequences\n    # protein_fasta_paths_or_urls=None, # List of paths or URLs of FASTA files containing protein sequences\n    # cache_directory_path=None, # Where to place downloaded and cached files for this genome\n)\n# parse GTF and construct database of genomic features\ndata.index()\ngene_names = data.gene_names_at_locus(contig=6, position=29945884)\n```\n\n# API\n\nThe `EnsemblRelease` object has methods to let you access all possible\ncombinations of the annotation features _gene_name_, _gene_id_,\n_transcript_name_, _transcript_id_, _exon_id_ as well as the location of\nthese genomic elements (contig, start position, end position, strand).\n\n## Genes\n\n\u003cdl\u003e\n\u003cdt\u003egenes(contig=None, strand=None)\u003c/dt\u003e\n\u003cdd\u003eReturns a list of Gene objects, optionally restricted to a particular contig\nor strand.\u003c/dd\u003e\n\n\u003cdt\u003egenes_at_locus(contig, position, end=None, strand=None)\u003c/dt\u003e\n\u003cdd\u003eReturns a list of Gene objects overlapping a particular position on a contig,\noptionally extend into a range with the end parameter and restrict to\nforward or backward strand by passing strand='+' or strand='-'.\u003c/dd\u003e\n\n\u003cdt\u003egene_by_id(gene_id)\u003c/dt\u003e\n\u003cdd\u003eReturn a Gene object for given Ensembl gene ID (e.g. 
\"ENSG00000068793\").\u003c/dd\u003e\n\n\u003cdt\u003egene_names(contig=None, strand=None)\u003c/dt\u003e\n\u003cdd\u003eReturns all gene names in the annotation database, optionally restricted\nto a particular contig or strand.\u003c/dd\u003e\n\n\u003cdt\u003egenes_by_name(gene_name)\u003c/dt\u003e\n\u003cdd\u003eGet all the unique genes with the given name (there might be multiple\ndue to copies in the genome), return a list containing a Gene object for each\ndistinct ID.\u003c/dd\u003e\n\n\u003cdt\u003egene_by_protein_id(protein_id)\u003c/dt\u003e\n\u003cdd\u003eFind Gene associated with the given Ensembl protein ID (e.g. \"ENSP00000350283\")\u003c/dd\u003e\n\n\u003cdt\u003egene_names_at_locus(contig, position, end=None, strand=None)\n\u003c/dt\u003e\n\u003cdd\u003eNames of genes overlapping with the given locus, optionally restricted by strand.\n(returns a list to account for overlapping genes)\u003c/dd\u003e\n\n\u003cdt\u003egene_name_of_gene_id(gene_id)\n\u003c/dt\u003e\n\u003cdd\u003eReturns name of gene with given gene ID.\u003c/dd\u003e\n\n\u003cdt\u003egene_name_of_transcript_id(transcript_id)\n\u003c/dt\u003e\u003cdd\u003eReturns name of gene associated with given transcript ID.\u003c/dd\u003e\n\n\u003cdt\u003egene_name_of_transcript_name(transcript_name)\n\u003c/dt\u003e\n\u003cdd\u003eReturns name of gene associated with given transcript name.\u003c/dd\u003e\n\n\u003cdt\u003egene_name_of_exon_id(exon_id)\n\u003c/dt\u003e\u003cdd\u003eReturns name of gene associated with given exon ID.\u003c/dd\u003e\n\n\u003cdt\u003egene_ids(contig=None, strand=None)\n\u003c/dt\u003e\n\u003cdd\u003eReturn all gene IDs in the annotation database, optionally restricted by\nchromosome name or strand.\u003c/dd\u003e\n\n\u003cdt\u003egene_ids_of_gene_name(gene_name)\n\u003c/dt\u003e\n\u003cdd\u003eReturns all Ensembl gene IDs with the given name.\u003c/dd\u003e\n\n\u003c/dl\u003e\n\n## Transcripts\n\n\u003cdl\u003e\n\u003cdt\u003etranscripts(contig=None, 
strand=None)\u003c/dt\u003e\n\u003cdd\u003eReturns a list of Transcript objects for all transcript entries in the\nEnsembl database, optionally restricted to a particular contig or strand.\u003c/dd\u003e\n\n\u003cdt\u003etranscript_by_id(transcript_id)\u003c/dt\u003e\n\u003cdd\u003eConstruct a Transcript object for given Ensembl transcript ID (e.g. \"ENST00000369985\")\u003c/dd\u003e\n\n\u003cdt\u003etranscripts_by_name(transcript_name)\u003c/dt\u003e\n\u003cdd\u003eReturns a list of Transcript objects for every transcript matching the given name.\u003c/dd\u003e\n\n\u003cdt\u003etranscript_names(contig=None, strand=None)\u003c/dt\u003e\n\u003cdd\u003eReturns all transcript names in the annotation database.\u003c/dd\u003e\n\n\u003cdt\u003etranscript_ids(contig=None, strand=None)\u003c/dt\u003e\n\u003cdd\u003eReturns all transcript IDs in the annotation database.\u003c/dd\u003e\n\n\u003cdt\u003etranscript_ids_of_gene_id(gene_id)\u003c/dt\u003e\n\u003cdd\u003eReturn IDs of all transcripts associated with given gene ID.\u003c/dd\u003e\n\n\u003cdt\u003etranscript_ids_of_gene_name(gene_name)\u003c/dt\u003e\n\u003cdd\u003eReturn IDs of all transcripts associated with given gene name.\u003c/dd\u003e\n\n\u003cdt\u003etranscript_ids_of_transcript_name(transcript_name)\u003c/dt\u003e\n\u003cdd\u003eFind all Ensembl transcript IDs with the given name.\u003c/dd\u003e\n\n\u003cdt\u003etranscript_ids_of_exon_id(exon_id)\u003c/dt\u003e\n\u003cdd\u003eReturn IDs of all transcripts associated with given exon ID.\u003c/dd\u003e\n\u003c/dl\u003e\n\n## Exons\n\n\u003cdl\u003e\n\u003cdt\u003eexon_ids(contig=None, strand=None)\u003c/dt\u003e\n\u003cdd\u003eReturns a list of exon IDs in the annotation database, optionally restricted\nby the given chromosome and strand.\u003c/dd\u003e\n\n\u003cdt\u003eexon_by_id(exon_id)\u003c/dt\u003e\n\u003cdd\u003eConstruct an Exon object for given Ensembl exon ID (e.g. 
\"ENSE00001209410\")\u003c/dd\u003e\n\n\u003cdt\u003eexon_ids_of_gene_id(gene_id)\u003c/dt\u003e\n\u003cdd\u003eReturns a list of exon IDs associated with a given gene ID.\u003c/dd\u003e\n\n\u003cdt\u003eexon_ids_of_gene_name(gene_name)\u003c/dt\u003e\n\u003cdd\u003eReturns a list of exon IDs associated with a given gene name.\u003c/dd\u003e\n\n\u003cdt\u003eexon_ids_of_transcript_id(transcript_id)\u003c/dt\u003e\n\u003cdd\u003eReturns a list of exon IDs associated with a given transcript ID.\u003c/dd\u003e\n\n\u003cdt\u003eexon_ids_of_transcript_name(transcript_name)\u003c/dt\u003e\n\u003cdd\u003eReturns a list of exon IDs associated with a given transcript name.\u003c/dd\u003e\n\u003c/dl\u003e\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fopenvax%2Fpyensembl","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fopenvax%2Fpyensembl","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fopenvax%2Fpyensembl/lists"}