{"id":13400388,"url":"https://github.com/lmcinnes/umap","last_synced_at":"2026-01-16T03:26:20.874Z","repository":{"id":37413050,"uuid":"95995403","full_name":"lmcinnes/umap","owner":"lmcinnes","description":"Uniform Manifold Approximation and Projection","archived":false,"fork":false,"pushed_at":"2026-01-12T01:17:28.000Z","size":94718,"stargazers_count":8051,"open_issues_count":524,"forks_count":860,"subscribers_count":122,"default_branch":"master","last_synced_at":"2026-01-12T05:52:27.751Z","etag":null,"topics":["dimensionality-reduction","machine-learning","topological-data-analysis","umap","visualization"],"latest_commit_sha":null,"homepage":"https://umap-learn.readthedocs.io","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"bsd-3-clause","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/lmcinnes.png","metadata":{"files":{"readme":"README.rst","changelog":null,"contributing":"CONTRIBUTING.md","funding":null,"license":"LICENSE.txt","code_of_conduct":"CODE_OF_CONDUCT.md","threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null,"notice":null,"maintainers":null,"copyright":null,"agents":null,"dco":null,"cla":null}},"created_at":"2017-07-02T01:11:17.000Z","updated_at":"2026-01-12T01:17:33.000Z","dependencies_parsed_at":"2023-02-16T12:15:47.333Z","dependency_job_id":"850e066c-67a0-484a-9fb7-d69177042d1a","html_url":"https://github.com/lmcinnes/umap","commit_stats":{"total_commits":1517,"total_committers":138,"mean_commits":"10.992753623188406","dds":0.4119973632168754,"last_synced_commit":"a012b9d8751d98b94935ca21f278a54b3c3e1b7f"},"previous_names":[],"tags_count":38,"template":false,"template_full_name":null,"purl":"pkg:github/lmcinnes/umap","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/l
mcinnes%2Fumap","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/lmcinnes%2Fumap/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/lmcinnes%2Fumap/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/lmcinnes%2Fumap/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/lmcinnes","download_url":"https://codeload.github.com/lmcinnes/umap/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/lmcinnes%2Fumap/sbom","scorecard":{"id":596078,"data":{"date":"2025-08-11","repo":{"name":"github.com/lmcinnes/umap","commit":"90871ffc135a0cf8e0866881f149b4d4cf562152"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":3,"checks":[{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Token-Permissions","score":-1,"reason":"No tokens found","details":null,"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"Dangerous-Workflow","score":-1,"reason":"no workflows found","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"Code-Review","score":3,"reason":"Found 3/8 approved changesets -- score normalized to 
3","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Maintained","score":9,"reason":"8 commit(s) and 3 issue activity found in the last 90 days -- score normalized to 9","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses 
fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: LICENSE.txt:0","Info: FSF or OSI recognized license: BSD 3-Clause \"New\" or \"Revised\" License: LICENSE.txt:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Branch-Protection","score":0,"reason":"branch protection not enabled on development/release branches","details":["Warn: branch protection not enabled for branch 'master'"],"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"SAST","score":0,"reason":"SAST tool is not run on all commits -- score normalized to 0","details":["Warn: 0 commits out of 26 are checked with a SAST tool"],"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}},{"name":"Pinned-Dependencies","score":0,"reason":"dependency not pinned by hash detected -- score normalized to 0","details":["Warn: downloadThenRun not pinned by hash: ci_scripts/install.sh:33","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:61","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:62","Warn: pipCommand not pinned by hash: 
ci_scripts/install.sh:66","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:67","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:77","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:78","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:79","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:80","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:81","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:82","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:83","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:84","Warn: pipCommand not pinned by hash: ci_scripts/install.sh:85","Info:   0 out of   1 downloadThenRun dependencies pinned","Info:   0 out of  13 pipCommand dependencies pinned"],"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"Vulnerabilities","score":0,"reason":"66 existing vulnerabilities detected","details":["Warn: Project is vulnerable to: PYSEC-2021-856 / GHSA-5545-2q6w-2gh6","Warn: Project is vulnerable to: GHSA-6p56-wp2h-9hxr","Warn: Project is vulnerable to: PYSEC-2019-108 / GHSA-9fq2-x9r6-wfmf","Warn: Project is vulnerable to: PYSEC-2021-857 / GHSA-f7c7-j99h-c22f","Warn: Project is vulnerable to: GHSA-fpfv-jqm9-f5jm","Warn: Project is vulnerable to: PYSEC-2017-1 / GHSA-frgw-fgh6-9g52","Warn: Project is vulnerable to: PYSEC-2020-107 / GHSA-jjw5-xxj6-pcv5","Warn: Project is vulnerable to: PYSEC-2024-110 / GHSA-jw8x-6495-233v","Warn: Project is vulnerable to: PYSEC-2020-108","Warn: Project is vulnerable to: PYSEC-2023-102","Warn: Project is vulnerable to: PYSEC-2023-114","Warn: Project is vulnerable to: PYSEC-2017-74","Warn: Project is vulnerable to: GHSA-3c5c-7235-994j","Warn: Project is vulnerable to: GHSA-3f63-hfp8-52jq","Warn: Project is vulnerable to: PYSEC-2021-41 / 
GHSA-3wvg-mj6g-m9cv","Warn: Project is vulnerable to: PYSEC-2020-77 / GHSA-3xv8-3j54-hgrp","Warn: Project is vulnerable to: PYSEC-2020-80 / GHSA-43fq-w8qq-v88h","Warn: Project is vulnerable to: GHSA-44wm-f244-xhp3","Warn: Project is vulnerable to: GHSA-4fx9-vc88-q2xc","Warn: Project is vulnerable to: PYSEC-2021-35 / GHSA-57h3-9rgr-c24m","Warn: Project is vulnerable to: PYSEC-2020-172 / GHSA-5gm3-px64-rw72","Warn: Project is vulnerable to: PYSEC-2021-331 / GHSA-7534-mm45-c74v","Warn: Project is vulnerable to: PYSEC-2021-92 / GHSA-7r7m-5h27-29hp","Warn: Project is vulnerable to: PYSEC-2020-78 / GHSA-8843-m7mw-mxqm","Warn: Project is vulnerable to: PYSEC-2023-227 / GHSA-8ghj-p4vj-mr35","Warn: Project is vulnerable to: PYSEC-2014-87 / GHSA-8m9x-pxwq-j236","Warn: Project is vulnerable to: PYSEC-2022-10 / GHSA-8vj2-vxx3-667w","Warn: Project is vulnerable to: PYSEC-2021-36 / GHSA-8xjq-8fcg-g5hw","Warn: Project is vulnerable to: PYSEC-2016-6 / GHSA-8xjv-v9xq-m5h9","Warn: Project is vulnerable to: PYSEC-2021-42 / GHSA-95q3-8gr9-gm8w","Warn: Project is vulnerable to: PYSEC-2022-168 / GHSA-9j59-75qj-795w","Warn: Project is vulnerable to: PYSEC-2014-10 / GHSA-cfmr-38g9-f2h7","Warn: Project is vulnerable to: PYSEC-2020-76 / GHSA-cqhg-xjhh-p8hf","Warn: Project is vulnerable to: PYSEC-2021-40 / GHSA-f4w8-cv6p-x6r5","Warn: Project is vulnerable to: PYSEC-2021-69 / GHSA-f5g8-5qq7-938w","Warn: Project is vulnerable to: PYSEC-2021-139 / GHSA-g6rj-rv7j-xwp4","Warn: Project is vulnerable to: PYSEC-2015-16 / GHSA-h5rf-vgqx-wjv2","Warn: Project is vulnerable to: PYSEC-2016-5 / GHSA-hggx-3h72-49ww","Warn: Project is vulnerable to: PYSEC-2020-84 / GHSA-hj69-c76v-86wr","Warn: Project is vulnerable to: PYSEC-2016-7 / GHSA-hvr8-466p-75rh","Warn: Project is vulnerable to: PYSEC-2015-15 / GHSA-j6f7-g425-4gmx","Warn: Project is vulnerable to: GHSA-j7hp-h8jx-5ppr","Warn: Project is vulnerable to: PYSEC-2019-110 / GHSA-j7mj-748x-7p78","Warn: Project is vulnerable to: GHSA-jgpv-4h4c-xhw3","Warn: 
Project is vulnerable to: PYSEC-2022-42979 / GHSA-m2vv-5vj5-2hm7","Warn: Project is vulnerable to: PYSEC-2021-37 / GHSA-mvg9-xffr-p774","Warn: Project is vulnerable to: PYSEC-2020-83 / GHSA-p49h-hjvm-jg3h","Warn: Project is vulnerable to: PYSEC-2022-8 / GHSA-pw3c-h7wp-cvhx","Warn: Project is vulnerable to: PYSEC-2021-93 / GHSA-q5hq-fp76-qmrc","Warn: Project is vulnerable to: PYSEC-2020-82 / GHSA-r7rm-8j6h-r933","Warn: Project is vulnerable to: PYSEC-2014-23 / GHSA-r854-96gq-rfg3","Warn: Project is vulnerable to: PYSEC-2016-8 / GHSA-rwr3-c2q8-gm56","Warn: Project is vulnerable to: PYSEC-2020-81 / GHSA-vcqg-3p29-xw73","Warn: Project is vulnerable to: PYSEC-2020-79 / GHSA-vj42-xq3r-hr3r","Warn: Project is vulnerable to: PYSEC-2021-70 / GHSA-vqcj-wrf2-7v73","Warn: Project is vulnerable to: PYSEC-2016-9 / GHSA-w4vg-rf63-f3j3","Warn: Project is vulnerable to: PYSEC-2014-22 / GHSA-x895-2wrm-hvp7","Warn: Project is vulnerable to: PYSEC-2022-9 / GHSA-xrcv-f9gm-v42c","Warn: Project is vulnerable to: PYSEC-2021-137","Warn: Project is vulnerable to: PYSEC-2021-138","Warn: Project is vulnerable to: PYSEC-2021-317","Warn: Project is vulnerable to: PYSEC-2021-38","Warn: Project is vulnerable to: PYSEC-2021-39","Warn: Project is vulnerable to: PYSEC-2021-94","Warn: Project is vulnerable to: PYSEC-2023-175","Warn: Project is vulnerable to: PYSEC-2019-156 / GHSA-xp76-357g-9wqq"],"documentation":{"short":"Determines if the project has open, known unfixed 
vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}}]},"last_synced_at":"2025-08-20T23:10:33.307Z","repository_id":37413050,"created_at":"2025-08-20T23:10:33.307Z","updated_at":"2025-08-20T23:10:33.307Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":28477203,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-01-16T03:13:13.607Z","status":"ssl_error","status_checked_at":"2026-01-16T03:11:47.863Z","response_time":107,"last_error":"SSL_read: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["dimensionality-reduction","machine-learning","topological-data-analysis","umap","visualization"],"created_at":"2024-07-30T19:00:51.489Z","updated_at":"2026-01-16T03:26:20.858Z","avatar_url":"https://github.com/lmcinnes.png","language":"Python","readme":".. -*- mode: rst -*-\n\n.. image:: doc/logo_large.png\n  :width: 600\n  :alt: UMAP logo\n  :align: center\n\n|pypi_version|_ |pypi_downloads|_\n\n|conda_version|_ |conda_downloads|_\n\n|License|_ |build_status|_ |Coverage|_\n\n|Docs|_ |joss_paper|_\n\n.. |pypi_version| image:: https://img.shields.io/pypi/v/umap-learn.svg\n.. _pypi_version: https://pypi.python.org/pypi/umap-learn/\n\n.. |pypi_downloads| image:: https://pepy.tech/badge/umap-learn/month\n.. _pypi_downloads: https://pepy.tech/project/umap-learn\n\n.. 
|conda_version| image:: https://anaconda.org/conda-forge/umap-learn/badges/version.svg\n.. _conda_version: https://anaconda.org/conda-forge/umap-learn\n\n.. |conda_downloads| image:: https://anaconda.org/conda-forge/umap-learn/badges/downloads.svg\n.. _conda_downloads: https://anaconda.org/conda-forge/umap-learn\n\n.. |License| image:: https://img.shields.io/pypi/l/umap-learn.svg\n.. _License: https://github.com/lmcinnes/umap/blob/master/LICENSE.txt\n\n.. |build_status| image:: https://dev.azure.com/TutteInstitute/build-pipelines/_apis/build/status/lmcinnes.umap?branchName=master\n.. _build_status: https://dev.azure.com/TutteInstitute/build-pipelines/_build/latest?definitionId=2\u0026branchName=master\n\n.. |Coverage| image:: https://coveralls.io/repos/github/lmcinnes/umap/badge.svg\n.. _Coverage: https://coveralls.io/github/lmcinnes/umap\n\n.. |Docs| image:: https://readthedocs.org/projects/umap-learn/badge/?version=latest\n.. _Docs: https://umap-learn.readthedocs.io/en/latest/?badge=latest\n\n.. |joss_paper| image:: http://joss.theoj.org/papers/10.21105/joss.00861/status.svg\n.. _joss_paper: https://doi.org/10.21105/joss.00861\n\n====\nUMAP\n====\n\nUniform Manifold Approximation and Projection (UMAP) is a dimension reduction\ntechnique that can be used for visualisation similarly to t-SNE, but also for\ngeneral non-linear dimension reduction. The algorithm is founded on three\nassumptions about the data:\n\n1. The data is uniformly distributed on a Riemannian manifold;\n2. The Riemannian metric is locally constant (or can be approximated as such);\n3. The manifold is locally connected.\n\nFrom these assumptions it is possible to model the manifold with a fuzzy\ntopological structure. 
The embedding is found by searching for a low dimensional\nprojection of the data that has the closest possible equivalent fuzzy\ntopological structure.\n\nThe details for the underlying mathematics can be found in\n`our paper on ArXiv \u003chttps://arxiv.org/abs/1802.03426\u003e`_:\n\nMcInnes, L, Healy, J, *UMAP: Uniform Manifold Approximation and Projection\nfor Dimension Reduction*, ArXiv e-prints 1802.03426, 2018\n\nA broader introduction to UMAP targetted the scientific community can be found \nin our `paper published in Nature Review Methods Primers  \u003chttps://doi.org/10.1038/s43586-024-00363-x\u003e`_:\n\nHealy, J., McInnes, L. *Uniform manifold approximation and projection*. Nat Rev Methods \nPrimers 4, 82 (2024). \n\nA read only version of this paper can accessed via `link \u003chttps://rdcu.be/d0YZT\u003e`_\n\nThe important thing is that you don't need to worry about that—you can use\nUMAP right now for dimension reduction and visualisation as easily as a drop\nin replacement for scikit-learn's t-SNE.\n\nDocumentation is `available via Read the Docs \u003chttps://umap-learn.readthedocs.io/\u003e`_.\n\n**New: this package now also provides support for densMAP.** The densMAP algorithm augments UMAP\nto preserve local density information in addition to the topological structure of the data.\nDetails of this method are described in the following `paper \u003chttps://doi.org/10.1038/s41587-020-00801-7\u003e`_:\n\nNarayan, A, Berger, B, Cho, H, *Assessing Single-Cell Transcriptomic Variability\nthrough Density-Preserving Data Visualization*, Nature Biotechnology, 2021\n\n----------\nInstalling\n----------\n\nUMAP depends upon ``scikit-learn``, and thus ``scikit-learn``'s dependencies\nsuch as ``numpy`` and ``scipy``. UMAP adds a requirement for ``numba`` for\nperformance reasons. 
The original version used Cython, but the improved code\nclarity, simplicity and performance of Numba made the transition necessary.\n\nRequirements:\n\n* Python 3.6 or greater\n* numpy\n* scipy\n* scikit-learn\n* numba\n* tqdm\n* `pynndescent \u003chttps://github.com/lmcinnes/pynndescent\u003e`_\n\nRecommended packages:\n\n* For plotting\n   * matplotlib\n   * datashader\n   * holoviews\n* for Parametric UMAP\n   * tensorflow \u003e 2.0.0\n\n**Install Options**\n\nConda install, via the excellent work of the conda-forge team:\n\n.. code:: bash\n\n    conda install -c conda-forge umap-learn\n\nThe conda-forge packages are available for Linux, OS X, and Windows 64 bit.\n\nPyPI install, presuming you have numba and sklearn and all its requirements\n(numpy and scipy) installed:\n\n.. code:: bash\n\n    pip install umap-learn\n\nIf you wish to use the plotting functionality you can use\n\n.. code:: bash\n\n    pip install umap-learn[plot]\n\nto install all the plotting dependencies.\n\nIf you wish to use Parametric UMAP, you need to install Tensorflow, which can be\ninstalled either using the instructions at https://www.tensorflow.org/install\n(recommended) or using\n\n.. code:: bash\n\n    pip install umap-learn[parametric_umap]\n\nfor a CPU-only version of Tensorflow.\n\nIf you're on an x86 processor, you can also optionally install `tbb`, which will\nprovide additional CPU optimizations:\n\n.. code:: bash\n\n    pip install umap-learn[tbb]\n\nIf pip is having difficulties pulling the dependencies then we'd suggest installing\nthe dependencies manually using anaconda followed by pulling umap from pip:\n\n.. code:: bash\n\n    conda install numpy scipy\n    conda install scikit-learn\n    conda install numba\n    pip install umap-learn\n\nFor a manual install get this package:\n\n.. 
code:: bash\n\n    wget https://github.com/lmcinnes/umap/archive/master.zip\n    unzip master.zip\n    rm master.zip\n    cd umap-master\n\nOptionally, install the requirements through Conda:\n\n.. code:: bash\n\n    conda install scikit-learn numba\n\nThen install the package\n\n.. code:: bash\n\n    python -m pip install -e .\n\n---------------\nHow to use UMAP\n---------------\n\nThe umap package inherits from sklearn classes, and thus drops in neatly\nnext to other sklearn transformers with an identical calling API.\n\n.. code:: python\n\n    import umap\n    from sklearn.datasets import load_digits\n\n    digits = load_digits()\n\n    embedding = umap.UMAP().fit_transform(digits.data)\n\nThere are a number of parameters that can be set for the UMAP class; the\nmajor ones are as follows:\n\n -  ``n_neighbors``: This determines the number of neighboring points used in\n    local approximations of manifold structure. Larger values will result in\n    more global structure being preserved at the loss of detailed local\n    structure. In general this parameter should often be in the range 5 to\n    50, with a choice of 10 to 15 being a sensible default.\n\n -  ``min_dist``: This controls how tightly the embedding is allowed compress\n    points together. Larger values ensure embedded points are more evenly\n    distributed, while smaller values allow the algorithm to optimise more\n    accurately with regard to local structure. Sensible values are in the\n    range 0.001 to 0.5, with 0.1 being a reasonable default.\n\n -  ``metric``: This determines the choice of metric used to measure distance\n    in the input space. A wide variety of metrics are already coded, and a user\n    defined function can be passed as long as it has been JITd by numba.\n\nAn example of making use of these options:\n\n.. 
code:: python\n\n    import umap\n    from sklearn.datasets import load_digits\n\n    digits = load_digits()\n\n    embedding = umap.UMAP(n_neighbors=5,\n                          min_dist=0.3,\n                          metric='correlation').fit_transform(digits.data)\n\nUMAP also supports fitting to sparse matrix data. For more details\nplease see `the UMAP documentation \u003chttps://umap-learn.readthedocs.io/\u003e`_\n\n----------------\nBenefits of UMAP\n----------------\n\nUMAP has a few signficant wins in its current incarnation.\n\nFirst of all UMAP is *fast*. It can handle large datasets and high\ndimensional data without too much difficulty, scaling beyond what most t-SNE\npackages can manage. This includes very high dimensional sparse datasets. UMAP\nhas successfully been used directly on data with over a million dimensions.\n\nSecond, UMAP scales well in embedding dimension—it isn't just for\nvisualisation! You can use UMAP as a general purpose dimension reduction\ntechnique as a preliminary step to other machine learning tasks. With a\nlittle care it partners well with the `hdbscan\n\u003chttps://github.com/scikit-learn-contrib/hdbscan\u003e`_ clustering library (for\nmore details please see `Using UMAP for Clustering\n\u003chttps://umap-learn.readthedocs.io/en/latest/clustering.html\u003e`_).\n\nThird, UMAP often performs better at preserving some aspects of global structure\nof the data than most implementations of t-SNE. This means that it can often\nprovide a better \"big picture\" view of your data as well as preserving local neighbor\nrelations.\n\nFourth, UMAP supports a wide variety of distance functions, including\nnon-metric distance functions such as *cosine distance* and *correlation\ndistance*. You can finally embed word vectors properly using cosine distance!\n\nFifth, UMAP supports adding new points to an existing embedding via\nthe standard sklearn ``transform`` method. 
This means that UMAP can be\nused as a preprocessing transformer in sklearn pipelines.\n\nSixth, UMAP supports supervised and semi-supervised dimension reduction.\nThis means that if you have label information that you wish to use as\nextra information for dimension reduction (even if it is just partial\nlabelling) you can do that—as simply as providing it as the ``y``\nparameter in the fit method.\n\nSeventh, UMAP supports a variety of additional experimental features including: an\n\"inverse transform\" that can approximate a high dimensional sample that would map to\na given position in the embedding space; the ability to embed into non-euclidean\nspaces including hyperbolic embeddings, and embeddings with uncertainty; very\npreliminary support for embedding dataframes also exists.\n\nFinally, UMAP has solid theoretical foundations in manifold learning\n(see `our paper on ArXiv \u003chttps://arxiv.org/abs/1802.03426\u003e`_).\nThis both justifies the approach and allows for further\nextensions that will soon be added to the library.\n\n------------------------\nPerformance and Examples\n------------------------\n\nUMAP is very efficient at embedding large high dimensional datasets. 
In\nparticular it scales well with both input dimension and embedding dimension.\nFor the best possible performance we recommend installing the nearest neighbor\ncomputation library `pynndescent \u003chttps://github.com/lmcinnes/pynndescent\u003e`_ .\nUMAP will work without it, but if installed it will run faster, particularly on\nmulticore machines.\n\nFor a problem such as the 784-dimensional MNIST digits dataset with\n70000 data samples, UMAP can complete the embedding in under a minute (as\ncompared with around 45 minutes for scikit-learn's t-SNE implementation).\nDespite this runtime efficiency, UMAP still produces high quality embeddings.\n\nThe obligatory MNIST digits dataset, embedded in 42\nseconds (with pynndescent installed and after numba jit warmup)\nusing a 3.1 GHz Intel Core i7 processor (n_neighbors=10, min_dist=0.001):\n\n.. image:: images/umap_example_mnist1.png\n    :alt: UMAP embedding of MNIST digits\n\nThe MNIST digits dataset is fairly straightforward, however. A better test is\nthe more recent \"Fashion MNIST\" dataset of images of fashion items (again\n70000 data sample in 784 dimensions). UMAP\nproduced this embedding in 49 seconds (n_neighbors=5, min_dist=0.1):\n\n.. image:: images/umap_example_fashion_mnist1.png\n    :alt: UMAP embedding of \"Fashion MNIST\"\n\nThe UCI shuttle dataset (43500 sample in 8 dimensions) embeds well under\n*correlation* distance in 44 seconds (note the longer time\nrequired for correlation distance computations):\n\n.. image:: images/umap_example_shuttle.png\n    :alt: UMAP embedding the UCI Shuttle dataset\n\nThe following is a densMAP visualization of the MNIST digits dataset with 784 features\nbased on the same parameters as above (n_neighbors=10, min_dist=0.001). densMAP reveals\nthat the cluster corresponding to digit 1 is noticeably denser, suggesting that\nthere are fewer degrees of freedom in the images of 1 compared to other digits.\n\n.. 
image:: images/densmap_example_mnist.png\n    :alt: densMAP embedding of the MNIST dataset\n\n--------\nPlotting\n--------\n\nUMAP includes a subpackage ``umap.plot`` for plotting the results of UMAP embeddings.\nThis package needs to be imported separately since it has extra requirements\n(matplotlib, datashader and holoviews). It allows for fast and simple plotting and\nattempts to make sensible decisions to avoid overplotting and other pitfalls. An\nexample of use:\n\n.. code:: python\n\n    import umap\n    import umap.plot\n    from sklearn.datasets import load_digits\n\n    digits = load_digits()\n\n    mapper = umap.UMAP().fit(digits.data)\n    umap.plot.points(mapper, labels=digits.target)\n\nThe plotting package offers basic plots, as well as interactive plots with hover\ntools and various diagnostic plotting options. See the documentation for more details.\n\n---------------\nParametric UMAP\n---------------\n\nParametric UMAP provides support for training a neural network to learn a UMAP based\ntransformation of data. This can be used to support faster inference of new unseen\ndata, more robust inverse transforms, autoencoder versions of UMAP and\nsemi-supervised classification (particularly for data well separated by UMAP and very\nlimited amounts of labelled data). See the\n`documentation of Parametric UMAP \u003chttps://umap-learn.readthedocs.io/en/0.5dev/parametric_umap.html\u003e`_\nor the\n`example notebooks \u003chttps://github.com/lmcinnes/umap/tree/master/notebooks/Parametric_UMAP\u003e`_\nfor more.\n\n\n-------\ndensMAP\n-------\n\nThe densMAP algorithm augments UMAP to additionally preserve local density information\nin addition to the topological structure captured by UMAP. One can easily run densMAP\nusing the umap package by setting the ``densmap`` input flag:\n\n.. 
code:: python\n\n    embedding = umap.UMAP(densmap=True).fit_transform(data)\n\nThis functionality is built upon the densMAP `implementation \u003chttps://github.com/hhcho/densvis\u003e`_ provided by the developers\nof densMAP, who also contributed to integrating densMAP into the umap package.\n\ndensMAP inherits all of the parameters of UMAP. The following is a list of additional\nparameters that can be set for densMAP:\n\n - ``dens_frac``: This determines the fraction of epochs (a value between 0 and 1) that will include the density-preservation term in the optimization objective. This parameter is set to 0.3 by default. Note that densMAP switches density optimization on after an initial phase of optimizing the embedding using UMAP.\n\n - ``dens_lambda``: This determines the weight of the density-preservation objective. Higher values prioritize density preservation, and lower values (closer to zero) prioritize the UMAP objective. Setting this parameter to zero reduces the algorithm to UMAP. Default value is 2.0.\n\n - ``dens_var_shift``: Regularization term added to the variance of local densities in the embedding for numerical stability. We recommend setting this parameter to 0.1, which consistently works well in many settings.\n\n - ``output_dens``: When this flag is True, the call to ``fit_transform`` returns, in addition to the embedding, the local radii (inverse measure of local density defined in the `densMAP paper \u003chttps://doi.org/10.1101/2020.05.12.077776\u003e`_) for the original dataset and for the embedding. The output is a tuple ``(embedding, radii_original, radii_embedding)``. Note that the radii are log-transformed. If False, only the embedding is returned. This flag can also be used with UMAP to explore the local densities of UMAP embeddings. By default this flag is False.\n\nFor densMAP we recommend larger values of ``n_neighbors`` (e.g. 
30) for reliable estimation of local density.\n\nAn example of making use of these options (based on a subsample of the mnist_784 dataset):\n\n.. code:: python\n\n    import umap\n    from sklearn.datasets import fetch_openml\n    from sklearn.utils import resample\n\n    digits = fetch_openml(name='mnist_784')\n    subsample, subsample_labels = resample(digits.data, digits.target, n_samples=7000,\n                                           stratify=digits.target, random_state=1)\n\n    embedding, r_orig, r_emb = umap.UMAP(densmap=True, dens_lambda=2.0, n_neighbors=30,\n                                         output_dens=True).fit_transform(subsample)\n\nSee `the documentation \u003chttps://umap-learn.readthedocs.io/en/0.5dev/densmap_demo.html\u003e`_ for more details.\n\n\n---------------------------------\nInteractive UMAP with Nomic Atlas\n---------------------------------\n\n.. image:: https://assets.nomicatlas.com/mnist-training-embeddings-umap-short.gif\n   :width: 600\n   :alt: MNIST UMAP visualization in Nomic Atlas\n\nFor interactive exploration of UMAP embeddings, especially for visualizing large datasets data over time/training epochs, you can use `Nomic Atlas \u003chttps://atlas.nomic.ai/\u003e`_. Nomic Atlas is a platform for embedding generation, visualization, analysis, and retrieval that directly integrates UMAP as one of its projection models.\n\nUsing Nomic Atlas with UMAP is straightforward:\n\n.. 
code:: python\n\n    from nomic import AtlasDataset\n    from nomic.data_inference import ProjectionOptions\n\n    # Create a dataset\n    dataset = AtlasDataset(\"my-dataset\")\n    \n    # data is a DataFrame or a list of dicts\n    dataset.add_data(data)\n\n    # Create an interactive UMAP in Atlas\n    atlas_map = dataset.create_index(\n        indexed_field='text',\n        projection=ProjectionOptions(\n            model=\"umap\",\n            n_neighbors=15,\n            min_dist=0.1,\n            n_epochs=200\n        )\n    )\n    # you can access your UMAP coordinates later on with\n    # atlas_map.maps[0].embeddings.projected\n\nNomic Atlas provides:\n\n* In-browser analysis of your UMAP data with the `Atlas Analyst \u003chttps://docs.nomic.ai/atlas/data-maps/atlas-analyst\u003e`_\n* Vector search over your UMAP data using the `Nomic API \u003chttps://docs.nomic.ai/atlas/data-maps/guides/vector-search-over-your-data\u003e`_\n* Interactive features like zooming, recoloring, searching, and filtering in the `Nomic Atlas data map \u003chttps://docs.nomic.ai/atlas/data-maps/controls\u003e`_\n* Scalability for millions of data points\n* Rich information display on hover\n* Shareable UMAPs via URL links to your embeddings and data maps in Atlas\n\n\n---------------------------------\nGPU-Accelerated UMAP with torchdr\n---------------------------------\n\nFor GPU-accelerated UMAP computations, `torchdr \u003chttps://github.com/TorchDR/TorchDR\u003e`_ provides a PyTorch-based implementation that significantly speeds up the algorithm. \ntorchdr accelerates **every step** of the dimensionality reduction pipeline on GPU: kNN computation, affinity construction and embedding optimization.\n\nUsing torchdr with UMAP is straightforward:\n\n.. 
code:: python\n\n    from torchdr import UMAP as torchdrUMAP\n    \n    umap_gpu = torchdrUMAP(\n        n_neighbors=15,\n        min_dist=0.1,\n        n_components=2,\n        device='cuda'\n    )\n    embedding = umap_gpu.fit_transform(data)\n\nFor more information and advanced usage, see the `torchdr documentation \u003chttps://torchdr.github.io/index.html\u003e`_.\n\n\n----------------\nHelp and Support\n----------------\n\nDocumentation is at `Read the Docs \u003chttps://umap-learn.readthedocs.io/\u003e`_.\nThe documentation `includes a FAQ \u003chttps://umap-learn.readthedocs.io/en/latest/faq.html\u003e`_ that\nmay answer your questions. If you still have questions then please\n`open an issue \u003chttps://github.com/lmcinnes/umap/issues/new\u003e`_\nand I will try to provide any help and guidance that I can.\n\n--------\nCitation\n--------\n\nIf you make use of this software for your work we would appreciate it if you\nwould cite the paper from the Journal of Open Source Software:\n\n.. code:: bibtex\n\n    @article{mcinnes2018umap-software,\n      title={UMAP: Uniform Manifold Approximation and Projection},\n      author={McInnes, Leland and Healy, John and Saul, Nathaniel and Grossberger, Lukas},\n      journal={The Journal of Open Source Software},\n      volume={3},\n      number={29},\n      pages={861},\n      year={2018}\n    }\n\nIf you would like to cite this algorithm in your work the ArXiv paper is the\ncurrent reference:\n\n.. code:: bibtex\n\n   @article{2018arXivUMAP,\n        author = {{McInnes}, L. and {Healy}, J. 
and {Melville}, J.},\n        title = \"{UMAP: Uniform Manifold Approximation\n        and Projection for Dimension Reduction}\",\n        journal = {ArXiv e-prints},\n        archivePrefix = \"arXiv\",\n        eprint = {1802.03426},\n        primaryClass = \"stat.ML\",\n        keywords = {Statistics - Machine Learning,\n                    Computer Science - Computational Geometry,\n                    Computer Science - Learning},\n        year = 2018,\n        month = feb,\n   }\n\nIf you found the Nature Primer introduction useful please cite the following reference:\n\n.. code:: bibtex\n\n    @article{Healy2024,\n      author={Healy, John\n      and McInnes, Leland},\n      title={Uniform manifold approximation and projection},\n      journal={Nature Reviews Methods Primers},\n      year={2024},\n      month={Nov},\n      day={21},\n      volume={4},\n      number={1},\n      pages={82},\n      abstract={Uniform manifold approximation and projection is a nonlinear dimension reduction method often used for visualizing data and as pre-processing for further machine-learning tasks such as clustering. In this Primer, we provide an introduction to the uniform manifold approximation and projection algorithm, the intuitions behind how it works, how best to apply it on data and how to interpret and understand results.},\n      issn={2662-8449},\n      doi={10.1038/s43586-024-00363-x},\n      url={https://doi.org/10.1038/s43586-024-00363-x}\n    }\n\nAdditionally, if you use the densMAP algorithm in your work please cite the following reference:\n\n.. 
code:: bibtex\n\n    @article {NBC2020,\n        author = {Narayan, Ashwin and Berger, Bonnie and Cho, Hyunghoon},\n        title = {Assessing Single-Cell Transcriptomic Variability through Density-Preserving Data Visualization},\n        journal = {Nature Biotechnology},\n        year = {2021},\n        doi = {10.1038/s41587-020-00801-7},\n        publisher = {Springer Nature},\n        URL = {https://doi.org/10.1038/s41587-020-00801-7},\n        eprint = {https://www.biorxiv.org/content/early/2020/05/14/2020.05.12.077776.full.pdf},\n    }\n\nIf you use the Parametric UMAP algorithm in your work please cite the following reference:\n\n.. code:: bibtex\n\n    @article {SMG2020,\n        author = {Sainburg, Tim and McInnes, Leland and Gentner, Timothy Q.},\n        title = {Parametric UMAP: learning embeddings with deep neural networks for representation and semi-supervised learning},\n        journal = {ArXiv e-prints},\n        archivePrefix = \"arXiv\",\n        eprint = {2009.12981},\n        primaryClass = \"stat.ML\",\n        keywords = {Statistics - Machine Learning,\n                    Computer Science - Computational Geometry,\n                    Computer Science - Learning},\n        year = 2020,\n        }\n\n\n-------\nLicense\n-------\n\nThe umap package is 3-clause BSD licensed.\n\nWe would like to note that the umap package makes heavy use of\nNumFOCUS sponsored projects, and would not be possible without\ntheir support of those projects, so please `consider contributing to NumFOCUS \u003chttps://www.numfocus.org/membership\u003e`_.\n\n------------\nContributing\n------------\n\nContributions are more than welcome! There are lots of opportunities\nfor potential projects, so please get in touch if you would like to\nhelp out. Everything from code to notebooks to\nexamples and documentation are all *equally valuable* so please don't feel\nyou can't contribute. 
To contribute please\n`fork the project \u003chttps://github.com/lmcinnes/umap/issues#fork-destination-box\u003e`_\nmake your changes and\nsubmit a pull request. We will do our best to work through any issues with\nyou and get your code merged into the main branch.\n\n\n","funding_links":[],"categories":["Python","Dimensionality Reduction","数据可视化","Linear Algebra / Statistics Toolkit","其他_机器学习与深度学习","Uncategorized","Machine Learning","Python Packages"],"sub_categories":["Tensor Similarity \u0026 Dimension Reduction","Uncategorized","Unsupervised"],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Flmcinnes%2Fumap","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Flmcinnes%2Fumap","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Flmcinnes%2Fumap/lists"}