{"id":13676039,"url":"https://github.com/polm/fugashi","last_synced_at":"2026-02-01T15:39:15.731Z","repository":{"id":35163581,"uuid":"214947510","full_name":"polm/fugashi","owner":"polm","description":"A Cython MeCab wrapper for fast, pythonic Japanese tokenization and morphological analysis.","archived":false,"fork":false,"pushed_at":"2025-10-24T07:22:49.000Z","size":501,"stargazers_count":487,"open_issues_count":10,"forks_count":39,"subscribers_count":6,"default_branch":"main","last_synced_at":"2025-11-27T18:09:59.158Z","etag":null,"topics":["cython-wrapper","japanese","mecab","nlp","tokenizer"],"latest_commit_sha":null,"homepage":"","language":"C++","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/polm.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":".github/FUNDING.yml","license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":"CITATION.cff","codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null,"notice":null,"maintainers":null,"copyright":null,"agents":null,"dco":null,"cla":null},"funding":{"github":"polm"}},"created_at":"2019-10-14T04:24:52.000Z","updated_at":"2025-11-27T02:08:25.000Z","dependencies_parsed_at":"2024-04-15T13:38:17.868Z","dependency_job_id":"34a8aa19-0228-4596-bc08-a7bebe67da77","html_url":"https://github.com/polm/fugashi","commit_stats":{"total_commits":255,"total_committers":10,"mean_commits":25.5,"dds":0.05490196078431375,"last_synced_commit":"57e4c28d2caa9893778316a1f038df18eb92ffae"},"previous_names":[],"tags_count":79,"template":false,"template_full_name":null,"purl":"pkg:github/polm/fugashi","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/polm%2Ffugashi","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/polm%2Ffugashi/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/polm%2Ffugashi/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/polm%2Ffugashi/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/polm","download_url":"https://codeload.github.com/polm/fugashi/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/polm%2Ffugashi/sbom","scorecard":{"id":740252,"data":{"date":"2025-08-11","repo":{"name":"github.com/polm/fugashi","commit":"71d0b97392d5dc5fe431c1b3d5fea195ed63d70d"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":4.2,"checks":[{"name":"Maintained","score":6,"reason":"6 commit(s) and 2 issue activity found in the last 90 days -- score normalized to 6","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"Dangerous-Workflow","score":10,"reason":"no dangerous workflow patterns detected","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Code-Review","score":2,"reason":"Found 8/30 approved changesets -- score normalized to 2","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Token-Permissions","score":0,"reason":"detected GitHub workflow tokens with excessive permissions","details":["Warn: no topLevel permission defined: .github/workflows/manylinux1.yml:1","Warn: no topLevel permission defined: .github/workflows/osx.yml:1","Warn: no topLevel permission defined: .github/workflows/test_manylinux.yml:1","Warn: no topLevel permission defined: .github/workflows/windows.yml:1","Info: no jobLevel write permissions found"],"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"Vulnerabilities","score":10,"reason":"0 existing vulnerabilities detected","details":null,"documentation":{"short":"Determines if the project has open, known unfixed vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: LICENSE:0","Info: FSF or OSI recognized license: MIT License: LICENSE:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Branch-Protection","score":0,"reason":"branch protection not enabled on development/release branches","details":["Warn: branch protection not enabled for branch 'main'"],"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Pinned-Dependencies","score":0,"reason":"dependency not pinned by hash detected -- score normalized to 0","details":["Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/manylinux1.yml:9: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/manylinux1.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/manylinux1.yml:11: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/manylinux1.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/manylinux1.yml:38: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/manylinux1.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/manylinux1.yml:40: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/manylinux1.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/manylinux1.yml:46: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/manylinux1.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/manylinux1.yml:65: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/manylinux1.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/manylinux1.yml:67: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/manylinux1.yml/main?enable=pin","Warn: third-party GitHubAction not pinned by hash: .github/workflows/manylinux1.yml:72: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/manylinux1.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/manylinux1.yml:75: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/manylinux1.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/osx.yml:12: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/osx.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/osx.yml:14: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/osx.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/osx.yml:23: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/osx.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/test_manylinux.yml:31: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/test_manylinux.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/windows.yml:34: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/windows.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/windows.yml:36: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/windows.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/windows.yml:41: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/windows.yml/main?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/windows.yml:64: update your workflow using https://app.stepsecurity.io/secureworkflow/polm/fugashi/windows.yml/main?enable=pin","Warn: containerImage not pinned by hash: Dockerfile:1: pin your Docker image by updating quay.io/pypa/manylinux2014_x86_64 to quay.io/pypa/manylinux2014_x86_64@sha256:265279207916439ab59f696e168b579f789d1b247aa7654efa31b75416adaefa","Warn: pipCommand not pinned by hash: .github/macos-build.sh:51","Warn: pipCommand not pinned by hash: .github/macos-build.sh:52","Warn: pipCommand not pinned by hash: .github/workflows/manylinux1.yml:87","Warn: pipCommand not pinned by hash: .github/workflows/manylinux1.yml:88","Warn: pipCommand not pinned by hash: .github/workflows/manylinux1.yml:24","Warn: pipCommand not pinned by hash: .github/workflows/manylinux1.yml:25","Warn: pipCommand not pinned by hash: .github/workflows/manylinux1.yml:58","Warn: pipCommand not pinned by hash: .github/workflows/manylinux1.yml:59","Warn: pipCommand not pinned by hash: .github/workflows/osx.yml:33","Warn: pipCommand not pinned by hash: .github/workflows/windows.yml:73","Warn: pipCommand not pinned by hash: .github/workflows/windows.yml:81","Info:   0 out of  16 GitHub-owned GitHubAction dependencies pinned","Info:   0 out of   1 third-party GitHubAction dependencies pinned","Info:   0 out of   1 containerImage dependencies pinned","Info:   0 out of  11 pipCommand dependencies pinned"],"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"SAST","score":0,"reason":"SAST tool is not run on all commits -- score normalized to 0","details":["Warn: 0 commits out of 10 are checked with a SAST tool"],"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}}]},"last_synced_at":"2025-08-22T17:13:32.825Z","repository_id":35163581,"created_at":"2025-08-22T17:13:32.825Z","updated_at":"2025-08-22T17:13:32.825Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":28981356,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-02-01T15:35:50.179Z","status":"ssl_error","status_checked_at":"2026-02-01T15:35:38.075Z","response_time":56,"last_error":"SSL_connect returned=1 errno=0 peeraddr=140.82.121.6:443 state=error: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["cython-wrapper","japanese","mecab","nlp","tokenizer"],"created_at":"2024-08-02T13:00:16.796Z","updated_at":"2026-02-01T15:39:15.711Z","avatar_url":"https://github.com/polm.png","language":"C++","funding_links":["https://github.com/sponsors/polm"],"categories":["C++"],"sub_categories":[],"readme":"[![Open in Streamlit](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://fugashi.streamlit.app)\n[![Current PyPI packages](https://badge.fury.io/py/fugashi.svg)](https://pypi.org/project/fugashi/)\n![Test Status](https://github.com/polm/fugashi/workflows/test-manylinux/badge.svg)\n[![PyPI - Downloads](https://img.shields.io/pypi/dm/fugashi)](https://pypi.org/project/fugashi/)\n![Supported Platforms](https://img.shields.io/badge/platforms-linux%20macosx%20windows-blue)\n\n# fugashi\n\n\u003cimg src=\"https://github.com/polm/fugashi/raw/main/fugashi.png\" width=125 height=125 alt=\"fugashi by Irasutoya\" /\u003e\n\nfugashi is a Cython wrapper for [MeCab](https://taku910.github.io/mecab/), a\nJapanese tokenizer and morphological analysis tool.  Wheels are provided for\nLinux, OSX (Intel), and Win64, and UniDic is [easy to install](#installing-a-dictionary).\n\n**issueを英語で書く必要はありません。**\n\nCheck out the [interactive demo][], see the [blog post](https://www.dampfkraft.com/nlp/fugashi.html) for background\non why fugashi exists and some of the design decisions, or see [this\nguide][guide] for a basic introduction to Japanese tokenization.\n\n[guide]: https://www.dampfkraft.com/nlp/how-to-tokenize-japanese.html\n[interactive demo]: https://fugashi.streamlit.app\n\nIf you are on a platform for which wheels are not provided, you'll need to\ninstall MeCab first. It's recommended you install [from\nsource](https://github.com/taku910/mecab). If you need to build from source on\nWindows, [@chezou's fork](https://github.com/chezou/mecab) is recommended; see\n[issue #44](https://github.com/polm/fugashi/issues/44#issuecomment-954426115)\nfor an explanation of the problems with the official repo.\n\nKnown platforms without wheels:\n\n- musl-based distros like alpine [#77](https://github.com/polm/fugashi/issues/77)\n- PowerPC\n- Windows 32bit\n\n## Usage\n\n```python\nfrom fugashi import Tagger\n\ntagger = Tagger('-Owakati')\ntext = \"麩菓子は、麩を主材料とした日本の菓子。\"\ntagger.parse(text)\n# =\u003e '麩 菓子 は 、 麩 を 主材 料 と し た 日本 の 菓子 。'\nfor word in tagger(text):\n    print(word, word.feature.lemma, word.pos, sep='\\t')\n    # \"feature\" is the Unidic feature data as a named tuple\n```\n\n## Installing a Dictionary\n\nfugashi requires a dictionary. [UniDic](https://unidic.ninjal.ac.jp/) is\nrecommended, and two easy-to-install versions are provided.\n\n  - [unidic-lite](https://github.com/polm/unidic-lite), a slightly modified version 2.1.2 of Unidic (from 2013) that's relatively small\n  - [unidic](https://github.com/polm/unidic-py), the latest UniDic 3.1.0, which is 770MB on disk and requires a separate download step\n\nIf you just want to make sure things work you can start with `unidic-lite`, but\nfor more serious processing `unidic` is recommended. For production use you'll\ngenerally want to generate your own dictionary too; for details see the [MeCab\ndocumentation](https://taku910.github.io/mecab/learn.html).\n\nTo get either of these dictionaries, you can install them directly using `pip`\nor do the below:\n\n```sh\npip install 'fugashi[unidic-lite]'\n\n# The full version of UniDic requires a separate download step\npip install 'fugashi[unidic]'\npython -m unidic download\n```\n\nFor more information on the different MeCab dictionaries available, see [this article](https://www.dampfkraft.com/nlp/japanese-tokenizer-dictionaries.html).\n\n## Dictionary Use\n\nfugashi is written with the assumption you'll use Unidic to process Japanese,\nbut it supports arbitrary dictionaries. \n\nIf you're using a dictionary besides Unidic you can use the GenericTagger like this:\n\n```python\nfrom fugashi import GenericTagger\ntagger = GenericTagger()\n\n# parse can be used as normal\ntagger.parse('something')\n# features from the dictionary can be accessed by field numbers\nfor word in tagger(text):\n    print(word.surface, word.feature[0])\n```\n\nYou can also create a dictionary wrapper to get feature information as a named tuple. \n\n```python\nfrom fugashi import GenericTagger, create_feature_wrapper\nCustomFeatures = create_feature_wrapper('CustomFeatures', 'alpha beta gamma')\ntagger = GenericTagger(wrapper=CustomFeatures)\nfor word in tagger.parseToNodeList(text):\n    print(word.surface, word.feature.alpha)\n```\n\n## Citation\n\nIf you use fugashi in research, it would be appreciated if you cite this paper. You can read it at [the ACL Anthology](https://www.aclweb.org/anthology/2020.nlposs-1.7/) or [on Arxiv](https://arxiv.org/abs/2010.06858).\n\n    @inproceedings{mccann-2020-fugashi,\n        title = \"fugashi, a Tool for Tokenizing {J}apanese in Python\",\n        author = \"McCann, Paul\",\n        booktitle = \"Proceedings of Second Workshop for NLP Open Source Software (NLP-OSS)\",\n        month = nov,\n        year = \"2020\",\n        address = \"Online\",\n        publisher = \"Association for Computational Linguistics\",\n        url = \"https://www.aclweb.org/anthology/2020.nlposs-1.7\",\n        pages = \"44--51\",\n        abstract = \"Recent years have seen an increase in the number of large-scale multilingual NLP projects. However, even in such projects, languages with special processing requirements are often excluded. One such language is Japanese. Japanese is written without spaces, tokenization is non-trivial, and while high quality open source tokenizers exist they can be hard to use and lack English documentation. This paper introduces fugashi, a MeCab wrapper for Python, and gives an introduction to tokenizing Japanese.\",\n    }\n\n## Alternatives\n\nIf you have a problem with fugashi feel free to open an issue. However, there\nare some cases where it might be better to use a different library.\n\n- If you don't want to deal with installing MeCab at all, try [SudachiPy](https://github.com/WorksApplications/sudachi.rs).\n- If you need to work with Korean, try [pymecab-ko](https://github.com/NoUnique/pymecab-ko) or [KoNLPy](https://konlpy.org/en/latest/).\n\n## License and Copyright Notice\n\nfugashi is released under the terms of the [MIT license](./LICENSE). Please\ncopy it far and wide.\n\nfugashi is a wrapper for MeCab, and fugashi wheels include MeCab binaries.\nMeCab is copyrighted free software by Taku Kudo `\u003ctaku@chasen.org\u003e` and Nippon\nTelegraph and Telephone Corporation, and is redistributed under the [BSD\nLicense](./LICENSE.mecab).\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fpolm%2Ffugashi","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fpolm%2Ffugashi","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fpolm%2Ffugashi/lists"}