{"id":36418783,"url":"https://github.com/ks-shim/klay","last_synced_at":"2026-01-11T17:01:41.860Z","repository":{"id":56627629,"uuid":"169530455","full_name":"ks-shim/klay","owner":"ks-shim","description":"KLAY - Korean Language AnalYzer (한국어 형태소 분석기)","archived":false,"fork":false,"pushed_at":"2025-08-11T23:35:27.000Z","size":66461,"stargazers_count":19,"open_issues_count":0,"forks_count":1,"subscribers_count":8,"default_branch":"master","last_synced_at":"2025-08-12T01:24:48.532Z","etag":null,"topics":["analyzer","klay","komoran","korean","language","morphology"],"latest_commit_sha":null,"homepage":"","language":"Java","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/ks-shim.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":"SECURITY.md","support":null}},"created_at":"2019-02-07T06:45:45.000Z","updated_at":"2025-08-11T23:36:03.000Z","dependencies_parsed_at":"2023-02-10T13:32:01.061Z","dependency_job_id":null,"html_url":"https://github.com/ks-shim/klay","commit_stats":null,"previous_names":[],"tags_count":6,"template":false,"template_full_name":null,"purl":"pkg:github/ks-shim/klay","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ks-shim%2Fklay","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ks-shim%2Fklay/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ks-shim%2Fklay/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ks-shim%2Fklay/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/ks-shim","download_url":"https://codeload.github.com/ks-shim/klay/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ks-shim%2Fklay/sbom","scorecard":{"id":571092,"data":{"date":"2025-08-11","repo":{"name":"github.com/ks-shim/klay","commit":"ea7626f5ebfe6b2bb5d311225df0c621f6f38599"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":2.2,"checks":[{"name":"Dangerous-Workflow","score":-1,"reason":"no workflows found","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Maintained","score":1,"reason":"2 commit(s) and 0 issue activity found in the last 90 days -- score normalized to 1","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"Code-Review","score":0,"reason":"Found 0/28 approved changesets -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Token-Permissions","score":-1,"reason":"No tokens found","details":null,"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"Security-Policy","score":4,"reason":"security policy file detected","details":["Info: security policy file detected: SECURITY.md:1","Warn: no linked content found","Info: Found disclosure, vulnerability, and/or timelines in security policy: SECURITY.md:1","Info: Found text in security policy: SECURITY.md:1"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: LICENSE:0","Info: FSF or OSI recognized license: Apache License 2.0: LICENSE:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Branch-Protection","score":0,"reason":"branch protection not enabled on development/release branches","details":["Warn: branch protection not enabled for branch 'master'"],"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"Pinned-Dependencies","score":-1,"reason":"no dependencies found","details":null,"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"SAST","score":0,"reason":"SAST tool is not run on all commits -- score normalized to 0","details":["Warn: 0 commits out of 2 are checked with a SAST tool"],"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}},{"name":"Vulnerabilities","score":0,"reason":"32 existing vulnerabilities detected","details":["Warn: Project is vulnerable to: GHSA-5mg8-w23w-74h3","Warn: Project is vulnerable to: GHSA-7g45-4rm6-3mm3","Warn: Project is vulnerable to: GHSA-mvr2-9pj6-7w5j","Warn: Project is vulnerable to: GHSA-h46c-h94j-95f3","Warn: Project is vulnerable to: GHSA-wf8f-6423-gfxg","Warn: Project is vulnerable to: GHSA-xmc8-26q4-qjhx","Warn: Project is vulnerable to: GHSA-7rjr-3q55-vv33","Warn: Project is vulnerable to: GHSA-8489-44mv-ggj8","Warn: Project is vulnerable to: GHSA-jfh8-c2jp-5v3q","Warn: Project is vulnerable to: GHSA-p6xc-xr62-6r2g","Warn: Project is vulnerable to: GHSA-vwqq-5vrc-xw9h","Warn: Project is vulnerable to: GHSA-2cqf-6xv9-f22w","Warn: Project is vulnerable to: GHSA-2hjr-vmf3-xwvp","Warn: Project is vulnerable to: GHSA-3393-hvrj-w7v3","Warn: Project is vulnerable to: GHSA-5v8f-xx9m-wj44","Warn: Project is vulnerable to: GHSA-62ww-4p3p-7fhj","Warn: Project is vulnerable to: GHSA-c77j-p484-h84m","Warn: Project is vulnerable to: GHSA-ccmr-qj26-845g","Warn: Project is vulnerable to: GHSA-cqgv-256r-m9r8","Warn: Project is vulnerable to: GHSA-fj32-6v7m-57pg","Warn: Project is vulnerable to: GHSA-g9fw-9x87-rmrj","Warn: Project is vulnerable to: GHSA-hr65-qq6p-87r4","Warn: Project is vulnerable to: GHSA-jgx4-7v3v-vwfm","Warn: Project is vulnerable to: GHSA-jqm6-m3j3-8gg9","Warn: Project is vulnerable to: GHSA-3mc7-4q67-w48m","Warn: Project is vulnerable to: GHSA-98wm-3w3q-mw94","Warn: Project is vulnerable to: GHSA-9w3m-gqgf-c4p9","Warn: Project is vulnerable to: GHSA-c4r9-r8fh-9vj2","Warn: Project is vulnerable to: GHSA-hhhw-99gj-p3c3","Warn: Project is vulnerable to: GHSA-mjmj-j48q-9wg2","Warn: Project is vulnerable to: GHSA-rvwf-54qp-4r6v","Warn: Project is vulnerable to: GHSA-w37g-rhq8-7m4j"],"documentation":{"short":"Determines if the project has open, known unfixed vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}}]},"last_synced_at":"2025-08-20T16:22:48.112Z","repository_id":56627629,"created_at":"2025-08-20T16:22:48.112Z","updated_at":"2025-08-20T16:22:48.112Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":28314259,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-01-11T14:58:17.114Z","status":"ssl_error","status_checked_at":"2026-01-11T14:55:53.580Z","response_time":60,"last_error":"SSL_read: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["analyzer","klay","komoran","korean","language","morphology"],"created_at":"2026-01-11T17:01:41.208Z","updated_at":"2026-01-11T17:01:41.852Z","avatar_url":"https://github.com/ks-shim.png","language":"Java","funding_links":[],"categories":[],"sub_categories":[],"readme":"# 1. KLAY\n[![Build Status](https://github.com/ks-shim/klay/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/ks-shim/klay/actions/workflows/pages/pages-build-deployment)\n[![Coverage Status](https://coveralls.io/repos/github/ks-shim/klay/badge.svg?branch=master)](https://coveralls.io/github/ks-shim/klay?branch=master)\n\n**K**orean **L**anguage **A**nal**Y**zer using KOMORAN's dictionaries.\n- korean morphology analysis\n- 한국어 형태소 분석기 입니다.\n- 목표\n  * 좀 더 빠른 분석 속도\n  * 좀 더 자바스럽게 ...\n  * 품질 유지 (추후 품질 개선 계획) \n- 개발 시작일 : 2019. 02 ~\n  - version : 0.1 (2019.02.26)\n  - version : 0.3 (2019.03.18)\n  - version : 0.3.2 (2022.01.18)\n  - version : 0.3.6 (2022.09.07) \u003c-- current\n- KOMORAN의 사전을 기반으로 분석하며, 사용하는 Data structure와 분석 방식은 상이합니다.\n- Data Structure : KLAY의 분석 방식에 맞게 수정한 Lucene의 Trie를 사용합니다.\n- KLAY is a thread-safe analyzer. (멀티 쓰레드 환경에서의 사용을 권장합니다.)\n\n\n# 2. Architecture\nPerformance와 동시에 확장성을 고려하였으며 Readability에 많은 신경을 썼습니다. 그래서 조금 더 자바(Java)스럽게 Design하였습니다.\n## 2-1. Tokenization\nChain of Responsibiility 패턴을 사용하여 구현하였습니다. ChainedTokenizationRule 인터페이스를 구현하여 Rule을 쉽게\n추가할 수 있습니다. 현재는 아래와 같은 Rule을 순차적으로 적용하고 있습니다.\n - UserDictionaryMatchRule : 사용자 사전에 매칭하는 Rule\n - CharacterTypeAndLengthLimitRule : 문자타입 및 길이 제한 Rule\n\n![tokenization_diagram](data/image/tokenization_diagram.png)\n\n## 2-2. Analysis\n마찬가지로 Chain of Responsibility 패턴을 사용하여 구현하였습니다. ChainedAnalysisRule 인터페이스를 구현하여 Rule을 쉽게\n추가할 수 있습니다. 현재는 아래와 같은 Rule을 순차적으로 적용하고 있습니다.\n - CanSkipRule : 분석없이 생략할 수 있는 Rule\n - FWDRule : 기분석 사전으로 Fully 매칭하는 Rule\n - AllPossibleCandidateRule : 미등록어 추정 Rule\n - NARule : 분석 불가 Rule\n\nHMM(Viterbi)는 MorphSequence 클래스를 사용하여 계산되어집니다.\n\n![analysis_diagram](data/image/analysis_diagram.png)\n\n## 2-3. Dictionary\nLucene의 Trie를 변형하여 적용하였습니다.\n\n![dictionary_diagram](data/image/dictionary_diagram.png)\n\n# 3. Example\n```java\n    //***********************************************************************\n    // 1. configuration and creating Klay object ...\n    //***********************************************************************\n    Klay klay = new Klay(Paths.get(\"data/configuration/klay.conf\"));\n\n    //***********************************************************************\n    // 2. start morphological analysis.\n    //***********************************************************************\n    String text = \"너무기대안하고갔나....................재밌게봤다\";\n    Morphs morphs = klay.doKlay(text);\n\n    //***********************************************************************\n    // 3. print result.\n    //***********************************************************************\n    Iterator\u003cMorph\u003e iter = morphs.iterator();\n    while(iter.hasNext()) {\n        System.out.println(iter.next());\n    }\n```\n# 4. Performance\n## 4-1. 사양 및 데이터\n - 프로세서 : Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz, 4008Mhz, 4코어, 8 논리 프로세서\n - 메모리 : 32.0 GB\n - 분석 데이터 위치 : data/performance/test.txt\n - 분석 데이터 건수 : 199,992 건\n## 4-2. 결과 및 코드\n - 사전 로딩 : 0.284 (s)\n - 분석 시간 : 16.815 (s)\n```java\n    String src = \"data/performance/test.txt\";\n    Klay klay = new Klay(Paths.get(\"data/configuration/klay.conf\"));\n\n    StopWatch watch = new StopWatch();\n    watch.start();\n    int count = 0;\n    try (BufferedReader in = new BufferedReader(new FileReader(src))) {\n        String line = null;\n        while((line = in.readLine()) != null) {\n            line = line.trim();\n            if(line.isEmpty()) continue;\n\n            klay.doKlay(line);\n            System.out.print(\"\\r\" + ++count);\n        }\n    }\n    watch.stop();\n    System.out.println(\"Analysis Time : \" + watch.getTime(TimeUnit.MILLISECONDS) / 1000.0 + \" (s)\");\n```\n\n# 5. Elasticsearch Plugin Download\n - Download : https://github.com/ks-shim/klay-es-plugin\n \n# 6. Resources Download\n - Dictionary : https://github.com/ks-shim/klay-resources/tree/master/dictionary\n - Configuration : https://github.com/ks-shim/klay-resources/tree/master/configuration\n \n# 7. KLAY for python \n - https://github.com/ks-shim/klay4py\n - 개발중 ...\n \n# 8. Maven\n```\n\u003cdependency\u003e\n  \u003cgroupId\u003eio.github.ks-shim.klay\u003c/groupId\u003e\n  \u003cartifactId\u003eklay-common\u003c/artifactId\u003e\n  \u003cversion\u003e0.3.8\u003c/version\u003e\n\u003c/dependency\u003e\n```\n```\n\u003cdependency\u003e\n  \u003cgroupId\u003eio.github.ks-shim.klay\u003c/groupId\u003e\n  \u003cartifactId\u003eklay-dictionary\u003c/artifactId\u003e\n  \u003cversion\u003e0.3.8\u003c/version\u003e\n\u003c/dependency\u003e\n```\n```\n\u003cdependency\u003e\n  \u003cgroupId\u003eio.github.ks-shim.klay\u003c/groupId\u003e\n  \u003cartifactId\u003eklay-core\u003c/artifactId\u003e\n  \u003cversion\u003e0.3.8\u003c/version\u003e\n\u003c/dependency\u003e\n```\n```\n\u003crepositories\u003e\n  \u003crepository\u003e\n      \u003cid\u003eoos\u003c/id\u003e\n      \u003curl\u003ehttps://s01.oss.sonatype.org/content/groups/public/\u003c/url\u003e\n  \u003c/repository\u003e\n\u003c/repositories\u003e\n```\n# 9. Dictionary build\n- dictionary-build 모듈 : klay.dictionary.build.DictionaryBuilder 실행\n```java\npublic static void main(String[] args) throws Exception {\n\n    // 1. 사전에 환경설정 파일의 Raw 사전 정보를 변경합니다.\n    Properties config = new Properties();\n    config.load(Files.newInputStream(Paths.get(\"data/configuration/klay.conf\")));\n\n    // 2. 관측확률/전이확률에 사용한 pos-frequency 정보를 읽어들입니다.\n    DictionaryTextSource posFreqSource = new DictionaryTextSource(Paths.get(config.getProperty(\"dictionary.grammar.path\")));\n\n    // 3. 관측확률 사전의 소스/타겟 정보를 생성합니다.\n    DictionaryTextSource[] emissionSources = {\n            // *** must build DIC_WORD first !!\n            new DictionaryTextSource(\n                    Paths.get(config.getProperty(\"dictionary.word.path\")), DictionaryTextSource.DictionaryType.DIC_WORD),\n            new DictionaryTextSource(\n                    Paths.get(config.getProperty(\"dictionary.irregular.path\")), DictionaryTextSource.DictionaryType.DIC_IRREGULAR)\n    };\n    DictionaryBinaryTarget emissionTarget =\n            new DictionaryBinaryTarget(Paths.get(config.getProperty(\"dictionary.emission.path\")));\n\n    // 4. 전이확률 사전의 소스/타켓 정보를 생성합니다.\n    DictionaryTextSource transitionSource =\n            new DictionaryTextSource(\n                    Paths.get(config.getProperty(\"dictionary.grammar.path\")), DictionaryTextSource.DictionaryType.GRAMMAR);\n    DictionaryBinaryTarget transitionTarget =\n            new DictionaryBinaryTarget(Paths.get(config.getProperty(\"dictionary.transition.path\")));\n\n    // 5. 빌더를 생성하고 빌딩을 시작합니다.\n    DictionaryBuilder builder = new DictionaryBuilder.Builder()\n            .posFreqSource(posFreqSource)\n            .emissionSourcesAndTarget(emissionSources, emissionTarget)\n            .transitionSourceAndTarget(transitionSource, transitionTarget)\n            .build();\n\n    builder.buildAll();\n}\n```\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fks-shim%2Fklay","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fks-shim%2Fklay","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fks-shim%2Fklay/lists"}