{"id":15519062,"url":"https://github.com/danieldk/wordpieces","last_synced_at":"2025-10-29T17:43:05.779Z","repository":{"id":45257846,"uuid":"223381496","full_name":"danieldk/wordpieces","owner":"danieldk","description":"Split tokens into word pieces","archived":false,"fork":false,"pushed_at":"2022-10-10T14:57:20.000Z","size":34,"stargazers_count":5,"open_issues_count":0,"forks_count":0,"subscribers_count":3,"default_branch":"master","last_synced_at":"2025-05-28T11:45:17.992Z","etag":null,"topics":["piece","rust","tokenization","word","wordpiece"],"latest_commit_sha":null,"homepage":"","language":"Rust","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"other","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/danieldk.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE-APACHE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null}},"created_at":"2019-11-22T10:36:17.000Z","updated_at":"2023-11-14T15:43:27.000Z","dependencies_parsed_at":"2022-08-27T06:10:20.483Z","dependency_job_id":null,"html_url":"https://github.com/danieldk/wordpieces","commit_stats":null,"previous_names":[],"tags_count":8,"template":false,"template_full_name":null,"purl":"pkg:github/danieldk/wordpieces","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/danieldk%2Fwordpieces","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/danieldk%2Fwordpieces/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/danieldk%2Fwordpieces/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/danieldk%2Fwordpieces/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/danieldk","download_url":"https://codeload.github.com/danieldk/wordpieces/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/danieldk%2Fwordpieces/sbom","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":260945643,"owners_count":23087050,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["piece","rust","tokenization","word","wordpiece"],"created_at":"2024-10-02T10:19:56.773Z","updated_at":"2025-10-29T17:43:05.719Z","avatar_url":"https://github.com/danieldk.png","language":"Rust","funding_links":[],"categories":[],"sub_categories":[],"readme":"# wordpieces\n\nThis crate provides a subword tokenizer. A subword tokenizer splits a\ntoken into several pieces, so-called *word pieces*.  Word pieces were\npopularized by and used in the\n[BERT](https://arxiv.org/abs/1810.04805) natural language encoder.\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fdanieldk%2Fwordpieces","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fdanieldk%2Fwordpieces","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fdanieldk%2Fwordpieces/lists"}