{"id":17498407,"url":"https://github.com/simonepri/text-tokenizers-colab","last_synced_at":"2025-10-09T00:37:06.164Z","repository":{"id":66099009,"uuid":"255945825","full_name":"simonepri/text-tokenizers-colab","owner":"simonepri","description":"🔪 Tokenize text on the fly on Colab.","archived":false,"fork":false,"pushed_at":"2020-04-18T12:33:34.000Z","size":24,"stargazers_count":3,"open_issues_count":0,"forks_count":0,"subscribers_count":2,"default_branch":"master","last_synced_at":"2025-03-29T17:24:54.449Z","etag":null,"topics":["colab-notebook","machine-learning","text","tokenization"],"latest_commit_sha":null,"homepage":null,"language":"Jupyter Notebook","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/simonepri.png","metadata":{"files":{"readme":"readme.md","changelog":null,"contributing":null,"funding":null,"license":"license","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2020-04-15T14:41:14.000Z","updated_at":"2020-04-18T12:33:36.000Z","dependencies_parsed_at":"2023-04-12T09:23:14.968Z","dependency_job_id":null,"html_url":"https://github.com/simonepri/text-tokenizers-colab","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"purl":"pkg:github/simonepri/text-tokenizers-colab","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/simonepri%2Ftext-tokenizers-colab","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/simonepri%2Ftext-tokenizers-colab/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/simonepri%2Ftext-tokenizers-colab/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/simonepri%2Ftext-tokenizers-colab/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/simonepri","download_url":"https://codeload.github.com/simonepri/text-tokenizers-colab/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/simonepri%2Ftext-tokenizers-colab/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":279000639,"owners_count":26082879,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-10-08T02:00:06.501Z","response_time":56,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["colab-notebook","machine-learning","text","tokenization"],"created_at":"2024-10-19T16:58:02.613Z","updated_at":"2025-10-09T00:37:06.138Z","avatar_url":"https://github.com/simonepri.png","language":"Jupyter Notebook","funding_links":[],"categories":[],"sub_categories":[],"readme":"\u003ch1 align=\"center\"\u003e\n  \u003cb\u003etext-tokenizers-colab\u003c/b\u003e\n\u003c/h1\u003e\n\u003cp align=\"center\"\u003e\n  🔪 Tokenize text on the fly on Colab.\n\u003c/p\u003e\n\n## Synopsis\n\nTokenization is the task of splitting a text into meaningful segments, called tokens.\nThis repository contains python notebooks to run some text tokenizers for quick experimentation purposes.\nJust click on one of the links in the list below and run the notebook.\n\nDo you believe that this is *useful*?\nHas it *saved you time*?\nOr maybe you simply *like it*?  \nIf so, [support this work with a Star ⭐️][start].\n\n## Notebooks\n - Hugging Face's Transformers Library Tokenizers - [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)][colab:transformers]\n - Explosion AI spaCy Library Tokenizers - [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)][colab:spacy]\n\n\n## Authors\n\n- **Simone Primarosa** - [simonepri][github:simonepri]\n\nSee also the list of [contributors][contributors] who participated in this project.\n\n\n## License\n\nThis project is licensed under the MIT License - see the [license][license] file for details.\n\n\n\n\u003c!-- Links --\u003e\n\n[start]: https://github.com/simonepri/text-tokenizers-colab#start-of-content\n[license]: https://github.com/simonepri/text-tokenizers-colab/tree/master/license\n[contributors]: https://github.com/simonepri/text-tokenizers-colab/contributors\n\n[github:simonepri]: https://github.com/simonepri\n\n[colab:transformers]: https://colab.research.google.com/github/simonepri/text-tokenizers-colab/blob/master/transformers-tokenizers.ipynb\n[colab:spacy]: https://colab.research.google.com/github/simonepri/text-tokenizers-colab/blob/master/spacy-tokenizers.ipynb\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fsimonepri%2Ftext-tokenizers-colab","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fsimonepri%2Ftext-tokenizers-colab","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fsimonepri%2Ftext-tokenizers-colab/lists"}