awesome-tokenizers
  
  
    A curated list of tokenizer libraries for blazing-fast NLP.
    https://github.com/nlpoptimize/awesome-tokenizers
  
        Last synced: 2 days ago 
        JSON representation
    
- 
            
🔹 **WordPiece Tokenizer Implementations**
- Tokenizers.jl
 - rust-tokenizers
 - tokenizers-cpp
 - BertTokenizers
 - bertTokenizer (Java)
 - ZhuoruLin/fast-wordpiece
 - FlashTokenizer
 - FastBertTokenizer
 - SeanLee97/BertWordPieceTokenizer.jl
 - BlingFire
 - huggingface_tokenizer_cpp
 - Deep Java Library (DJL) BertTokenizer
 - tokenizers.net
 - transformers BertTokenizer
 - fast-bert-tokenizer-py
 - ml-commons/tokenizer
 
 - 
            
🔹 **BPE (Byte Pair Encoding) Implementations**
 - 
            
🔹 **SentencePiece Implementations**
 
            Categories
          
          
        
            Sub Categories
          
          
            Keywords
          
          
              
                nlp
                10
              
              
                natural-language-processing
                9
              
              
                bert
                8
              
              
                deep-learning
                6
              
              
                bpe
                6
              
              
                tensorflow
                6
              
              
                machine-learning
                6
              
              
                python
                5
              
              
                language-model
                5
              
              
                pytorch
                4
              
              
                neural-machine-translation
                4
              
              
                word-segmentation
                4
              
              
                wordpiece-tokenization
                3
              
              
                wordpiece
                3
              
              
                huggingface
                3
              
              
                transformer
                3
              
              
                natural-language-understanding
                3
              
              
                word-embeddings
                2
              
              
                sentence-embeddings
                2
              
              
                tokenizer
                2
              
              
                transfomers
                2
              
              
                ai
                2
              
              
                autograd
                2
              
              
                deep-neural-networks
                2
              
              
                djl
                2
              
              
                java
                2
              
              
                ml
                2
              
              
                mxnet
                2
              
              
                neural-network
                2
              
              
                onnxruntime
                2
              
              
                rust
                2
              
              
                pypi-package
                2
              
              
                openai
                2
              
              
                llm
                2
              
              
                byte-pair-tokenizer
                2
              
              
                byte-pair-encoding
                2
              
              
                bpe-tokenizer
                2
              
              
                tiktoken
                2
              
              
                tokenizers
                2
              
              
                tokenization
                2
              
              
                subword-units
                2
              
              
                segmentation
                2
              
              
                nmt
                2
              
              
                machine-translation
                2
              
              
                speech-recognition
                2
              
              
                seq2seq
                2
              
              
                pytorch-transformers
                2
              
              
                pretrained-models
                2
              
              
                nlp-library
                2
              
              
                model-hub
                2