{"id":28138140,"url":"https://github.com/jleahred/indent_tokenizer","last_synced_at":"2026-04-29T01:31:54.719Z","repository":{"id":62440832,"uuid":"88198354","full_name":"jleahred/indent_tokenizer","owner":"jleahred","description":"Separate tokens based on indentation","archived":false,"fork":false,"pushed_at":"2018-01-29T19:55:17.000Z","size":27,"stargazers_count":1,"open_issues_count":0,"forks_count":1,"subscribers_count":1,"default_branch":"master","last_synced_at":"2025-08-21T21:33:39.387Z","etag":null,"topics":["indentation","processing","text","tokenizer"],"latest_commit_sha":null,"homepage":null,"language":"Rust","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"lgpl-3.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/jleahred.png","metadata":{"files":{"readme":"README.adoc","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null}},"created_at":"2017-04-13T19:16:06.000Z","updated_at":"2020-07-20T20:18:21.000Z","dependencies_parsed_at":"2022-11-01T21:53:42.314Z","dependency_job_id":null,"html_url":"https://github.com/jleahred/indent_tokenizer","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"purl":"pkg:github/jleahred/indent_tokenizer","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/jleahred%2Findent_tokenizer","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/jleahred%2Findent_tokenizer/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/jleahred%2Findent_tokenizer/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/jleahred%2Findent_tokenizer/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/jleahred","download_url":"https://codeload.github.com/jleahred/indent_tokenizer/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/jleahred%2Findent_tokenizer/sbom","scorecard":{"id":523131,"data":{"date":"2025-08-11","repo":{"name":"github.com/jleahred/indent_tokenizer","commit":"10de433b6387d222ee00e79252938c529342f0b8"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":3,"checks":[{"name":"Code-Review","score":0,"reason":"Found 1/16 approved changesets -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Dangerous-Workflow","score":-1,"reason":"no workflows found","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"Maintained","score":0,"reason":"0 commit(s) and 0 issue activity found in the last 90 days -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"Pinned-Dependencies","score":-1,"reason":"no dependencies found","details":null,"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"Token-Permissions","score":-1,"reason":"No tokens found","details":null,"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: LICENSE:0","Info: FSF or OSI recognized license: GNU Lesser General Public License v3.0: LICENSE:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Vulnerabilities","score":10,"reason":"0 existing vulnerabilities detected","details":null,"documentation":{"short":"Determines if the project has open, known unfixed vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}},{"name":"Branch-Protection","score":0,"reason":"branch protection not enabled on development/release branches","details":["Warn: branch protection not enabled for branch 'master'"],"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"SAST","score":0,"reason":"SAST tool is not run on all commits -- score normalized to 0","details":["Warn: 0 commits out of 2 are checked with a SAST tool"],"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}}]},"last_synced_at":"2025-08-20T03:35:13.945Z","repository_id":62440832,"created_at":"2025-08-20T03:35:13.945Z","updated_at":"2025-08-20T03:35:13.945Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":32407164,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-04-28T19:38:08.556Z","status":"ssl_error","status_checked_at":"2026-04-28T19:37:55.688Z","response_time":56,"last_error":"SSL_read: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["indentation","processing","text","tokenizer"],"created_at":"2025-05-14T17:13:30.022Z","updated_at":"2026-04-29T01:31:54.680Z","avatar_url":"https://github.com/jleahred.png","language":"Rust","funding_links":[],"categories":[],"sub_categories":[],"readme":"= Identation tokenizer\n\nA small an simple indentation tokenizer\n\nA similar project is https://github.com/jleahred/indentation_flattener[indentation_flattener]. \nIn this case we flatten the input adding PUSH_INDENT, and POP_INDENT.\nThis looks better for PEG grammars.\n\n\n\n== Usage\n\nAdd to `cargo.toml`\n[source, toml]\n----\n[dependencies]\nindent_tokenizer = \"0.2.0\"\n----\n\nSee example below\n\n\n\n== Modifs\n\n0.2.0:: Removed general types (as String, u32 or usize) +\nUsing concrete types (new types)\n\n\n== Indentation format\n\nTabs are no valid on indentation grouping.\n\n\nLet's see by example.\n\n\n.Simple valid input\n----\n.....\n...\n    ....\n        ....\n        ....\n    ....\n    ....\n....\n....\n    ....\n----\n\n\nIndentation groups can have any number of spaces\n\n.Valid indentation different spaces\n----\n.....               level0\n  ....              level1  \u003c--\n        ....        level2\n  ....              level1  \u003c--\n  ....              level1  \u003c--\n....                level0\n....                level0\n      ....          level1  \u003c--\n----\n\nIt's not a good idea to have same level with different spaces, but it's\nallowed when you are creating a new level.\n\nIn this example, last level1 is idented with more spaces than previus ones\n\n\n\n\n.Invalid indentation\n----\n.....\n...\n    ....\n        ....\n       ....     \u003c--  incorrect indentation\n    ....        \u003c--  correct previous ident level\n    ....\n....\n....\n    ....\n----\n\nIn order to go back a level, the indentation has to match with\nthe previous on this level.\n\nAs we saw in previous example, increasing level is free indentation.\n\n\n.Start line indicator\n----\n|.....\n    |.....\n    |......\n        |......\n----\n\nYou can start lines with `|`, but it's optional.\n\n\n.Indentation indicator is optional\n----\n.....\n    |.....\n     ......\n     ......\n        ......\n----\n\nLook that `|` is one position previous to indentation level.\n\n\n\nIt is usefull when you need to start with spaces.\n\n\n.I want to start a line with spaces\n----\n.....\n    | .....     \u003c- This line starts with an space\n    |  ......   \u003c- Starting with 2 spaces\n    |.....      \u003c- starts with no spaces\n     .....      \u003c- starting with no spaces\n     ...        \u003c- starting with no spaces\n----\n\n\n.My line starts with a `|`\n----\n.....\n    ||.....     This line starts with a `|`\n    |......     This one starts with `.`\n----\n\n\nA line is empty when there are no content or it only has spaces.\n\n\n.Empty lines\n----\n.....\n    .....\n    .....\n    .....\n    .....     next line is empty\n\n    .....     next line is empty\n\n.....\n.....         next line is empty\n\n----\n\nWhat if I want represent empty lines?\n\n.Representing empty lines\n----\n.....\n    .....\n    .....\n    .....\n    .....     I want new line after this line\n   |\n\n    .....     and three new lines, please\n   |\n   |\n   |\n\n----\n\nWhat if I want to represent spaces at end of line?\n\nSpaces at end of line will not be erased, therefore, you don't need to do anything about it.\n\nBut could be intesting to represent it because some editors can run trailing or\njust because you can visualize it.\n\n\n.Representing spaces at end line\n----\n.....\n    .....\n    .....\n    .....\n    This line keeps 2 spaces and end  |\n    and you know it\n\n    Next line is properly indented and only has spaces\n   |   |\n\n----\n\n\nIn fact, you can write `|` at end of all lines. It will be removed.\n\nNext strings, are equivalent.\n\n.`|` it's optional at end of line\n----\n.....|\n    .....|\n    .....|\n    .....|\n\n\n.....\n    .....\n    .....\n    .....\n\n----\n\n\n\nBut I could need a pipe `|` at end of line\n\n.pipe at end of line\n----\n.....\n    .....\n    .....\n    .....\n    This line ends with a pipe||\n\n----\n\n\n.Pitfall\n----\n|.....\n.....   \u003c- Invalid, remember, indentation mark | is previus to real indentation\n\n\n|.....\n .....   \u003c- This is OK, but not elegant\n\n\n| ....   \u003c- I want to start with an space\n|.....   \u003c- This is redundant, but more clear\n \n----\n\n\n== Tokens\n\n* Each change of leven represent an end of token.\n* An empty line, is used to separate tokens on same level\n* A token contain lines and a list of tokens\n\n\n.Tokens\n----\nThis is the first token\n    This is another token, because it's on a different level\n        And another token\n    This is also a different token\n\nA token can contain\nmultiple lines\n    This is another token\n    with three\n    lines\n\nEmpty lines can be used to\nseparate tokens\n    This is a token,\n    that continues\n    here. Next empty line define\n    a token division\n\n    And this is a different one\n    with a couple of lines\n----\n\n\n\n\n== Identation tokenizer API\n\nVersion 0.2 removed general types as String, usize, u32...\n\nInstead, it's created an specific type on each context.\n\n\nConcrete types::\n[source, rust]\n----\n#[derive(Debug, PartialEq, Copy, Clone)]\npub struct LineNum(u32);\n\n#[derive(Debug, PartialEq, Clone, Eq)]\npub struct SLine(String);\n----\n\n* LineNum to represent the line number\n* SLine to respresent the line string\n\nInternally, the system uses more new types as NSpaces to represent number of spaces\n\n\n\n\nFunction to call::\n[source, rust]\n----\npub fn tokenize(input: \u0026str) -\u003e Result\u003cVec\u003cToken\u003e, Error\u003e \n----\n\n\nToken type::\n[source, rust]\n----\n#[derive(Debug, PartialEq)]\npub struct Token {\n    pub lines: Vec\u003cSLine\u003e,\n    pub tokens: Vec\u003cToken\u003e,\n}\n----\n\n\nError type::\n[source, rust]\n----\n#[derive(Debug, PartialEq)]\npub struct Error {\n    pub line: LineNum,\n    pub desc: String,\n}\n----\n\n\nThats all\n\n\nLook into lib.rs to see the api and tests.rs to se examples\n\n\n== Examples\n\nYou can look into tests.rs, there are several tests.\n\n\n.Complex example\n[source, rust]\n----\n    let tokens = tokenize(\"\n0\n    || 01a\n     01b\n     01c\n\n     02a\n     02b\n\n        |020a\n        ||020b\n\n        |  021a\n        |021b\n1a\n1b\n    11a\n   ||11b\n    11c\n\n    12a  ||\n   |12b  ||\n2a\n    21a\n    21b\n   |\n   |\n\n\")\n----\n\nThe result will be \n\n[source, rust]\n----\n   vec![Token {\n            lines: vec![SLine::from(\"0\")],\n            tokens: vec![Token {\n                            lines: vec![SLine::from(\"| 01a\"),\n                                        SLine::from(\"01b\"),\n                                        SLine::from(\"01c\")],\n                            tokens: vec![],\n                        },\n                        Token {\n                            lines: vec![SLine::from(\"02a\"), SLine::from(\"02b\")],\n                            tokens: vec![Token {\n                                            lines: vec![SLine::from(\"020a\"),\n                                                        SLine::from(\"|020b\")],\n                                            tokens: vec![],\n                                        },\n                                        Token {\n                                            lines: vec![SLine::from(\"  021a\"),\n                                                        SLine::from(\"021b\")],\n                                            tokens: vec![],\n                                        }],\n                        }],\n        },\n        Token {\n            lines: vec![SLine::from(\"1a\"), SLine::from(\"1b\")],\n            tokens: vec![Token {\n                            lines: vec![SLine::from(\"11a\"),\n                                        SLine::from(\"|11b\"),\n                                        SLine::from(\"11c\")],\n                            tokens: vec![],\n                        },\n                        Token {\n                            lines: vec![SLine::from(\"12a  |\"), SLine::from(\"12b  |\")],\n                            tokens: vec![],\n                        }],\n        },\n        Token {\n            lines: vec![SLine::from(\"2a\")],\n            tokens: vec![Token {\n                            lines: vec![SLine::from(\"21a\"),\n                                        SLine::from(\"21b\"),\n                                        SLine::from(\"\"),\n                                        SLine::from(\"\")],\n                            tokens: vec![],\n                        }],\n        }];\n----\n\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fjleahred%2Findent_tokenizer","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fjleahred%2Findent_tokenizer","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fjleahred%2Findent_tokenizer/lists"}