{"id":39483055,"url":"https://github.com/soumitradev/barkov","last_synced_at":"2026-04-18T13:05:25.819Z","repository":{"id":57596464,"uuid":"372598916","full_name":"soumitradev/barkov","owner":"soumitradev","description":"A simple markov chain generator.","archived":false,"fork":false,"pushed_at":"2026-04-15T20:43:53.000Z","size":1408,"stargazers_count":2,"open_issues_count":0,"forks_count":0,"subscribers_count":1,"default_branch":"main","last_synced_at":"2026-04-15T22:32:29.367Z","etag":null,"topics":["go","golang","library","markov","markov-chain"],"latest_commit_sha":null,"homepage":"","language":"Go","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/soumitradev.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null,"notice":null,"maintainers":null,"copyright":null,"agents":null,"dco":null,"cla":null}},"created_at":"2021-05-31T18:39:18.000Z","updated_at":"2026-04-15T20:43:58.000Z","dependencies_parsed_at":"2022-08-23T14:50:50.735Z","dependency_job_id":null,"html_url":"https://github.com/soumitradev/barkov","commit_stats":null,"previous_names":[],"tags_count":10,"template":false,"template_full_name":null,"purl":"pkg:github/soumitradev/barkov","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/soumitradev%2Fbarkov","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/soumitradev%2Fbarkov/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/soumitradev%2Fbarkov/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/soumitradev%2Fbarkov/manif
ests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/soumitradev","download_url":"https://codeload.github.com/soumitradev/barkov/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/soumitradev%2Fbarkov/sbom","scorecard":{"id":839051,"data":{"date":"2025-08-18","repo":{"name":"github.com/soumitradev/barkov","commit":"80917effa2990f4f93e3dd8964f7d2c6aa46d50f"},"scorecard":{"version":"v5.2.1-41-g40576783","commit":"40576783fda6698350fcbbeaea760ff827433034"},"score":3,"checks":[{"name":"Dangerous-Workflow","score":-1,"reason":"no workflows found","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#dangerous-workflow"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#binary-artifacts"}},{"name":"SAST","score":0,"reason":"no SAST tool detected","details":["Warn: no pull requests merged into dev branch"],"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#sast"}},{"name":"Token-Permissions","score":-1,"reason":"No tokens found","details":null,"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#token-permissions"}},{"name":"Maintained","score":0,"reason":"0 commit(s) and 0 issue activity found in the last 90 days -- score normalized to 0","details":null,"documentation":{"short":"Determines if the 
project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#maintained"}},{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#packaging"}},{"name":"Code-Review","score":0,"reason":"Found 0/2 approved changesets -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#code-review"}},{"name":"Pinned-Dependencies","score":-1,"reason":"no dependencies found","details":null,"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#pinned-dependencies"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#cii-best-practices"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security 
policy.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#security-policy"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#fuzzing"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: LICENSE:0","Info: FSF or OSI recognized license: MIT License: LICENSE:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#license"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#signed-releases"}},{"name":"Branch-Protection","score":0,"reason":"branch protection not enabled on development/release branches","details":["Warn: branch protection not enabled for branch 'main'"],"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#branch-protection"}},{"name":"Vulnerabilities","score":10,"reason":"0 existing vulnerabilities detected","details":null,"documentation":{"short":"Determines if the project has open, known unfixed 
vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/40576783fda6698350fcbbeaea760ff827433034/docs/checks.md#vulnerabilities"}}]},"last_synced_at":"2025-08-23T19:57:49.580Z","repository_id":57596464,"created_at":"2025-08-23T19:57:49.581Z","updated_at":"2025-08-23T19:57:49.581Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":31969787,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-04-18T00:39:45.007Z","status":"online","status_checked_at":"2026-04-18T02:00:07.018Z","response_time":103,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["go","golang","library","markov","markov-chain"],"created_at":"2026-01-18T05:17:20.300Z","updated_at":"2026-04-18T13:05:25.794Z","avatar_url":"https://github.com/soumitradev.png","language":"Go","funding_links":[],"categories":[],"sub_categories":[],"readme":"# Barkov\n\nA simple markov chain generator.\n\nHeavily inspired from https://github.com/jsvine/markovify.\n\n## Installation\n\n```bash\ngo get github.com/soumitradev/barkov/v2\n```\n\n## Why?\nThe reason I made this library is because the markovify library was quite slow, and it did not give me enough control over the tokenization or the validation parts of the markov chain without me having to override the existing classes, which I found very annoying. For this reason, this implementation is quite barebones, and does not come with tokenization or validation code. 
You can choose to tokenize your text however you want, and validate a sentence in whichever way you see fit. If you don't want to use the chain struct that I've defined, and want to use your own, fine. There's a `GenerativeChain` interface you need to satisfy that has 3 exported methods, and you can use the most useful parts of this library.\n\nSome advantages of this library over the original (in no particular order):\n- Much more memory efficient by default (doesn't store too many state variables, relies more on barebones maps and slices)\n- You don't need to override the default tokenizer, as there is no default tokenizer\n- You don't need to override the default validator, as there is no default validator\n- Implements tree pruning during markov generation, to allow for way more efficient generation\n- Uses goroutines to perform many generations at a time to allow for faster generation\n- Implements a timeout for generation functions that perform validation, allowing for bounded-time generation\n- All the useful functions are not written with some chain class in mind, but an interface, allowing for much more customizability\n\nFeatures that aren't in this library (yet):\n- Combining models\n- Exporting and importing models to/from JSON\n\n## Usage\nThis is an exhaustive example for all features of this library.\n\n```go\npackage main\n\nimport (\n\t\"fmt\"\n\t\"os\"\n\t\"strings\"\n\t\"time\"\n\n\t\"github.com/soumitradev/barkov/v2\"\n)\n\nconst STATE_SIZE = 4\nconst MAX_SENTENCE_LEN = 100\nconst TIMEOUT = 10 * time.Second\n\nfunc makeValidator(fullText string) func([]string) bool {\n\toriginal := fullText\n\t// Validator that checks if whatever was generated\n\t// so far was already in the original text, and\n\t// rejects if it is.\n\treturn func(gram []string) bool {\n\t\ttext := strings.Join(gram, \" \")\n\t\treturn !strings.Contains(original, text)\n\t}\n}\n\nfunc tokenize(messages []string) ([][]string, func([]string) bool) {\n\tcorpus := make([][]string, 
0, len(messages))\n\tvar fullText strings.Builder\n\n\tfor _, message := range messages {\n\t\ttokens := strings.Split(message, \" \")\n\t\t// Filter out messages that are too long or too short\n\t\tif len(tokens) \u003c STATE_SIZE || len(tokens) \u003e MAX_SENTENCE_LEN {\n\t\t\tcontinue\n\t\t}\n\n\t\t// Filter out empty tokens that might exist due to multiple spaces\n\t\tfiltered := make([]string, 0, len(tokens))\n\t\tfor _, token := range tokens {\n\t\t\tif token == \"\" {\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tfiltered = append(filtered, token)\n\t\t}\n\n\t\tcorpus = append(corpus, filtered)\n\t\tfullText.WriteString(strings.Join(filtered, \" \") + \"\\n\")\n\t}\n\n\treturn corpus, makeValidator(fullText.String())\n}\n\nfunc readLines(filepath string) []string {\n\tbytes, err := os.ReadFile(filepath)\n\tif err != nil {\n\t\tpanic(fmt.Sprintf(\"Error reading file at %s\", filepath))\n\t}\n\n\treturn strings.Split(string(bytes), \"\\n\")\n}\n\nfunc main() {\n\tfilepath := \"./corpus.txt\"\n\tmessages := readLines(filepath)\n\tcorpus, validator := tokenize(messages)\n\n\tfmt.Println(\"Finished building corpus and context!\")\n\tfmt.Printf(\"State Size: %d\\n\", STATE_SIZE)\n\tchain := barkov.InitChain(STATE_SIZE).Build(corpus).Compress()\n\tfmt.Println(\"Finished building and compiling markov model!\")\n\n\tfmt.Println(\"Printing 5 random sentences first:\")\n\tfor range 5 {\n\t\t// Use the threaded version of the generation function with validator and timeout\n\t\tgenerated, err := barkov.GenThreaded(chain, validator, TIMEOUT)\n\t\tif err != nil {\n\t\t\tfmt.Println(\"[ERROR]\", err)\n\t\t\tcontinue\n\t\t}\n\t\tfmt.Println(strings.Join(generated, \" \"))\n\t}\n\n\tfmt.Println(\"Printing 5 random sentences with start states:\")\n\tfor range 5 {\n\t\tstart := barkov.ConstructState([]string{\"i\", \"did\", \"not\"})\n\t\t// You can even provide a start state\n\t\tgenerated, err := barkov.GenThreadedWithStart(chain, start, validator, TIMEOUT)\n\t\tif err != nil 
{\n\t\t\tfmt.Println(\"[ERROR]\", err)\n\t\t\tcontinue\n\t\t}\n\t\tfmt.Println(strings.Join(generated, \" \"))\n\t}\n}\n```\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fsoumitradev%2Fbarkov","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fsoumitradev%2Fbarkov","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fsoumitradev%2Fbarkov/lists"}