{"id":21322294,"url":"https://github.com/lupino/simhash","last_synced_at":"2025-03-15T23:11:58.256Z","repository":{"id":45699202,"uuid":"514126722","full_name":"Lupino/simhash","owner":"Lupino","description":"htm.core SimHash Runner","archived":false,"fork":false,"pushed_at":"2024-04-19T14:57:31.000Z","size":148,"stargazers_count":0,"open_issues_count":0,"forks_count":1,"subscribers_count":2,"default_branch":"main","last_synced_at":"2025-01-22T11:48:11.979Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":null,"language":"Haskell","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"bsd-3-clause","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/Lupino.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2022-07-15T03:52:57.000Z","updated_at":"2023-10-23T12:24:53.000Z","dependencies_parsed_at":"2023-02-09T02:01:20.188Z","dependency_job_id":"9ce4b027-ffa8-4e17-8c56-f73d540e6ed1","html_url":"https://github.com/Lupino/simhash","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Lupino%2Fsimhash","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Lupino%2Fsimhash/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Lupino%2Fsimhash/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/Lupino%2Fsimhash/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/Lupino","download_url":"https://codeload.github.com/Lupino/simhash/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":243801681,"owners_count":20350108,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2024-11-21T20:14:25.727Z","updated_at":"2025-03-15T23:11:58.223Z","avatar_url":"https://github.com/Lupino.png","language":"Haskell","readme":"# simhash\n\nhtm.core SimHash Runner\n\n# Build\n\nRecommend build `simhash` with [`stack`](https://docs.haskellstack.org/en/stable/README/)\n\n    git clone https://github.com/Lupino/simhash.git\n    cd simhash\n    git submodule update --init\n    stack build\n    stack install --local-bin-path bin\n\n# Usage\n\n    $ ./bin/simhash-runner --help\n    Usage: simhash-runner [-f|--file FILE] COMMAND [--version]\n      SimHash Runner\n\n    Available options:\n      -f,--file FILE           SimHash model file\n      --version                Print version information\n      -h,--help                Show this help text\n\n    Available commands:\n      train                    Train simhash model\n      test                     Test a string\n      infer                    Run infer task\n      infer-learn              Run infer learn task\n      v2-train                 Train simhash model v2\n      v2-test                  Test a string v2\n      v2-infer                 Run infer task v2\n      v2-infer-learn           Run infer learn task v2\n\n\n# Train and valid Sample format\n\n    label, string\n\n\n# V1\n\n## Train\n\n    $ ./bin/simhash-runner -f sample/v1-sample train -d sample/train_data.txt -t sample/valid_data.txt\n    Train iters 6/6 100.0%\n    Train Spent 0s\n    Train Finished in 0s\n    Total Spent 0s\n    Test iters 6/6 100.0%\n    Test score 66.66%\n    Test Spent 0s\n    Test Finished in 0s\n    Total Spent 0s\n\n## Test\n\n    $ ./bin/simhash-runner -f sample/v1-sample test -s 'test data 1'\n    [(\"label1\",0.5322839697942492),(\"label2\",0.46771603691875463)]\n\n\n## Deploy Infer\n\n    $ ./bin/simhash-runner -f sample/v1-sample infer -H tcp://127.0.0.1:5000 -n v1-sample -w 10 -s 5\n\n\n## Deploy Infer Learn\n\n    $ ./bin/simhash-runner -f sample/v1-sample infer-learn -H tcp://127.0.0.1:5000 -n v1-sample -w 10\n\n# V2\n\n## Train\n\n    $ ./bin/simhash-runner -f sample/v2-sample v2-train -d sample/train_data.txt -t sample/valid_data.txt\n    Train iters 6/6 100.0%\n    Train Spent 0s\n    Train Finished in 0s\n    Total Spent 0s\n    Test iters 6/6 100.0%\n    Test score 66.66%\n    Test Spent 0s\n    Test Finished in 0s\n    Total Spent 0s\n\n## Test\n\n    $ ./bin/simhash-runner -f sample/v2-sample v2-test -s 'test data 1'\n    [(\"label1\",0.5322839697942492),(\"label2\",0.46771603691875463)]\n\n\n## Deploy Infer\n\n    $ ./bin/simhash-runner -f sample/v2-sample v2-infer -H tcp://127.0.0.1:5000 -n v1-sample -w 10 -s 5\n\n\n## Deploy Infer Learn\n\n    $ ./bin/simhash-runner -f sample/v2-sample v2-infer-learn -H tcp://127.0.0.1:5000 -n v1-sample -w 10\n\n## Custom model options\n\nOption file name is `path/to/model_name.opts.yml`\n\n\n    column_size: 1600\n    encoder:\n      size: 600\n      sparsity: 0.2\n      token_similarity: true\n","funding_links":[],"categories":[],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Flupino%2Fsimhash","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Flupino%2Fsimhash","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Flupino%2Fsimhash/lists"}