{"id":19298375,"url":"https://github.com/nlpchina/word2vec_java","last_synced_at":"2025-04-12T19:41:58.049Z","repository":{"id":11205057,"uuid":"13589686","full_name":"NLPchina/Word2VEC_java","owner":"NLPchina","description":"word2vec java版本的一个实现","archived":false,"fork":false,"pushed_at":"2021-04-01T07:49:24.000Z","size":18664,"stargazers_count":697,"open_issues_count":36,"forks_count":484,"subscribers_count":96,"default_branch":"master","last_synced_at":"2025-04-03T22:08:42.918Z","etag":null,"topics":["java","word2vec-java"],"latest_commit_sha":null,"homepage":null,"language":"Java","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/NLPchina.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null}},"created_at":"2013-10-15T12:42:13.000Z","updated_at":"2025-02-26T07:03:16.000Z","dependencies_parsed_at":"2022-09-03T02:01:57.618Z","dependency_job_id":null,"html_url":"https://github.com/NLPchina/Word2VEC_java","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/NLPchina%2FWord2VEC_java","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/NLPchina%2FWord2VEC_java/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/NLPchina%2FWord2VEC_java/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/NLPchina%2FWord2VEC_java/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/NLPchina","download_url":"https://codeload.github.com/NLPchina/Word2VEC_java/tar.gz/refs/heads/master","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":248625172,"owners_count":21135510,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["java","word2vec-java"],"created_at":"2024-11-09T23:07:49.788Z","updated_at":"2025-04-12T19:41:58.025Z","avatar_url":"https://github.com/NLPchina.png","language":"Java","readme":"Word2VEC_java\n=============\n\nword2vec java版本的一个实现\n\n\n\n有人抱怨没有测试代码。我工作中用到。写了个例子正好发这里。大家领会下精神把\n\n有人抱怨没有语料 https://pan.baidu.com/s/1jIy3YSY 大家用这个吧\n\n\n````\npackage com.kuyun.document_class;\n\nimport java.io.BufferedReader;\nimport java.io.File;\nimport java.io.FileNotFoundException;\nimport java.io.FileOutputStream;\nimport java.io.IOException;\nimport java.io.UnsupportedEncodingException;\nimport java.util.List;\n\nimport org.ansj.domain.Term;\nimport org.ansj.splitWord.analysis.ToAnalysis;\n\nimport com.alibaba.fastjson.JSONObject;\nimport com.ansj.vec.Learn;\nimport com.ansj.vec.Word2VEC;\n\nimport love.cq.util.IOUtil;\nimport love.cq.util.StringUtil;\n\npublic class Word2VecTest {\n    private static final File sportCorpusFile = new File(\"corpus/result.txt\");\n\n    public static void main(String[] args) throws IOException {\n        File[] files = new File(\"corpus/sport/\").listFiles();\n        \n        //构建语料\n        try (FileOutputStream fos = new FileOutputStream(sportCorpusFile)) {\n            for (File file : files) {\n                if (file.canRead() \u0026\u0026 file.getName().endsWith(\".txt\")) {\n                    parserFile(fos, file);\n                }\n            }\n        }\n        \n        //进行分词训练\n        \n        Learn lean = new Learn() ;\n        \n        lean.learnFile(sportCorpusFile) ;\n        \n        lean.saveModel(new File(\"model/vector.mod\")) ;\n        \n        \n        \n        //加载测试\n        \n        Word2VEC w2v = new Word2VEC() ;\n        \n        w2v.loadJavaModel(\"model/vector.mod\") ;\n        \n        System.out.println(w2v.distance(\"姚明\")); ;\n\n    }\n\n    private static void parserFile(FileOutputStream fos, File file) throws FileNotFoundException,\n                                                                   IOException {\n        // TODO Auto-generated method stub\n        try (BufferedReader br = IOUtil.getReader(file.getAbsolutePath(), IOUtil.UTF8)) {\n            String temp = null;\n            JSONObject parse = null;\n            while ((temp = br.readLine()) != null) {\n                parse = JSONObject.parseObject(temp);\n                paserStr(fos, parse.getString(\"title\"));\n                paserStr(fos, StringUtil.rmHtmlTag(parse.getString(\"content\")));\n            }\n        }\n    }\n\n    private static void paserStr(FileOutputStream fos, String title) throws IOException {\n        List\u003cTerm\u003e parse2 = ToAnalysis.parse(title) ;\n        StringBuilder sb = new StringBuilder() ;\n        for (Term term : parse2) {\n            sb.append(term.getName()) ;\n            sb.append(\" \");\n        }\n        fos.write(sb.toString().getBytes()) ;\n        fos.write(\"\\n\".getBytes()) ;\n    }\n}\n\n````\n","funding_links":[],"categories":[],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fnlpchina%2Fword2vec_java","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fnlpchina%2Fword2vec_java","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fnlpchina%2Fword2vec_java/lists"}