{"id":13414833,"url":"https://github.com/SmileXie/zhihu_crawler","last_synced_at":"2025-03-14T22:32:16.549Z","repository":{"id":27441400,"uuid":"30919591","full_name":"SmileXie/zhihu_crawler","owner":"SmileXie","description":"Crawler of zhihu.com","archived":false,"fork":false,"pushed_at":"2017-04-20T07:22:46.000Z","size":77,"stargazers_count":268,"open_issues_count":0,"forks_count":139,"subscribers_count":40,"default_branch":"master","last_synced_at":"2024-07-31T21:53:16.499Z","etag":null,"topics":[],"latest_commit_sha":null,"homepage":"","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/SmileXie.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null}},"created_at":"2015-02-17T14:07:56.000Z","updated_at":"2024-04-23T02:43:45.000Z","dependencies_parsed_at":"2022-07-18T21:34:43.536Z","dependency_job_id":null,"html_url":"https://github.com/SmileXie/zhihu_crawler","commit_stats":null,"previous_names":[],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/SmileXie%2Fzhihu_crawler","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/SmileXie%2Fzhihu_crawler/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/SmileXie%2Fzhihu_crawler/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/SmileXie%2Fzhihu_crawler/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/SmileXie","download_url":"https://codeload.github.com/SmileXie/zhihu_crawler/tar.gz/refs/heads/master","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":243658055,"owners_count":20326459,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":[],"created_at":"2024-07-30T21:00:37.532Z","updated_at":"2025-03-14T22:32:16.279Z","avatar_url":"https://github.com/SmileXie.png","language":"Python","funding_links":[],"categories":["Crawler"],"sub_categories":[],"readme":"小趴趴--知乎版\n================================\n对知乎精华回答的爬虫收集与分析。\n\n* 20160502：近日知乎登录添加了验证码机制，当前的代码已无法实现自动登录知乎。可以修改代码通过保存cookie的方式登录知乎，再开始爬虫。\n\n## 算法简述\n* 收集范围：知乎各话题下的精华回答。\n* 爬虫算法：\n* 以[根话题的话题树](https://www.zhihu.com/topic/19776749/organize/entire)为启始，按广度优先遍历各子话题，深度为3。\n![目录树](https://raw.githubusercontent.com/SmileXie/zhihu_crawler/master/images/topic_tree.png)\n  * 各话题下的精华回答，按页遍历，例如从 https://www.zhihu.com/topic/19776749/top-answers?page=1\n遍历到\nhttps://www.zhihu.com/topic/19776749/top-answers?page=50\n解析各精华回答\n* 解析精华回答的各项属性，包括：\n  * 精华回答的点赞数，答案长度；\n  * 答题用户的id，获得的点赞数，地区，性别，学历，学校，专业等信息\n\n## 统计结果\n* 统计结果请见：[http://www.jianshu.com/p/6d53b34165d2](http://www.jianshu.com/p/6d53b34165d2)\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2FSmileXie%2Fzhihu_crawler","html_url":"https://awesome.ecosyste.ms/projects/github.com%2FSmileXie%2Fzhihu_crawler","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2FSmileXie%2Fzhihu_crawler/lists"}