{"id":13536069,"url":"https://github.com/speed/newcrawler","last_synced_at":"2025-04-02T02:32:32.676Z","repository":{"id":87800339,"uuid":"47316027","full_name":"speed/newcrawler","owner":"speed","description":"Free Web Scraping Tool with Java","archived":false,"fork":false,"pushed_at":"2023-11-25T09:09:47.000Z","size":150824,"stargazers_count":584,"open_issues_count":25,"forks_count":115,"subscribers_count":31,"default_branch":"master","last_synced_at":"2024-02-14T23:39:40.133Z","etag":null,"topics":["crawler","docker","scraping","spider"],"latest_commit_sha":null,"homepage":"http://www.newcrawler.com","language":"JavaScript","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":null,"status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/speed.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":null,"code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null}},"created_at":"2015-12-03T07:37:35.000Z","updated_at":"2024-02-13T06:24:35.000Z","dependencies_parsed_at":"2024-01-13T10:42:16.337Z","dependency_job_id":"7536f3d5-93ca-40ab-ab6a-cfa02ecc3f39","html_url":"https://github.com/speed/newcrawler","commit_stats":null,"previous_names":[],"tags_count":2,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/speed%2Fnewcrawler","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/speed%2Fnewcrawler/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/speed%2Fnewcrawler/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/speed%2Fnewcrawler/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/speed","download_url":"https://codeload.github.com/speed/newcrawler/tar.gz/refs/heads/master","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":222795273,"owners_count":17038797,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["crawler","docker","scraping","spider"],"created_at":"2024-08-01T09:00:34.357Z","updated_at":"2024-11-03T01:30:28.466Z","avatar_url":"https://github.com/speed.png","language":"JavaScript","funding_links":[],"categories":["JavaScript"],"sub_categories":[],"readme":"\n\n\u003c!-- http://shields.io/--\u003e\n\nNewCrawler\n=========================\n\n    Free Web Scraping Tool\n    \n    \nNewCrawler Quick Start\n==============\n\n\u003ewww.newcrawler.com\n\nLinux\n----\n\nInstalling software packages on Centos / Fedora servers:\n\n\u003ex86\n\n\u003ecurl -fsSL https://raw.githubusercontent.com/speed/newcrawler/master/install_i586.sh | sh\n\n\u003ex64\n\n\u003ecurl -fsSL https://raw.githubusercontent.com/speed/newcrawler/master/install_x86_64.sh | sh\n\n\nInstalling software packages on Ubuntu / Debian servers:\n\n\u003ex86\n\n\u003ecurl -fsSL https://raw.githubusercontent.com/speed/newcrawler/master/install_Debian_i586.sh | sh\n\n\u003ex64\n\n\u003ecurl -fsSL https://raw.githubusercontent.com/speed/newcrawler/master/install_Debian_x86_64.sh | sh\n\n\nInstalling NewCrawler and Chrome software packages on Centos / Fedora servers:\n\n\u003ex86\n\n\u003ecurl -fsSL https://raw.githubusercontent.com/speed/newcrawler/master/install_NewCrawler_Chrome_MySQL_x86_64.sh | sh\n\n\n\n\n\n\t\t# OS Version 、 NewCrawler Directory\n\t\t\n\t\t[root@localhost ~]# rpm -q centos-release\n\t\tcentos-release-7-0.1406.el7.centos.2.5.x86_64\n\n\t\t[root@localhost ~]# ls\n\t\tinstall.sh  newcrawler\n\n\t\t[root@localhost ~]# ls newcrawler\n\t\tdb  jetty  jre  phantomjs  start.sh  stop.sh  war\n\nModify the database to MySQL or use the default file database\n\n\t#edit 'war/WEB-INF/classes/datanucleus.properties'\n\t\n\tjavax.jdo.option.ConnectionURL=jdbc:mysql://127.0.0.1:3306/newcrawler?characterEncoding=UTF-8\n\tjavax.jdo.option.ConnectionUserName=root\n\tjavax.jdo.option.ConnectionPassword=123456\n\t\nWindows\n----\n\n\u003ex86\n\n\u003ehttps://github.com/speed/windows-32bit-jetty-jre\n\n\u003ex64\n\n\u003ehttps://github.com/speed/windows-64bit-jetty-jre\n\n\n\nGoogle App Engine\n----\n\n\u003ehttps://github.com/speed/newcrawler-gae-shell\n\n\nDocker\n----\n\n\u003edocker pull newcrawler/spider\n\n\u003edocker run -itd -p --net=host 8500:8500 --name=newcrawler newcrawler/spider\n\n\u003edocker logs -f newcrawler\n\nDocker aliyun\n----\n\n\u003edocker run -itd -p --net=host 8500:8500 --name=newcrawler registry.cn-shenzhen.aliyuncs.com/speed/spider\n\n\t\nStartup NewCrawler\n----\n\n\u003esh newcrawler/start.sh \u0026\n\nhttp://127.0.0.1:8500 \n\n\nShutdown NewCrawler\n----\n\n\u003esh newcrawler/stop.sh\n\nUpgrade NewCrawler\n----\n\n\u003esh newcrawler/upgrade.sh\n\nInstall Chrome\n----\nhttps://github.com/speed/selenium\n\n[![ScreenShot](https://raw.githubusercontent.com/speed/resources/master/images/NewCrawler_Video.jpg)](http://www.newcrawler.com/demo.html)\n\n\n\nNewCrawler Cluster\n=========================\n\n![ScreenShot](https://raw.githubusercontent.com/speed/resources/master/images/NewCrawler%20Cluster2.png)\n\n\n\n\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fspeed%2Fnewcrawler","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fspeed%2Fnewcrawler","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fspeed%2Fnewcrawler/lists"}