{"id":46486412,"url":"https://github.com/crawler-commons/crawler-commons","last_synced_at":"2026-03-06T09:34:12.575Z","repository":{"id":30119305,"uuid":"33669215","full_name":"crawler-commons/crawler-commons","owner":"crawler-commons","description":"A set of reusable Java components that implement functionality common to any web crawler","archived":false,"fork":false,"pushed_at":"2026-02-23T22:11:13.000Z","size":4448,"stargazers_count":253,"open_issues_count":34,"forks_count":89,"subscribers_count":31,"default_branch":"master","last_synced_at":"2026-02-23T22:47:04.732Z","etag":null,"topics":["java","library","open-source","robots-txt","robotstxt","sitemaps","web-crawler"],"latest_commit_sha":null,"homepage":"","language":"Java","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/crawler-commons.png","metadata":{"files":{"readme":"README.md","changelog":"CHANGES.txt","contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null,"notice":null,"maintainers":null,"copyright":null,"agents":null,"dco":null,"cla":null}},"created_at":"2015-04-09T13:05:29.000Z","updated_at":"2026-02-23T14:47:09.000Z","dependencies_parsed_at":"2023-02-10T23:01:07.221Z","dependency_job_id":"3920c5eb-c365-4101-b6f6-4e63d5232e6e","html_url":"https://github.com/crawler-commons/crawler-commons","commit_stats":null,"previous_names":[],"tags_count":17,"template":false,"template_full_name":null,"purl":"pkg:github/crawler-commons/crawler-commons","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/crawler-commons%2Fcrawler-commons","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/crawler-commons%2Fcrawler-commons/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/crawler-commons%2Fcrawler-commons/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/crawler-commons%2Fcrawler-commons/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/crawler-commons","download_url":"https://codeload.github.com/crawler-commons/crawler-commons/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/crawler-commons%2Fcrawler-commons/sbom","scorecard":{"id":307606,"data":{"date":"2025-08-11","repo":{"name":"github.com/crawler-commons/crawler-commons","commit":"d185a090bc29d1eea697cf6ed565dc92258be2a3"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":4.6,"checks":[{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Code-Review","score":2,"reason":"Found 5/19 approved changesets -- score normalized to 2","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Dangerous-Workflow","score":10,"reason":"no dangerous workflow patterns detected","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"Maintained","score":10,"reason":"30 commit(s) and 2 issue activity found in the last 90 days -- score normalized to 10","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Pinned-Dependencies","score":0,"reason":"dependency not pinned by hash detected -- score normalized to 0","details":["Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/build.yml:19: update your workflow using https://app.stepsecurity.io/secureworkflow/crawler-commons/crawler-commons/build.yml/master?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/build.yml:22: update your workflow using https://app.stepsecurity.io/secureworkflow/crawler-commons/crawler-commons/build.yml/master?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/code_coverage.yml:15: update your workflow using https://app.stepsecurity.io/secureworkflow/crawler-commons/crawler-commons/code_coverage.yml/master?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/code_coverage.yml:19: update your workflow using https://app.stepsecurity.io/secureworkflow/crawler-commons/crawler-commons/code_coverage.yml/master?enable=pin","Warn: third-party GitHubAction not pinned by hash: .github/workflows/code_coverage.yml:26: update your workflow using https://app.stepsecurity.io/secureworkflow/crawler-commons/crawler-commons/code_coverage.yml/master?enable=pin","Info:   0 out of   4 GitHub-owned GitHubAction dependencies pinned","Info:   0 out of   1 third-party GitHubAction dependencies pinned"],"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"Token-Permissions","score":0,"reason":"detected GitHub workflow tokens with excessive permissions","details":["Warn: no topLevel permission defined: .github/workflows/build.yml:1","Warn: no topLevel permission defined: .github/workflows/code_coverage.yml:1","Info: no jobLevel write permissions found"],"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: LICENSE:0","Info: FSF or OSI recognized license: Apache License 2.0: LICENSE:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Branch-Protection","score":0,"reason":"branch protection not enabled on development/release branches","details":["Warn: branch protection not enabled for branch 'master'"],"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"Vulnerabilities","score":10,"reason":"0 existing vulnerabilities detected","details":null,"documentation":{"short":"Determines if the project has open, known unfixed vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}},{"name":"SAST","score":0,"reason":"SAST tool is not run on all commits -- score normalized to 0","details":["Warn: 0 commits out of 17 are checked with a SAST tool"],"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}}]},"last_synced_at":"2025-08-17T22:28:02.890Z","repository_id":30119305,"created_at":"2025-08-17T22:28:02.890Z","updated_at":"2025-08-17T22:28:02.890Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":30169038,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-03-06T07:56:45.623Z","status":"ssl_error","status_checked_at":"2026-03-06T07:55:55.621Z","response_time":250,"last_error":"SSL_connect returned=1 errno=0 peeraddr=140.82.121.6:443 state=error: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["java","library","open-source","robots-txt","robotstxt","sitemaps","web-crawler"],"created_at":"2026-03-06T09:34:12.019Z","updated_at":"2026-03-06T09:34:12.561Z","avatar_url":"https://github.com/crawler-commons.png","language":"Java","funding_links":[],"categories":[],"sub_categories":[],"readme":"[![Build Status](https://github.com/crawler-commons/crawler-commons/workflows/crawler-commons%20build/badge.svg)](https://github.com/crawler-commons/crawler-commons/actions?query=workflow%3A%22crawler-commons+build%22)\n[![license](https://img.shields.io/github/license/crawler-commons/crawler-commons.svg?maxAge=2592000?style=plastic)](http://www.apache.org/licenses/LICENSE-2.0)\n[![Coverage Status](https://coveralls.io/repos/github/crawler-commons/crawler-commons/badge.svg?branch=master)](https://coveralls.io/github/crawler-commons/crawler-commons?branch=master)\n\n# Overview\n\nCrawler-Commons is a set of reusable Java components that implement functionality common to any web crawler.  \nThese components benefit from collaboration among various existing web crawler projects, and reduce duplication of effort.\n\n# Table of Contents\n- [Documentation](#user-documentation)\n- [Mailing List](#mailing-list)\n- [Installation](#installation)\n- [News](#news)\n\n# User Documentation\n\n## Javadocs\n* [1.6](https://crawler-commons.github.io/crawler-commons/1.6/)\n* [1.5](https://crawler-commons.github.io/crawler-commons/1.5/)\n* [1.4](https://crawler-commons.github.io/crawler-commons/1.4/)\n* [1.3](https://crawler-commons.github.io/crawler-commons/1.3/)\n* [1.2](https://crawler-commons.github.io/crawler-commons/1.2/)\n* [1.1](https://crawler-commons.github.io/crawler-commons/1.1/)\n* [1.0](https://crawler-commons.github.io/crawler-commons/1.0/)\n* [0.10](https://crawler-commons.github.io/crawler-commons/0.10/)\n* [0.9](https://crawler-commons.github.io/crawler-commons/0.9/)\n* [0.8](https://crawler-commons.github.io/crawler-commons/0.8/)\n* [0.7](https://crawler-commons.github.io/crawler-commons/0.7/)\n* [0.6](https://crawler-commons.github.io/crawler-commons/0.6/apidocs/)\n\n# Mailing List\n\nThere is a mailing list on [Google Groups](https://groups.google.com/forum/?fromgroups#!forum/crawler-commons).\n\n# Installation\n\nUsing Maven, add the following dependency to your pom.xml:\n~~~xml\n\u003cdependency\u003e\n    \u003cgroupId\u003ecom.github.crawler-commons\u003c/groupId\u003e\n    \u003cartifactId\u003ecrawler-commons\u003c/artifactId\u003e\n    \u003cversion\u003e1.6\u003c/version\u003e\n\u003c/dependency\u003e\n~~~\n\nUsing Gradle, add the folling to your build file:\n```groovy\ndependencies {\n    implementation group: 'com.github.crawler-commons', name: 'crawler-commons', version: '1.6'\n}\n```\n\n# News\n\n## 4th December 2025 – crawler-commons 1.6 released\n\nWe are pleased to announce the release of version 1.6 of Crawler-Commons!\n\nThe new release includes multiple dependency upgrades, several improvements and bug fixes. See the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-1.6/CHANGES.txt) file for the complete list of changes.\n\n**Important**:\n\n- This release adds support for IDN2008 domain names and public suffixes in EffectiveTldFinder. If you rely on a recent version of the public suffix list, please upgrade to release 1.6! See [issue report #551](https://github.com/crawler-commons/crawler-commons/issues/551) for more information.\n\n\n## 2nd July 2025 – crawler-commons 1.5 released\n\nWe are pleased to announce the release of version 1.5 of Crawler-Commons!\n\nThe new release includes multiple dependency upgrades, several improvements and bug fixes. See the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-1.5/CHANGES.txt) file for the complete list of changes.\n\n**Breaking changes**:\n\n- The robots.txt parser is now pedantic regarding the user-agent names passed to the [parseContent() method](https://crawler-commons.github.io/crawler-commons/1.5/crawlercommons/robots/SimpleRobotRulesParser.html#parseContent(java.lang.String,byte[],java.lang.String,java.util.Collection)). The names in the `robotNames` parameter must be lower-case and the wildcard agent name \"`*`\" must not be included. An exception is thrown if these conditions are not met. Please see the Javadoc and [issue report #453](https://github.com/crawler-commons/crawler-commons/issues/453) for more information.\n\n\n## 18th July 2023  - crawler-commons 1.4 released\n\nWe are pleased to announce the 1.4 release of Crawler-Commons.\n\nThe new release includes many improvements and bug fixes, several dependency upgrades and improvements to the automatic build system. The following are the most notable improvements and changes:\n- Java 11 is now required to run or build crawler-commons\n- the robots.txt parser (SimpleRobotRulesParser) is now compliant with [RFC 9309](https://www.rfc-editor.org/rfc/rfc9309.html) and provides a new [API entry point](https://crawler-commons.github.io/crawler-commons/1.4/crawlercommons/robots/SimpleRobotRulesParser.html#parseContent(java.lang.String,byte%5B%5D,java.lang.String,java.util.Collection)) accepting a collection of single-word user-agent product tokens which allows for faster and RFC-compliant matching of robots.txt user-agent lines. Please note that user-agent product tokens must be lower-case.\n\nSee the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-1.4/CHANGES.txt) file included with the release for the detailed list of changes.\n\n\n## 28th July 2022  - crawler-commons 1.3 released\n\nWe are glad to announce the 1.3 release of Crawler-Commons. See the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-1.3/CHANGES.txt) file included with the release for a complete list of details.\nThe new release includes multiple dependency upgrades, improvements to the automatic builds, and a tighter protections against XXE vulnerability issues in the Sitemap parser.\n\n\n## 14th October 2021  - crawler-commons 1.2 released\n\nWe are glad to announce the 1.2 release of Crawler-Commons. See the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-1.2/CHANGES.txt) file included with the release for a complete list of details.\nThis version fixes an XXE vulnerability issue in the Sitemap parser and includes several improvements to the URL normalizer and the Sitemaps parser.\n\n\n## 29th June 2020  - crawler-commons 1.1 released\n\nWe are glad to announce the 1.1 release of Crawler-Commons. See the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-1.1/CHANGES.txt) file included with the release for a full list of details.\n\n## 21st March 2019  - crawler-commons 1.0 released\n\nWe are glad to announce the 1.0 release of Crawler-Commons. See the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-1.0/CHANGES.txt) file included with the release for a full list of details.\nAmong other bug fixes and improvements this version adds support for parsing sitemap extensions (image, video, news, alternate links).\n\n## 7th June 2018  - crawler-commons 0.10 released\n\nWe are glad to announce the 0.10 release of Crawler-Commons. See the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-0.10/CHANGES.txt) file included with the release for a full list of details.\nThis version contains among other things improvements to the Sitemap parsing and the removal of the Tika dependency. \n\n## 31st October 2017  - crawler-commons 0.9 released\n\nWe are glad to announce the 0.9 release of Crawler-Commons. See the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-0.9/CHANGES.txt) file included with the release for a full list of details.\nThe main changes are the removal of DOM-based sitemap parser as the SAX equivalent introduced in the previous version has better performance and is also more robust. You might need to change your code to replace `SiteMapParserSAX` with `SiteMapParser`.\nThe parser is now aware of namespaces, and by default does not force the namespace to be the one recommended in the specification (`http://www.sitemaps.org/schemas/sitemap/0.9`) as variants can be found in the wild. You can set the behaviour using the method _setStrictNamespace(boolean)_.\n\nAs usual, the version 0.9 contains numerous improvements and bugfixes and all users are invited to upgrade to this version.\n\n## 9th June 2017  - crawler-commons 0.8 released\n\nWe are glad to announce the 0.8 release of Crawler-Commons. See the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-0.8/CHANGES.txt) file included with the release for a full list of details.\nThe main changes are the removal of the HTTP fetcher support, which has been put in a [separate project](https://github.com/crawler-commons/http-fetcher). We also added a SAX-based parser for processing sitemaps, which requires less memory \nand is more robust to malformed documents than its DOM-based counterpart. The latter has been kept for now but might be removed in the future.\n\n## 24th November 2016  - crawler-commons 0.7 released\n\nWe are glad to announce the 0.7 release of Crawler-Commons. See the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-0.7/CHANGES.txt) file included with the release for a full list of details.\nThe main changes are that Crawler-Commons now requires JAVA 8 and that the package crawlercommons.url has been replaced with crawlercommons.domains. If your project uses CC then you might want to run the following command on it\n\n```\nfind . -type f -print0 | xargs -0 sed -i 's/import crawlercommons\\.url\\./import crawlercommons\\.domains\\./'\n```\n\nPlease note also that this is the last release containing the HTTP fetcher support, which is deprecated and will be removed from the next version.\n\nThe version 0.7 contains numerous improvements and bugfixes and all users are invited to upgrade to this version.\n\n\n## 11th June 2015 - crawler-commons 0.6 is released\n\nWe are glad to announce the 0.6 release of Crawler Commons. See the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-0.6/CHANGES.txt) file included with the release for a full list of details.\n\nWe suggest all users to upgrade to this version. Details of how to do so can be found on  [Maven Central](http://search.maven.org/#artifactdetails%7Ccom.github.crawler-commons%7Ccrawler-commons%7C0.6%7Cjar). Please note that the groupId has changed to com.github.crawler-commons.\n\nThe Java documentation can be found [here](http://crawler-commons.github.io/crawler-commons/0.6/apidocs/).\n\n## 22nd April 2015 - crawler-commons has moved\n\nThe crawler-commons project is now being hosted at GitHub, due to the demise of Google code hosting.\n\n## 15th October 2014 - crawler-commons 0.5 is released\n\nWe are glad to announce the 0.5 release of Crawler Commons. This release mainly improves Sitemap parsing as well as an upgrade to [Apache Tika 1.6](http://tika.apache.org).\n\nSee the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/crawler-commons-0.5/CHANGES.txt) file included with the release for a full list of details. Additionally the Java documentation can be found [here](http://crawler-commons.googlecode.com/svn/wiki/javadoc/0.5/index.html).\n\nWe suggest all users to upgrade to this version. The Crawler Commons project artifacts are released as Maven artifacts and can be found at [Maven Central](http://search.maven.org/#search%7Cgav%7C1%7Cg%3A%22com.google.code.crawler-commons%22%20AND%20a%3A%22crawler-commons%22).\n\n## 11th April 2014 - crawler-commons 0.4 is released\n\nWe are glad to announce the 0.4 release of Crawler Commons. Amongst other improvements, this release includes support for Googlebot-compatible regular expressions in URL specifications, further improvements to robots.txt parsing and an upgrade of httpclient to v4.2.6\\.\n\nSee the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/master/CHANGES.txt) file included with the release for a full list of details.\n\nWe suggest all users to upgrade to this version. Details of how to do so can be found on [Maven Central](http://search.maven.org/#search%7Cgav%7C1%7Cg%3A%22com.google.code.crawler-commons%22%20AND%20a%3A%22crawler-commons%22).\n\n## 11 Oct 2013 - crawler-commons 0.3 is released\n\nThis release improves robots.txt and sitemap parsing support, updates Tika to the latest released version (1.4), and removes some left-over cruft from the pre-Maven build setup.\n\nSee the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/master/CHANGES.txt) file included with the release for a full list of details.\n\n## 24 Jun 2013 - Nutch 1.7 now uses crawler-commons for robots.txt parsing\n\nSimilar to the previous note about Nutch 2.2, there's now a version of Nutch in the 1.x tree that also uses crawler-commons. See [Apache Nutch v1.7 Released](http://nutch.apache.org/#24th+June+2013+-+Apache+Nutch+v1.7+Released) for more details.\n\n## 08 Jun 2013 - Nutch 2.2 now uses crawler-commons for robots.txt parsing\n\nSee [Apache Nutch v2.2 Released](http://nutch.apache.org/#08+June+2013+-+Apache+Nutch+v2.2+Released) for more details.\n\n## 02 Feb 2013 - crawler-commons 0.2 is released\n\nThis release improves robots.txt and sitemap parsing support.\n\nSee the [CHANGES.txt](https://github.com/crawler-commons/crawler-commons/blob/master/CHANGES.txt) file included with the release for a full list of details.\n\n# License\nPublished under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0), see [LICENSE](https://github.com/crawler-commons/crawler-commons/blob/master/LICENSE)\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fcrawler-commons%2Fcrawler-commons","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fcrawler-commons%2Fcrawler-commons","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fcrawler-commons%2Fcrawler-commons/lists"}