{"id":47251766,"url":"https://github.com/us/crw","last_synced_at":"2026-05-09T10:30:08.952Z","repository":{"id":341719655,"uuid":"1171030160","full_name":"us/crw","owner":"us","description":"Fast, lightweight Firecrawl alternative in Rust. Web scraper, crawler \u0026 search API with MCP server for AI agents. Drop-in Firecrawl-compatible API (/v1/scrape, /v1/crawl, /v1/search). 2.3x faster than Tavily, 1.5x faster than Firecrawl in 1K-URL benchmarks. 6 MB RAM, single binary. Self-host or use managed cloud.","archived":false,"fork":false,"pushed_at":"2026-04-30T15:11:38.000Z","size":2719,"stargazers_count":70,"open_issues_count":1,"forks_count":5,"subscribers_count":0,"default_branch":"main","last_synced_at":"2026-04-30T17:10:42.421Z","etag":null,"topics":["ai","ai-agents","crawler","data-extraction","docker","firecrawl","firecrawl-alternative","html-to-markdown","llm","markdown","mcp","mcp-server","rust","scraping-api","self-hosted","tavily-alternative","web-crawler","web-scraper","web-scraping","web-search-api"],"latest_commit_sha":null,"homepage":"https://fastcrw.com","language":"Rust","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"agpl-3.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/us.png","metadata":{"files":{"readme":"README.md","changelog":"CHANGELOG.md","contributing":null,"funding":".github/FUNDING.yml","license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":"ROADMAP.md","authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null,"notice":null,"maintainers":null,"copyright":null,"agents":null,"dco":null,"cla":null},"funding":{"custom":["https://fastcrw.com"]}},"created_at":"2026-03-02T19:51:43.000Z","updated_at":"2026-04-30T15:14:13.000Z","dependencies_parsed_at":null,"dependency_job_id":null,"html_url":"https://github.com/us/crw","commit_s
tats":null,"previous_names":["us/crw"],"tags_count":29,"template":false,"template_full_name":null,"purl":"pkg:github/us/crw","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/us%2Fcrw","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/us%2Fcrw/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/us%2Fcrw/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/us%2Fcrw/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/us","download_url":"https://codeload.github.com/us/crw/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/us%2Fcrw/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":32628215,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-05-04T10:08:07.713Z","status":"ssl_error","status_checked_at":"2026-05-04T10:08:02.005Z","response_time":58,"last_error":"SSL_read: unexpected eof while 
reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["ai","ai-agents","crawler","data-extraction","docker","firecrawl","firecrawl-alternative","html-to-markdown","llm","markdown","mcp","mcp-server","rust","scraping-api","self-hosted","tavily-alternative","web-crawler","web-scraper","web-scraping","web-search-api"],"created_at":"2026-03-14T15:22:08.617Z","updated_at":"2026-05-05T00:04:26.253Z","avatar_url":"https://github.com/us.png","language":"Rust","funding_links":["https://fastcrw.com"],"categories":["MCP 服务器精选列表","Web Data Extraction"],"sub_categories":["🌐 浏览器自动化与网页交互"],"readme":"\u003ca name=\"readme-top\"\u003e\u003c/a\u003e\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"https://fastcrw.com\"\u003e\n    \u003cimg src=\"docs/logo.png\" alt=\"fastCRW\" height=\"120\" /\u003e\n  \u003c/a\u003e\n  \u003cp align=\"center\"\u003eThe web scraper built for AI agents. Single binary. 
Zero config.\u003c/p\u003e\n  \u003cp align=\"center\"\u003e\n    \u003ca href=\"https://crates.io/crates/crw-server\"\u003e\u003cimg src=\"https://img.shields.io/crates/v/crw-server.svg\" alt=\"crates.io\"\u003e\u003c/a\u003e\n    \u003ca href=\"https://github.com/us/crw/actions\"\u003e\u003cimg src=\"https://github.com/us/crw/workflows/CI/badge.svg\" alt=\"CI\"\u003e\u003c/a\u003e\n    \u003ca href=\"LICENSE\"\u003e\u003cimg src=\"https://img.shields.io/badge/license-AGPL--3.0-blue.svg\" alt=\"License\"\u003e\u003c/a\u003e\n    \u003ca href=\"https://github.com/us/crw/stargazers\"\u003e\u003cimg src=\"https://img.shields.io/github/stars/us/crw?style=social\" alt=\"GitHub Stars\"\u003e\u003c/a\u003e\n    \u003ca href=\"https://fastcrw.com\"\u003e\u003cimg src=\"https://img.shields.io/badge/Managed%20Cloud-fastcrw.com-blueviolet\" alt=\"fastcrw.com\"\u003e\u003c/a\u003e\n  \u003c/p\u003e\n  \u003cp align=\"center\"\u003e\n    \u003ca href=\"https://twitter.com/fastcrw\"\u003e\n      \u003cimg src=\"https://img.shields.io/badge/Follow%20on%20X-000000?style=for-the-badge\u0026logo=x\u0026logoColor=white\" alt=\"Follow on X\" /\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://www.linkedin.com/company/fastcrw\"\u003e\n      \u003cimg src=\"https://img.shields.io/badge/Follow%20on%20LinkedIn-0077B5?style=for-the-badge\u0026logo=linkedin\u0026logoColor=white\" alt=\"Follow on LinkedIn\" /\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://discord.gg/kkFh2SC8\"\u003e\n      \u003cimg src=\"https://img.shields.io/badge/Join%20our%20Discord-5865F2?style=for-the-badge\u0026logo=discord\u0026logoColor=white\" alt=\"Join our Discord\" /\u003e\n    \u003c/a\u003e\n  \u003c/p\u003e\n  \u003cp align=\"center\"\u003e\n    \u003ca href=\"https://www.producthunt.com/products/fastcrw?utm_source=badge-featured\u0026utm_medium=badge\u0026utm_campaign=badge-fastcrw\" target=\"_blank\" rel=\"noopener noreferrer\"\u003e\u003cimg 
src=\"https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=1116966\u0026theme=light\u0026t=1775671073751\" alt=\"fastCRW - Search + scrape live web results for AI agents | Product Hunt\" width=\"250\" height=\"54\" /\u003e\u003c/a\u003e\n  \u003c/p\u003e\n  \u003cp align=\"center\"\u003e\n    Works with: Claude Code · Cursor · Windsurf · Cline · Copilot · Continue.dev · Codex · Gemini CLI\n  \u003c/p\u003e\n  \u003cp align=\"center\"\u003e\n    \u003ca href=\"#quick-start\"\u003eQuick Start\u003c/a\u003e \u0026bull;\n    \u003ca href=\"#connect-to-ai-agents--mcp-skill-onboarding\"\u003eAI Agents\u003c/a\u003e \u0026bull;\n    \u003ca href=\"#benchmark\"\u003eBenchmarks\u003c/a\u003e \u0026bull;\n    \u003ca href=\"https://docs.fastcrw.com/#rest-api\"\u003eAPI Reference\u003c/a\u003e \u0026bull;\n    \u003ca href=\"https://fastcrw.com\"\u003eCloud\u003c/a\u003e \u0026bull;\n    \u003ca href=\"https://discord.gg/kkFh2SC8\"\u003eDiscord\u003c/a\u003e\n  \u003c/p\u003e\n  \u003cp align=\"center\"\u003e\n    \u003cb\u003eEnglish\u003c/b\u003e | \u003ca href=\"README.zh-CN.md\"\u003e中文\u003c/a\u003e\n  \u003c/p\u003e\n\u003c/p\u003e\n\n---\n\n## What's New\n\n### [0.5.0](https://github.com/us/crw/compare/v0.4.2...v0.5.0) (2026-05-04)\n\n\n### Features\n\n* **core:** add deadline module and request/renderer config scaffolding ([5a4e69a](https://github.com/us/crw/commit/5a4e69ae605d15c0090f3d866db0f8f4fa23a715))\n* **core:** thread end-to-end Deadline through scrape pipeline ([5991986](https://github.com/us/crw/commit/5991986cdac9756500dab40b8bf05ad454dbd21c))\n* **crawl:** key per-domain rate limiter by eTLD+1 ([39c7954](https://github.com/us/crw/commit/39c7954881cfde47e34dad2eaa4141f1f10b1156))\n* **crawl:** per-host concurrency cap on the eTLD+1 limiter ([274f462](https://github.com/us/crw/commit/274f462b2755a02fc2485bddc7ac8ad3fd11c0e3))\n* **renderer:** add browserless/chromium opt-in stealth profile 
([236f626](https://github.com/us/crw/commit/236f62682f29011a959bdef5a9770475a809f0a9))\n* **renderer:** chrome nav-budget cap + truncated/deadline_exceeded flags ([c57cef8](https://github.com/us/crw/commit/c57cef8c6ad2ba2fefce7f4110685bc779359378))\n* **renderer:** chrome request-paused interception pump (T27) ([13fcaa4](https://github.com/us/crw/commit/13fcaa4c5560f254d67682bff96ba24e39cdf13e))\n* **renderer:** leak-through fallback when global breaker open \u0026 host clean ([86a9e36](https://github.com/us/crw/commit/86a9e36880f2dddcb3d7b7bd6c993825559cf487))\n* **renderer:** outcome-aware breaker + extraction and stealth fixes ([86dd10f](https://github.com/us/crw/commit/86dd10fd014235cb9bd107e32c7cf6e04cb03367))\n* **renderer:** own per-eTLD+1 host limiter in FallbackRenderer ([0577516](https://github.com/us/crw/commit/0577516bd41dc284f24cbaf3ed95544504ba50be))\n* **renderer:** recover FC-wins URLs to reach 92% bench coverage ([ba12424](https://github.com/us/crw/commit/ba12424e44c34aa44bb8a41bc1f16d1dd87f498a))\n\n\n### Bug Fixes\n\n* **compose:** auto-restart and bound memory for renderer containers ([dd610cc](https://github.com/us/crw/commit/dd610ccae2579138d5438795e1d5ac441a0fafc3))\n* **core:** emit meaningful Timeout value when deadline already expired ([607bb27](https://github.com/us/crw/commit/607bb27692686f3563af52ea721d7dfb800d0405))\n* **crawl:** prioritize anti-bot detection over placeholder warning ([05aa933](https://github.com/us/crw/commit/05aa93358f3fa9826eb97db114bef06d1754dae3))\n* escalate to JS renderer on HTTP failure and empty markdown ([9fc7934](https://github.com/us/crw/commit/9fc79344702e30be0555e63a02aa5377f15cca93))\n* **mcp:** apply per-endpoint timeouts to proxy client ([741f1b2](https://github.com/us/crw/commit/741f1b245e064b267b4fb0dfb5487099bc86e2e4))\n* **renderer:** enforce Deadline in HttpFetcher via tokio::time::timeout ([b1c4058](https://github.com/us/crw/commit/b1c4058f47eed204d413a51c56d8ae43f547ff63))\n* **renderer:** keep 
larger thin-result HTML when stitching attempts ([8147236](https://github.com/us/crw/commit/8147236cc6b94d3c2db34f1128a687d9e110dc35))\n* **renderer:** rescue 39 bench failures via UA, retry, and thin-content escalation ([ddacb49](https://github.com/us/crw/commit/ddacb49e92688c3a20c7f7fe32da58d83c620f31))\n* **server:** classify anti-bot challenges as anti_bot, not no-markdown ([3ece4dd](https://github.com/us/crw/commit/3ece4dd5b5318f71fe3744fa9d09948afaa738de))\n\n\n### Performance\n\n* **renderer:** drop fixed 2s JS wait, rely on SPA selector poll ([cb043f7](https://github.com/us/crw/commit/cb043f7754f870df67a3a56e41a552ba7f7867f4))\n* **renderer:** tighten tier timeouts and bump LP retry threshold ([3f93d60](https://github.com/us/crw/commit/3f93d6052251eb72abee20ea4992ca3cdfc7ddb4))\n* **renderer:** widen breaker tolerance to 20 failures / 10s cooldown ([6525a84](https://github.com/us/crw/commit/6525a84c18e2c4fafa92cc0d29203310755d3ef1))\n\n\n### Miscellaneous\n\n* release 0.5.0 ([3987de1](https://github.com/us/crw/commit/3987de1b15b5d7605cc26645d14b74020c8eb7a9))\n\n### [0.4.2](https://github.com/us/crw/compare/v0.4.1...v0.4.2) (2026-04-29)\n\n\n### Features\n\n* **core:** add render decision types and prometheus metrics scaffold ([e08682b](https://github.com/us/crw/commit/e08682b761822a7100e0f40cffe4cd4f3dcf2a5c))\n* **renderer:** add per-host renderer preference cache ([21e41d1](https://github.com/us/crw/commit/21e41d1330bed255854824f55c3419a590a86411))\n* **renderer:** track HTTP routing and warn on pinned-renderer failure ([3208d27](https://github.com/us/crw/commit/3208d277aa4c480c1257648dc86a7151dcfb8976))\n* **renderer:** wire host preferences, circuit breakers, and CF detection ([0c53c64](https://github.com/us/crw/commit/0c53c645562c43a4f62aa22f1e7f603c42b3b3f3))\n\n\n### Bug Fixes\n\n* **core,renderer:** surface render metadata and harden host normalization ([ee4130b](https://github.com/us/crw/commit/ee4130b62467defb61c5b85bf267c767b3bd909a))\n* 
**renderer:** correct failure classification and routing decisions ([4d684bd](https://github.com/us/crw/commit/4d684bdaa0cd1a27f011d73322099258a0f713be))\n* **renderer:** probe lifecycle, RAII guard, breaker counter ([02044f5](https://github.com/us/crw/commit/02044f573cd6274231b3856cd799d7e74d61f9ba))\n\n### [0.4.1](https://github.com/us/crw/compare/v0.4.0...v0.4.1) (2026-04-28)\n\n\n### Features\n\n* add per-request renderer field for scrape and crawl APIs ([#29](https://github.com/us/crw/issues/29)) ([f1e0b63](https://github.com/us/crw/commit/f1e0b63fd28be0ceb38342086a309f92bbbc1e53))\n* **crw-browse:** add interactive browser MCP server with phase-2 tools ([e78879d](https://github.com/us/crw/commit/e78879db18c7c4b3df2a4984349a65b4493b1cda))\n* honor renderer mode and force_js in config (fixes [#28](https://github.com/us/crw/issues/28)) ([b76e473](https://github.com/us/crw/commit/b76e473facbce08a841ef8bd9fdfac97a552a8fd))\n\n\n### Bug Fixes\n\n* detect failed JS renders and fail over to next renderer ([fca8fd5](https://github.com/us/crw/commit/fca8fd5cadb4fa3c96bf5a315f96ab6d1e63989c))\n* **docs:** use absolute logo paths in site.config.js ([c5c9321](https://github.com/us/crw/commit/c5c93215561094a35038ab6af2b21e91c16199f4))\n* **docs:** use absolute paths for logo and favicon assets ([cdb1451](https://github.com/us/crw/commit/cdb14517da6425c105b95749dec35bbc9e977f5e))\n\n[Full changelog →](CHANGELOG.md)\n\n---\n\n# fastCRW — Open Source Web Scraping API for AI Agents\n\n**Power AI agents with clean web data.** Single Rust binary, zero config, Firecrawl-compatible API. The open-source Firecrawl alternative you can self-host for free — or use our [managed cloud](https://fastcrw.com).\n\n\u003e **Don't want to self-host?** [**Sign up free →**](https://fastcrw.com) — managed cloud with global proxy network, web search, and dashboard. Same API, zero infra. **500 free credits, no credit card required.**\n\n---\n\n## Why CRW? 
— Firecrawl \u0026 Crawl4AI Alternative\n\n- **Single binary, 6 MB RAM** — no Redis, no Node.js, no containers. Firecrawl needs 5 containers and 4 GB+. Crawl4AI requires Python + Playwright\n- **5.5x faster than Firecrawl** — 833ms avg vs 4,600ms ([see benchmarks](#benchmark)). P50 at 446ms\n- **73/100 search win rate** — beats Firecrawl (25/100) and Tavily (2/100) in head-to-head benchmarks\n- **Free self-hosting** — $0/1K scrapes vs Firecrawl's $0.83–5.33. No infra, no cold starts (85ms). No API key required for local mode\n- **Agent ready** — add to any MCP client in one command. Embedded mode: no server needed\n- **Firecrawl-compatible API** — drop-in replacement. Same `/v1/scrape`, `/v1/crawl`, `/v1/map` endpoints. HTML to markdown, structured data extraction, website crawler — all built-in\n- **Built for RAG pipelines** — clean LLM-ready markdown output for vector databases and AI data ingestion\n- **Open source** — AGPL-3.0, developed transparently. [Join our community](https://discord.gg/kkFh2SC8)\n\n| Metric | CRW (self-hosted) | fastcrw.com (cloud) | Firecrawl | Tavily | Crawl4AI |\n|---|---|---|---|---|---|\n| **Coverage (1K URLs)** | **92.0%** | **92.0%** | 77.2% | — | — |\n| **Avg Scrape Latency** | **833ms** | **833ms** | 4,600ms | — | — |\n| **Avg Search Latency** | **880ms** | **880ms** | 954ms | 2,000ms | — |\n| **Search Win Rate** | **73/100** | **73/100** | 25/100 | 2/100 | — |\n| **Idle RAM** | 6.6 MB | 0 (managed) | ~500 MB+ | — (cloud) | — |\n| **Cold start** | 85 ms | 0 (always-on) | 30–60 s | — | — |\n| **Self-hosting** | **Single binary** | — | Multi-container | No | Python + Playwright |\n| **Cost / 1K scrapes** | **$0** (self-hosted) | From $13/mo | $0.83–5.33 | — | $0 |\n| **License** | AGPL-3.0 | Managed | AGPL-3.0 | Proprietary | Apache-2.0 |\n\n---\n\n## Web Scraping \u0026 Crawling Features\n\n**Core**\n\n| Feature | Description |\n|---------|-------------|\n| [**Scrape**](#scrape) | Convert any URL to markdown, HTML, JSON, or links 
|\n| [**Crawl**](#crawl) | Async BFS website crawler with rate limiting |\n| [**Map**](#map) | Discover all URLs on a site instantly |\n| [**Search**](#search) | Web search + content scraping (cloud) |\n\n**More**\n\n| Feature | Description |\n|---------|-------------|\n| [**LLM Extraction**](#llm-structured-extraction) | Send a JSON schema, get validated structured data back |\n| [**JS Rendering**](#js-rendering) | Auto-detect SPAs, render via LightPanda or Chrome |\n| [**CLI**](#cli) | Scrape any URL from your terminal — no server needed |\n| [**MCP Server**](#mcp-server-for-ai-agents) | Built-in stdio + HTTP transport for any AI agent |\n\n**Use Cases:** RAG pipelines · AI agent web access · content monitoring · data extraction · HTML to markdown conversion · web archiving\n\n---\n\n## Quick Start\n\n```bash\n# Install:\ncurl -fsSL https://raw.githubusercontent.com/us/crw/main/install.sh | CRW_BINARY=crw sh\n\n# Scrape:\ncrw example.com\n\n# Add to Claude Code (local):\nclaude mcp add crw -- npx crw-mcp\n# Add to Claude Code (cloud — includes web search, 500 free credits at fastcrw.com):\nclaude mcp add -e CRW_API_URL=https://fastcrw.com/api -e CRW_API_KEY=your-key crw -- npx crw-mcp\n```\n\n\u003e Or: `pip install crw` (Python SDK) · `npx crw-mcp` (zero install) · `brew install us/crw/crw` (Homebrew) · [All install options →](https://docs.fastcrw.com/installation/)\n\n### Scrape\n\nConvert any URL to clean markdown, HTML, or structured JSON.\n\n```python\nfrom crw import CrwClient\n\nclient = CrwClient(api_url=\"https://fastcrw.com/api\", api_key=\"YOUR_API_KEY\")  # local: CrwClient()\nresult = client.scrape(\"https://example.com\")\nprint(result[\"markdown\"])\n```\n\n\u003e **Local mode:** `CrwClient()` with no arguments runs a self-contained scraping engine — no server, no API key, no setup. 
The SDK automatically downloads the `crw-mcp` binary on first use.\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003eCLI / cURL\u003c/b\u003e\u003c/summary\u003e\n\n**CLI:**\n```bash\ncrw example.com\ncrw example.com --format html\ncrw example.com --js --css 'article'\n```\n\n**Self-hosted** (`crw-server` running on `:3000`):\n```bash\ncurl -X POST http://localhost:3000/v1/scrape \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"url\": \"https://example.com\"}'\n```\n\n**Cloud:**\n```bash\ncurl -X POST https://fastcrw.com/api/v1/scrape \\\n  -H \"Authorization: Bearer YOUR_API_KEY\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"url\": \"https://example.com\"}'\n```\n\u003c/details\u003e\n\nOutput:\n```\n# Example Domain\n\nThis domain is for use in illustrative examples in documents.\nYou may use this domain in literature without prior coordination.\n```\n\n#### Renderer selection \u0026 response metadata\n\nCRW picks between three rendering backends per request:\n\n- **`http`** (1 credit) — plain HTTP fetch. Used for static pages.\n- **`lightpanda`** (1 credit) — lightweight JS renderer for most SPAs.\n- **`chrome`** (2 credits) — full Chromium for sites where LightPanda's hydration crashes (e.g. some Next.js App Router pages).\n\nBy default the engine auto-selects, learns per-host preferences after repeated failures, and falls over chrome → lightpanda → http transparently. 
Pass `\"renderer\"` to pin one of `auto | http | lightpanda | chrome` (Firecrawl's `engine` is also accepted as an alias).\n\nEvery successful response includes routing metadata so callers can audit and debug:\n\n```jsonc\n{\n  \"data\": {\n    \"markdown\": \"...\",\n    \"renderDecision\": {\n      \"kind\": \"failover\",                 // autoDefault | autoPromoted | userPinned | failover | breakerSkipped\n      \"chain\": [\"lightpanda\", \"chrome\"],  // renderers actually attempted\n      \"reason\": \"nextJsClientError\"       // why the chain advanced\n    },\n    \"creditCost\": 2,\n    \"warnings\": [\n      \"lightpanda returned a failed render (nextjs_client_error)\"\n    ],\n    \"metadata\": { \"renderedWith\": \"chrome\", /* … */ }\n  }\n}\n```\n\nWhen you hard-pin a renderer that fails (e.g. `\"renderer\":\"lightpanda\"` on a hydration-crashing page), `success` stays `true` for protocol compatibility — but `data.warnings[]` carries an actionable hint suggesting `renderer=\"chrome\"` or auto mode. 
Clients should surface the warnings array.\n\n### Crawl\n\nScrape all pages of a website asynchronously.\n\n```python\nfrom crw import CrwClient\n\nclient = CrwClient(api_url=\"https://fastcrw.com/api\", api_key=\"YOUR_API_KEY\")  # local: CrwClient()\npages = client.crawl(\"https://docs.example.com\", max_depth=2, max_pages=50)\nfor page in pages:\n    print(page[\"metadata\"][\"sourceURL\"], page[\"markdown\"][:80])\n```\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003eCLI / cURL\u003c/b\u003e\u003c/summary\u003e\n\n```bash\n# Start crawl\ncurl -X POST http://localhost:3000/v1/crawl \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"url\": \"https://docs.example.com\", \"maxDepth\": 2, \"maxPages\": 50}'\n\n# Check status (use job ID from above)\ncurl http://localhost:3000/v1/crawl/JOB_ID\n```\n\u003c/details\u003e\n\n### Map\n\nDiscover all URLs on a site instantly.\n\n```python\nfrom crw import CrwClient\n\nclient = CrwClient(api_url=\"https://fastcrw.com/api\", api_key=\"YOUR_API_KEY\")  # local: CrwClient()\nurls = client.map(\"https://example.com\")\nprint(urls)  # [\"https://example.com\", \"https://example.com/about\", ...]\n```\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003ecURL\u003c/b\u003e\u003c/summary\u003e\n\n```bash\ncurl -X POST http://localhost:3000/v1/map \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"url\": \"https://example.com\"}'\n```\n\u003c/details\u003e\n\n### Search\n\nSearch the web and get full page content from results.\n\n```python\nfrom crw import CrwClient\n\n# Cloud only — requires fastcrw.com API key\nclient = CrwClient(api_url=\"https://fastcrw.com/api\", api_key=\"YOUR_KEY\")\nresults = client.search(\"open source web scraper 2026\", limit=10)\n```\n\n\u003e **Cloud only:** `search()` requires a [fastcrw.com](https://fastcrw.com) API key (**500 free credits, no credit card**). 
Local/embedded mode provides `scrape`, `crawl`, and `map`.\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003ecURL\u003c/b\u003e\u003c/summary\u003e\n\n```bash\ncurl -X POST https://fastcrw.com/api/v1/search \\\n  -H \"Authorization: Bearer YOUR_API_KEY\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"query\": \"open source web scraper 2026\", \"limit\": 10}'\n```\n\u003c/details\u003e\n\n### API Endpoints\n\n| Method | Endpoint | Description |\n|--------|----------|-------------|\n| `POST` | `/v1/scrape` | Scrape a single URL, optionally with LLM extraction |\n| `POST` | `/v1/crawl` | Start async BFS crawl (returns job ID) |\n| `GET` | `/v1/crawl/:id` | Check crawl status and retrieve results |\n| `DELETE` | `/v1/crawl/:id` | Cancel a running crawl job |\n| `POST` | `/v1/map` | Discover all URLs on a site |\n| `POST` | `/v1/search` | Web search with optional content scraping (cloud only) |\n| `GET` | `/health` | Health check (no auth required) |\n| `POST` | `/mcp` | Streamable HTTP MCP transport |\n\n[Full API reference →](https://docs.fastcrw.com/#rest-api)\n\n---\n\n## Connect to AI Agents — MCP, Skill, Onboarding\n\nAdd CRW to any AI agent or MCP client in seconds.\n\n### Skill\n\nInstall the CRW skill to all detected agents with one command:\n\n```bash\nnpx crw-mcp init --all\n```\n\nRestart your agent after installing. Works with Claude Code, Cursor, Gemini CLI, Codex, OpenCode, and Windsurf.\n\n### MCP Server for AI Agents\n\nAdd CRW to any MCP-compatible client:\n\n```json\n{\n  \"mcpServers\": {\n    \"crw\": {\n      \"command\": \"npx\",\n      \"args\": [\"crw-mcp\"]\n    }\n  }\n}\n```\n\n\u003e Works with Claude Desktop, Cursor, Windsurf, Cline, Continue.dev, and any MCP client.\n\u003e\n\u003e **Config file locations:** Claude Code — `claude mcp add` (no file edit). Claude Desktop — `~/Library/Application Support/Claude/claude_desktop_config.json`. Cursor — `.cursor/mcp.json`. Windsurf — `~/.codeium/windsurf/mcp_config.json`. 
[All clients →](https://docs.fastcrw.com/mcp-clients/)\n\n**Cloud mode** — adds `crw_search` for web search:\n\n```json\n{\n  \"mcpServers\": {\n    \"crw\": {\n      \"command\": \"npx\",\n      \"args\": [\"crw-mcp\"],\n      \"env\": {\n        \"CRW_API_URL\": \"https://fastcrw.com/api\",\n        \"CRW_API_KEY\": \"your-api-key\"\n      }\n    }\n  }\n}\n```\n\n### MCP Tools\n\n| Mode | Tool | Description |\n|------|------|-------------|\n| **Embedded + Cloud** | `crw_scrape` | Scrape a URL → markdown, HTML, JSON, links |\n| **Embedded + Cloud** | `crw_crawl` | Start async BFS crawl (returns job ID) |\n| **Embedded + Cloud** | `crw_check_crawl_status` | Poll crawl job status (use with `crw_crawl`) |\n| **Embedded + Cloud** | `crw_map` | Discover all URLs on a site |\n| **Cloud only** | `crw_search` | Web search + optional content scraping |\n\n\u003e **Embedded mode** (default): no server, no API key, no setup — the MCP binary runs a self-contained scraping engine. **Cloud mode** (`CRW_API_URL` set): forwards calls to [fastcrw.com](https://fastcrw.com), adds `crw_search`.\n\n[Full MCP docs →](https://docs.fastcrw.com/mcp/)\n\n**Claude Code shortcut:**\n\n```bash\n# Local (embedded — no server, no API key):\nclaude mcp add crw -- npx crw-mcp\n\n# Cloud (+ web search):\nclaude mcp add -e CRW_API_URL=https://fastcrw.com/api -e CRW_API_KEY=your-key crw -- npx crw-mcp\n```\n\n\u003e Listed on the [MCP Registry](https://registry.modelcontextprotocol.io/?q=crw)\n\n### Agent Onboarding\n\nAre you an AI agent? Fetch this skill file to configure CRW for the user's coding agents. 
The SKILL.md contains tool descriptions, usage examples, authentication setup, and edge case handling:\n\n```bash\ncurl -s https://fastcrw.com/agent-onboarding/SKILL.md\n```\n\n---\n\n## Benchmark\n\n### Search — CRW vs Firecrawl vs Tavily (100 queries, concurrent)\n\n| Metric | CRW | Firecrawl | Tavily |\n|---|---|---|---|\n| **Avg Latency** | **880ms** | 954ms | 2,000ms |\n| **Median Latency** | **785ms** | 932ms | 1,724ms |\n| **Win Rate** | **73/100** | 25/100 | 2/100 |\n\nCRW is **2.3x faster than Tavily** and won 73% of latency races. [Full search benchmark →](https://fastcrw.com/benchmarks/tavily-search)\n\n### Scrape — CRW vs Firecrawl (1,000 URLs, JS rendering enabled)\n\nTested on [Firecrawl's scrape-content-dataset-v1](https://huggingface.co/datasets/firecrawl/scrape-content-dataset-v1):\n\n| Metric | CRW | Firecrawl v2.5 |\n|---|---|---|\n| **Coverage** | **92.0%** | 77.2% |\n| **Avg Latency** | **833ms** | 4,600ms |\n| **P50 Latency** | **446ms** | — |\n| **Noise Rejection** | **88.4%** | noise 6.8% |\n| **Idle RAM** | **6.6 MB** | ~500 MB+ |\n| **Cost / 1K scrapes** | **$0** (self-hosted) | $0.83–5.33 |\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003eResource comparison\u003c/b\u003e\u003c/summary\u003e\n\n| Metric | CRW | Firecrawl |\n|---|---|---|\n| Min RAM | ~7 MB | 4 GB |\n| Recommended RAM | ~64 MB (under load) | 8–16 GB |\n| Docker images | single ~8 MB binary | ~2–3 GB total |\n| Cold start | 85 ms | 30–60 seconds |\n| Containers needed | 1 (+optional sidecar) | 5 |\n\n\u003c/details\u003e\n\n[Full benchmark details →](https://docs.fastcrw.com/introduction/#benchmarks)\n\nRun the benchmark yourself:\n\n```bash\npip install datasets aiohttp\npython bench/run_bench.py\n```\n\n---\n\n## Install\n\n### MCP Server (`crw-mcp`) — recommended for AI agents\n\n```bash\nnpx crw-mcp                           # zero install (npm)\npip install crw                        # Python SDK (auto-downloads binary)\nbrew install us/crw/crw-mcp            # 
Homebrew\ncargo install crw-mcp                  # Cargo\ndocker run -i ghcr.io/us/crw crw-mcp  # Docker\n```\n\n### CLI (`crw`) — scrape URLs from your terminal\n\n```bash\nbrew install us/crw/crw\n\n# One-line install (auto-detects OS \u0026 arch):\ncurl -fsSL https://raw.githubusercontent.com/us/crw/main/install.sh | CRW_BINARY=crw sh\n\n# APT (Debian/Ubuntu):\ncurl -fsSL https://apt.fastcrw.com/gpg.key | sudo gpg --dearmor -o /usr/share/keyrings/crw.gpg\necho \"deb [signed-by=/usr/share/keyrings/crw.gpg] https://apt.fastcrw.com stable main\" | sudo tee /etc/apt/sources.list.d/crw.list\nsudo apt update \u0026\u0026 sudo apt install crw\n\ncargo install crw-cli\n```\n\n### API Server (`crw-server`) — Firecrawl-compatible REST API\n\nFor serving multiple apps, other languages (Node.js, Go, Java), or as a shared microservice.\n\n```bash\nbrew install us/crw/crw-server\n\n# One-line install:\ncurl -fsSL https://raw.githubusercontent.com/us/crw/main/install.sh | CRW_BINARY=crw-server sh\n\n# Docker:\ndocker run -p 3000:3000 ghcr.io/us/crw\n```\n\nCustom port:\n```bash\nCRW_SERVER__PORT=8080 crw-server                                       # env var\ndocker run -p 8080:8080 -e CRW_SERVER__PORT=8080 ghcr.io/us/crw       # Docker\n```\n\n**Docker Compose** ships with `lightpanda` enabled by default; `chrome` is opt-in to keep small VPS deploys lean (~500MB image + 1GB resident):\n\n```bash\n# baseline — http + lightpanda\ndocker compose up -d\n\n# add chrome failover (recommended for production)\ndocker compose --profile heavy up -d\n\n# stealth tier — browserless/chromium with anti-fingerprint plugin\n# (+2.5pt bench success on bot-defended sites; SSPL-3.0, see warning below)\necho \"BROWSERLESS_TOKEN=$(openssl rand -hex 24)\" \u003e\u003e .env\ndocker compose -f docker-compose.yml -f docker-compose.stealth.yml \\\n  --profile stealth up -d\n```\n\nWithout `--profile heavy` or `--profile stealth`, the engine still serves all endpoints — chrome-required URLs will 
exhaust their lightpanda failover and surface `data.warnings[]` instead of routing to chrome.\n\n\u003e ⚠️ **Stealth profile licensing — compliance risk to review.**\n\u003e `--profile stealth` pulls `ghcr.io/browserless/chromium`, which is\n\u003e **SSPL-3.0**. SSPL §13 obliges anyone who makes the functionality of the\n\u003e Program available to third parties as a service (commercial *or*\n\u003e otherwise) to release the *Service Source Code* — the full\n\u003e management/automation/hosting stack around it. CRW (AGPL-3) connects\n\u003e over a network socket only, so the opencore CRW source is most likely\n\u003e outside §13's reach — but the boundary is fact-specific and we are not\n\u003e lawyers. Get legal review before exposing this stack to third parties.\n\u003e The default `--profile heavy` (chromedp/headless-shell, Apache-2/BSD)\n\u003e carries none of this risk.\n\n\u003e **When do you need `crw-server`?** Only if you want a REST API endpoint. The Python SDK (`CrwClient()`) and MCP binary (`crw-mcp`) both run a self-contained engine — no server required.\n\n---\n\n## SDKs\n\n### Python\n\n```bash\npip install crw\n```\n\n```python\nfrom crw import CrwClient\n\n# Cloud (fastcrw.com — includes web search):\nclient = CrwClient(api_url=\"https://fastcrw.com/api\", api_key=\"YOUR_API_KEY\")\n# Local (embedded, no server needed):\n# client = CrwClient()\n\n# Scrape\nresult = client.scrape(\"https://example.com\", formats=[\"markdown\", \"links\"])\nprint(result[\"markdown\"])\n\n# Crawl (blocks until complete)\npages = client.crawl(\"https://docs.example.com\", max_depth=2, max_pages=50)\n\n# Map\nurls = client.map(\"https://example.com\")\n\n# Search (cloud only)\nresults = client.search(\"AI news\", limit=10, sources=[\"web\", \"news\"])\n```\n\n\u003e **Requires:** Python 3.9+. 
Local mode auto-downloads the `crw-mcp` binary on first use — no manual setup.\n\n### Community SDKs\n\n- [`crewai-crw`](https://pypi.org/project/crewai-crw/) — CRW scraping tools for CrewAI agents\n- [`langchain-crw`](https://pypi.org/project/langchain-crw/) — CRW document loader for LangChain\n\n\u003e **Node.js:** No official SDK yet — use the REST API directly or `npx crw-mcp` for MCP. [SDK examples →](https://docs.fastcrw.com/sdk-examples/)\n\n---\n\n## Integrations\n\n**Frameworks:** [CrewAI](https://pypi.org/project/crewai-crw/) · [LangChain](https://pypi.org/project/langchain-crw/) · [Agno](https://github.com/agno-agi/agno/pull/7183) · [Dify](https://github.com/langgenius/dify)\n\n**Platforms:** [n8n](https://fastcrw.com/blog/n8n-web-scraping-crw) · [Flowise](https://github.com/FlowiseAI/Flowise/pull/6066)\n\nMissing your favorite tool? [Open an issue →](https://github.com/us/crw/issues) · [All integrations →](https://docs.fastcrw.com/integrations/)\n\n---\n\n## LLM Structured Extraction\n\nSend a JSON schema, get validated structured data back using LLM function calling. [Full extraction docs →](https://docs.fastcrw.com/extract/)\n\n```bash\ncurl -X POST http://localhost:3000/v1/scrape \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"url\": \"https://example.com/product\",\n    \"formats\": [\"json\"],\n    \"jsonSchema\": {\n      \"type\": \"object\",\n      \"properties\": {\n        \"name\": { \"type\": \"string\" },\n        \"price\": { \"type\": \"number\" }\n      },\n      \"required\": [\"name\", \"price\"]\n    }\n  }'\n```\n\nConfigure the LLM provider:\n\n```toml\n[extraction.llm]\nprovider = \"anthropic\"        # \"anthropic\" or \"openai\"\napi_key = \"sk-...\"            # or CRW_EXTRACTION__LLM__API_KEY env var\nmodel = \"claude-sonnet-4-20250514\"\n```\n\n---\n\n## JS Rendering\n\nCRW auto-detects SPAs and renders them via a headless browser. 
[Full JS rendering docs →](https://docs.fastcrw.com/js-rendering/)\n\n```bash\ncrw-server setup   # downloads LightPanda, creates config.local.toml\n```\n\n| Renderer | Protocol | Best for |\n|----------|----------|----------|\n| LightPanda | CDP over WebSocket | Low-resource environments (default); simple sites |\n| Chrome (chromedp/headless-shell) | CDP over WebSocket | Modern React/Vite/Next SPAs; recommended for production |\n| Chrome (browserless/chromium, opt-in `stealth` profile) | CDP over WebSocket | Bot-defended sites (Cloudflare Turnstile, DataDome) — SSPL-3.0, see compose notes |\n| Playwright | CDP over WebSocket | Full browser compatibility |\n\n\u003e **Renderer choice matters for SPAs.** LightPanda is fast and cheap but its\n\u003e JS runtime does not fully cover every modern bundle format. For React /\n\u003e Vite / Next sites whose content appears only after hydration, configure\n\u003e Chrome (or Playwright) alongside LightPanda — CRW will fall back to Chrome\n\u003e automatically when LightPanda returns a loading placeholder. Leaving\n\u003e LightPanda as the *only* renderer may silently return\n\u003e `\"Loading...\"`-style shell content for these sites.\n\nWith Docker Compose, LightPanda runs as a sidecar automatically:\n\n```bash\ndocker compose up\n```\n\n---\n\n## CLI\n\nScrape any URL from your terminal — no server, no config. 
[Full CLI docs →](https://docs.fastcrw.com/quick-start/)\n\n```bash\ncrw example.com                        # markdown to stdout\ncrw example.com --format html          # HTML output\ncrw example.com --format links         # extract all links\ncrw example.com --js                   # with JS rendering\ncrw example.com --css 'article'        # CSS selector\ncrw example.com --stealth              # stealth mode (rotate UAs)\ncrw example.com -o page.md             # write to file\n```\n\n---\n\n## Self-Hosting\n\nOnce [installed](#api-server-crw-server--firecrawl-compatible-rest-api), start the server and optionally enable JS rendering:\n\n```bash\ncrw-server                    # start REST API on :3000\ncrw-server setup              # optional: downloads LightPanda for JS rendering\ndocker compose up             # alternative: Docker with LightPanda sidecar\n```\n\nSee the [self-hosting guide](https://docs.fastcrw.com/#self-hosting) for production hardening, auth, reverse proxy, and resource tuning.\n\n---\n\n## Open Source vs Cloud\n\n| | Self-hosted (free) | [fastcrw.com](https://fastcrw.com) Cloud |\n|---|---|---|\n| Core scraping | ✅ | ✅ |\n| JS rendering | ✅ (LightPanda/Chrome) | ✅ |\n| Web search | ❌ | ✅ |\n| Global proxy network | ❌ | ✅ |\n| Dashboard | ❌ | ✅ |\n| Commercial use without open-sourcing | Requires AGPL compliance | ✅ Included |\n| Cost | $0 | From $13/mo |\n\n\u003e [**Sign up free →**](https://fastcrw.com) — **500 free credits**, no credit card required.\n\n---\n\n## Architecture\n\n```\n┌─────────────────────────────────────────────┐\n│                 crw-server                  │\n│         Axum HTTP API + Auth + MCP          │\n├──────────┬──────────┬───────────────────────┤\n│ crw-crawl│crw-extract│    crw-renderer      │\n│ BFS crawl│ HTML→MD   │  HTTP + CDP(WS)      │\n│ robots   │ LLM/JSON  │  LightPanda/Chrome   │\n│ sitemap  │ clean/read│  auto-detect SPA     │\n├──────────┴──────────┴───────────────────────┤\n│                 
crw-core                    │\n│        Types, Config, Errors                │\n└─────────────────────────────────────────────┘\n```\n\n| Crate | Description | |\n|-------|-------------|-|\n| [`crw-core`](crates/crw-core) | Core types, config, and error handling | [![crates.io](https://img.shields.io/crates/v/crw-core.svg)](https://crates.io/crates/crw-core) |\n| [`crw-renderer`](crates/crw-renderer) | HTTP + CDP browser rendering engine | [![crates.io](https://img.shields.io/crates/v/crw-renderer.svg)](https://crates.io/crates/crw-renderer) |\n| [`crw-extract`](crates/crw-extract) | HTML → markdown/plaintext extraction | [![crates.io](https://img.shields.io/crates/v/crw-extract.svg)](https://crates.io/crates/crw-extract) |\n| [`crw-crawl`](crates/crw-crawl) | Async BFS crawler with robots.txt \u0026 sitemap | [![crates.io](https://img.shields.io/crates/v/crw-crawl.svg)](https://crates.io/crates/crw-crawl) |\n| [`crw-server`](crates/crw-server) | Axum API server (Firecrawl-compatible) | [![crates.io](https://img.shields.io/crates/v/crw-server.svg)](https://crates.io/crates/crw-server) |\n| [`crw-mcp`](crates/crw-mcp) | MCP stdio server (embedded + proxy mode) | [![crates.io](https://img.shields.io/crates/v/crw-mcp.svg)](https://crates.io/crates/crw-mcp) |\n| [`crw-cli`](crates/crw-cli) | Standalone CLI (`crw` binary, no server) | [![crates.io](https://img.shields.io/crates/v/crw-cli.svg)](https://crates.io/crates/crw-cli) |\n\n[Full architecture docs →](https://docs.fastcrw.com/architecture/)\n\n---\n\n## Configuration\n\nLayered TOML config with environment variable overrides:\n\n1. `config.default.toml` — built-in defaults\n2. `config.local.toml` — local overrides (or `CRW_CONFIG=myconfig`)\n3. Environment variables — `CRW_` prefix, `__` separator (e.g. 
`CRW_SERVER__PORT=8080`)\n\n```toml\n[server]\nhost = \"0.0.0.0\"\nport = 3000\nrate_limit_rps = 10\n\n[renderer]\nmode = \"auto\"  # auto | lightpanda | playwright | chrome | none\n\n[crawler]\nmax_concurrency = 10\nrequests_per_second = 10.0\nrespect_robots_txt = true\n\n[auth]\n# api_keys = [\"fc-key-1234\"]\n```\n\nSee the [full configuration reference](https://docs.fastcrw.com/#configuration).\n\n---\n\n## Security\n\n- **SSRF protection** — blocks loopback, private IPs, cloud metadata (`169.254.x.x`), IPv4-mapped IPv6 addresses, and non-HTTP schemes (`file://`, `data:`)\n- **Auth** — optional Bearer token with constant-time comparison\n- **robots.txt** — RFC 9309 compliant with wildcard patterns\n- **Rate limiting** — token-bucket algorithm, returns 429 with `error_code`\n- **Resource limits** — max body 1 MB, max crawl depth 10, max pages 1000\n\n[Full security docs →](https://docs.fastcrw.com/self-hosting-hardening/)\n\n---\n\n## Resources\n\n- [Documentation](https://docs.fastcrw.com)\n- [API Reference](https://docs.fastcrw.com/#rest-api)\n- [MCP Setup Guide](https://docs.fastcrw.com/#mcp)\n- [Playground](https://docs.fastcrw.com/playground/)\n- [Changelog](CHANGELOG.md)\n\n---\n\n## Contributing\n\nContributions are welcome! Please open an issue or submit a pull request.\n\n1. Fork the repository\n2. Install pre-commit hooks: `make hooks`\n3. Create your feature branch (`git checkout -b feat/my-feature`)\n4. Commit your changes (`git commit -m 'feat: add my feature'`)\n5. Push to the branch (`git push origin feat/my-feature`)\n6. Open a Pull Request\n\nThe pre-commit hook runs the same checks as CI (`cargo fmt`, `cargo clippy`, `cargo test`). Run manually with `make check`.\n\n### Contributors\n\n\u003ca href=\"https://github.com/us/crw/graphs/contributors\"\u003e\n  \u003cimg alt=\"contributors\" src=\"https://contrib.rocks/image?repo=us/crw\"/\u003e\n\u003c/a\u003e\n\n---\n\n## License\n\nCRW is open-source under [AGPL-3.0](LICENSE). 
For a managed version without AGPL obligations, see [fastcrw.com](https://fastcrw.com).\n\n---\n\n## Get Started\n\n- **Self-host free:** `curl -fsSL https://raw.githubusercontent.com/us/crw/main/install.sh | sh` — works in 30 seconds\n- **Cloud:** [**Sign up free →**](https://fastcrw.com) — **500 free credits**, no credit card required\n- **Questions?** [Join our Discord](https://discord.gg/kkFh2SC8)\n\n---\n\n**It is the sole responsibility of end users to respect websites' policies when scraping.** Users are advised to adhere to applicable privacy policies and terms of use. By default, CRW respects `robots.txt` directives.\n\n\u003cp align=\"right\"\u003e\n  \u003ca href=\"#readme-top\"\u003e↑ Back to Top ↑\u003c/a\u003e\n\u003c/p\u003e\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fus%2Fcrw","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fus%2Fcrw","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fus%2Fcrw/lists"}