{"id":25939088,"url":"https://github.com/cocoindex-io/cocoindex","last_synced_at":"2026-06-17T17:01:20.337Z","repository":{"id":280532531,"uuid":"942312530","full_name":"cocoindex-io/cocoindex","owner":"cocoindex-io","description":"Incremental engine for long horizon agents 🌟 Star if you like it!","archived":false,"fork":false,"pushed_at":"2026-06-09T06:37:51.000Z","size":113080,"stargazers_count":10227,"open_issues_count":61,"forks_count":801,"subscribers_count":50,"default_branch":"main","last_synced_at":"2026-06-09T08:26:12.800Z","etag":null,"topics":["agentic-data-framework","ai","ai-agents","change-data-capture","codebase-intelligence","context-engineering","data-engineering","data-indexing","data-processing","etl","help-wanted","indexing","knowledge-graph","llm","long-horizon-agent","python","rag","real-time","rust","semantic-search"],"latest_commit_sha":null,"homepage":"https://cocoindex.io","language":"Rust","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/cocoindex-io.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":"CONTRIBUTING.md","funding":null,"license":"LICENSE","code_of_conduct":"CODE_OF_CONDUCT.md","threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":".github/SECURITY.md","support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null,"notice":null,"maintainers":null,"copyright":null,"agents":"AGENTS.md","dco":null,"cla":null}},"created_at":"2025-03-03T23:03:09.000Z","updated_at":"2026-06-09T08:22:30.000Z","dependencies_parsed_at":"2026-03-17T08:03:30.300Z","dependency_job_id":null,"html_url":"https://github.com/cocoindex-io/cocoindex","commit_stats":null,"previous_names":["cocoindex-io/cocoindex"],"tags_count":205,"template":false,"template_full_name":null,"purl":"pkg:github/cocoindex-io/cocoindex","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/cocoindex-io%2Fcocoindex","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/cocoindex-io%2Fcocoindex/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/cocoindex-io%2Fcocoindex/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/cocoindex-io%2Fcocoindex/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/cocoindex-io","download_url":"https://codeload.github.com/cocoindex-io/cocoindex/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/cocoindex-io%2Fcocoindex/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":34457743,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-05-26T15:22:16.424Z","status":"online","status_checked_at":"2026-06-17T02:00:05.408Z","response_time":127,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["agentic-data-framework","ai","ai-agents","change-data-capture","codebase-intelligence","context-engineering","data-engineering","data-indexing","data-processing","etl","help-wanted","indexing","knowledge-graph","llm","long-horizon-agent","python","rag","real-time","rust","semantic-search"],"created_at":"2025-03-04T04:15:41.620Z","updated_at":"2026-06-17T17:01:20.323Z","avatar_url":"https://github.com/cocoindex-io.png","language":"Rust","funding_links":[],"categories":["Rust","Libraries","Graph ETL","Recently Updated","A01_文本生成_文本对话","Repos","Table of Contents","🧰 Frameworks that Facilitate RAG","Stream Processing","Corporate and Analytical Applications","Vector Databases \u0026 Retrieval Platforms","\u003ca name=\"Rust\"\u003e\u003c/a\u003eRust","Data Pipelines \u0026 Streaming","Data / ingestion / MLOps substrate","Frameworks","Awesome Vector Search Engine","开源工具","Resources","🤖 AI \u0026 Machine Learning"],"sub_categories":["Data processing","Triple Stores (RDF Databases)","[Mar 15, 2025](/content/2025/03/15/README.md)","大语言对话模型及数据","Streaming Engine","Data Integration and Specialized Solutions","RAG Survey 2024","Library","好用工具"],"readme":"\u003cp align=\"center\"\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/enterprise-hero-dark.svg\"\u003e\n    \u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/enterprise-hero-light.svg\"\u003e\n    \u003cimg src=\"https://cocoindex.io/blobs/github/homepage/enterprise-hero-light.svg\" alt=\"Enterprise corpus — codebase, Slack, meeting notes, and documentation — flowing continuously through the CocoIndex incremental sync engine into a production AI agent with always-fresh context. Only the Δ (delta) is reprocessed on every change. Keywords: RAG pipeline, agent memory, enterprise retrieval, AI agent context, live indexing, retrieval-augmented generation, production LLM apps, streaming ETL, incremental ingestion.\" width=\"100%\" draggable=\"false\"/\u003e\n  \u003c/picture\u003e\n\u003c/p\u003e\n\u003ch1 align=\"center\"\u003eYour agents deserve \u003cem\u003efresh context.\u003c/em\u003e\u003c/h1\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003cstrong\u003eStar us\u0026nbsp;❤️\u0026nbsp;→\u003c/strong\u003e\u0026nbsp;\u003ca href=\"https://github.com/cocoindex-io/cocoindex\" title=\"Star CocoIndex on GitHub — open-source incremental indexing framework for AI agents\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/star-btn-small-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/star-btn-small-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/star-btn-small-light.svg\" alt=\"Star CocoIndex on GitHub — open-source Python framework for RAG, vector search, and live agent context\" height=\"36\" align=\"absmiddle\"/\u003e\u003c/picture\u003e\u003c/a\u003e \u0026nbsp;·\u0026nbsp;\n  \u003ca href=\"https://cocoindex.io\" title=\"Visit cocoindex.io — the CocoIndex homepage\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/coco-inline-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/coco-inline-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/coco-inline-light.svg\" alt=\"cocoindex.io — the CocoIndex homepage: incremental data pipelines for AI agents\" height=\"36\" align=\"absmiddle\"/\u003e\u003c/picture\u003e\u003c/a\u003e \u0026nbsp;·\u0026nbsp;\n  \u003ca href=\"https://cocoindex.io/docs\" title=\"Read the CocoIndex documentation — guides, quickstart, connectors, transformations, and API reference\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/docs-inline-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/docs-inline-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/docs-inline-light.svg\" alt=\"CocoIndex documentation — quickstart, connectors, ops, transformations, target stores, RAG and knowledge graph recipes\" height=\"36\" align=\"absmiddle\"/\u003e\u003c/picture\u003e\u003c/a\u003e \u0026nbsp;·\u0026nbsp;\n  \u003ca href=\"https://discord.com/invite/zpA9S2DR7s\" title=\"Join the CocoIndex Discord — community chat, showcase, release notes, help and support\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/discord-inline-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/discord-inline-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/discord-inline-light.svg\" alt=\"Join the CocoIndex Discord community — help, showcase, release notes, and live chat with maintainers\" height=\"36\" align=\"absmiddle\"/\u003e\u003c/picture\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\n\u003cp align=\"center\"\u003eCocoIndex turns codebases, meeting notes, inboxes, Slack, PDFs, and videos into live, continuously fresh context for your AI agents and LLM apps to reason over effectively — with minimal incremental processing.  Get your production AI agent ready in 10 minutes with reliable, continuously fresh data — no stale batches, no context gap\n\u003c/p\u003e\n\u003cp align=\"center\"\u003e\n  \u003cb\u003eIncremental\u003c/b\u003e · only the delta \u0026nbsp;·\u0026nbsp; \u003cb\u003eAny scale\u003c/b\u003e · parallel by default \u0026nbsp;·\u0026nbsp; \u003cb\u003eDeclarative\u003c/b\u003e · Python, 5 min\n\u003c/p\u003e\n\n\n\u003cdiv align=\"center\"\u003e\n\n[![stars](https://img.shields.io/github/stars/cocoindex-io/cocoindex?style=flat-square\u0026label=stars\u0026color=FB6A76)](https://github.com/cocoindex-io/cocoindex)\n[![downloads](https://img.shields.io/pepy/dt/cocoindex?style=flat-square\u0026label=downloads\u0026color=16A534)](https://pepy.tech/projects/cocoindex)\n[![pypi](https://img.shields.io/pypi/v/cocoindex?style=flat-square\u0026label=pypi\u0026color=E59A63)](https://pypi.org/project/cocoindex/)\n[![python](https://img.shields.io/badge/python-3.10--3.13-3572A5?style=flat-square)](https://www.python.org/)\n[![rust](https://img.shields.io/badge/rust-core-db6d28?style=flat-square)](https://www.rust-lang.org/)\n[![license](https://img.shields.io/badge/license-Apache--2.0-5B5BD6?style=flat-square)](https://opensource.org/licenses/Apache-2.0)\n[![discord](https://img.shields.io/discord/1314801574169673738?style=flat-square\u0026logo=discord\u0026logoColor=white\u0026label=discord\u0026color=5865F2)](https://discord.com/invite/zpA9S2DR7s)\n\n[![CI](https://img.shields.io/github/actions/workflow/status/cocoindex-io/cocoindex/CI.yml?event=push\u0026style=flat-square\u0026label=CI)](https://github.com/cocoindex-io/cocoindex/actions/workflows/CI.yml)\n[![release](https://img.shields.io/github/actions/workflow/status/cocoindex-io/cocoindex/release.yml?event=push\u0026style=flat-square\u0026label=release)](https://github.com/cocoindex-io/cocoindex/actions/workflows/release.yml)\n[![links](https://img.shields.io/github/actions/workflow/status/cocoindex-io/cocoindex/links.yml?event=push\u0026style=flat-square\u0026label=link%20check)](https://github.com/cocoindex-io/cocoindex/actions/workflows/links.yml)\n\n\u003c/div\u003e\n\n\u003cp align=\"center\"\u003e\u003ca href=\"https://trendshift.io/repositories/13939\" target=\"_blank\"\u003e\u003cimg src=\"https://trendshift.io/api/badge/repositories/13939\" alt=\"cocoindex-io/cocoindex | Trendshift\" width=\"250\" height=\"55\"/\u003e\u003c/a\u003e\u003c/p\u003e\n\n\u003cbr/\u003e\n\n\u003cdiv align=\"center\"\u003e\n\n[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |\n[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |\n[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |\n[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |\n[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |\n[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |\n[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |\n[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |\n[中文](https://readme-i18n.com/cocoindex-io/cocoindex?lang=zh)\n\n\u003c/div\u003e\n\n\n\u003cbr/\u003e\u003cbr/\u003e\n\n\u003ch2 align=\"center\"\u003eBuilt with CocoIndex ❤️\u003c/h2\u003e\n\n\u003c!-- Flagship: CocoIndex-code — full-bleed clickable hero --\u003e\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"https://cocoindex.io/cocoindex-code\" title=\"CocoIndex-code — flagship MCP server for AI coding agents: AST-aware, incremental, semantic code index. Claude Code and Cursor see your whole repo instantly.\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/cocoindex-code-hero-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/cocoindex-code-hero-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/cocoindex-code-hero-light.svg\" alt=\"CocoIndex-code — flagship MCP server for AI coding agents. AST-aware incremental semantic code index that keeps live call graphs, symbols, vectors, and chunks fresh on every commit. 70% fewer tokens per turn, 80-90% cache hits on re-index, sub-second freshness. Supports Python, TypeScript, Rust, and Go. Features: Δ-only incremental processing, semantic search by meaning (not grep), call graphs and blast-radius analysis, global repo view for duplicates and architecture. Build coding agents (generate, refactor) and code-review agents (catch, approve). One install — Claude Code, Cursor, and other MCP-aware agents see your whole repository instantly. Keywords: MCP server, coding agent, code intelligence, AST chunking, semantic code search, call graph, vector embedding, repository context, Claude Code, Cursor, incremental indexing, blast radius.\" width=\"100%\"/\u003e\u003c/picture\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\u003ca href=\"examples\"\u003e\u003cb\u003eSee all 20+ examples · updated every week →\u003c/b\u003e\u003c/a\u003e\u003c/p\u003e\n\n\u003cbr/\u003e\n\n\u003ch3 align=\"center\"\u003eGet started\u003c/h3\u003e\n\n```sh\npip install -U cocoindex\n```\n\nDeclare *what* should be in your target — CocoIndex keeps it in sync forever, recomputing only the Δ.\n\n```python\nimport cocoindex as coco\nfrom cocoindex.connectors import localfs, postgres\nfrom cocoindex.ops.text import RecursiveSplitter\n\n@coco.fn(memo=True)                          # ← cached by hash(input) + hash(code)\nasync def index_file(file, table):\n    for chunk in RecursiveSplitter().split(await file.read_text()):\n        table.declare_row(text=chunk.text, embedding=embed(chunk.text))\n\n@coco.fn\nasync def main(src):\n    table = await postgres.mount_table_target(PG, table_name=\"docs\")\n    table.declare_vector_index(column=\"embedding\")\n    await coco.mount_each(index_file, localfs.walk_dir(src).items(), table)\n\ncoco.App(coco.AppConfig(name=\"docs\"), main, src=\"./docs\").update_blocking()\n```\n\n\u003cp align=\"center\"\u003eRun once to backfill. Re-run anytime — only the changed files re-embed.\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  Building with an AI coding agent?\u003cbr/\u003e\n  Drop in our \u003ca href=\"skills/cocoindex/\"\u003e\u003cb\u003eCocoIndex skill\u003c/b\u003e\u003c/a\u003e so your agent writes correct v1 code — concepts, APIs, patterns, all in one file.\u003cbr/\u003e\n  \u003csub\u003eSee \u003ca href=\"https://cocoindex.io/docs/getting_started/ai_coding_agents/\"\u003eUse with AI coding agents\u003c/a\u003e for install steps.\u003c/sub\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"https://cocoindex.io/docs/getting_started/quickstart\" title=\"Full CocoIndex quickstart — install, declare sources and targets, run the incremental engine, set up vector search or knowledge graph in 5 minutes\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/quickstart-btn-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/quickstart-btn-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/quickstart-btn-light.svg\" alt=\"Full quickstart — open-book icon linking to the CocoIndex documentation quickstart: pip install, declare sources and targets, run the incremental engine\" height=\"36\" align=\"absmiddle\"/\u003e\u003c/picture\u003e\u003c/a\u003e\n  \u0026nbsp;\u0026nbsp;\n  \u003ca href=\"https://cocoindex.io/docs/programming_guide/core_concepts\" title=\"Learn the CocoIndex core concepts — sources, targets, flows, incremental engine, lineage\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/learn-concept-btn-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/learn-concept-btn-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/learn-concept-btn-light.svg\" alt=\"Learn the concept — lightbulb icon linking to the CocoIndex core-concepts guide: sources, targets, flows, incremental engine, and data lineage\" height=\"36\" align=\"absmiddle\"/\u003e\u003c/picture\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"https://github.com/cocoindex-io/cocoindex\" title=\"Star CocoIndex on GitHub — open-source Python framework for live agent context\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/comm-github-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/comm-github-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/comm-github-light.svg\" alt=\"Animated GitHub Star button for the cocoindex-io/cocoindex repository: a cursor clicks the star, it fills yellow, confetti bursts, the star count ticks up, and an 'Appreciate a star if you like it!' caption with a beating heart shows below the button\" width=\"620\"/\u003e\u003c/picture\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cbr/\u003e\u003cbr/\u003e\n\n\u003ch2 align=\"center\"\u003eReact — \u003cem\u003efor data engineering\u003c/em\u003e\u003c/h2\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/react4de-hero-dark.svg\"\u003e\n    \u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/react4de-hero-light.svg\"\u003e\n    \u003cimg src=\"https://cocoindex.io/blobs/github/homepage/react4de-hero-light.svg\" alt=\"React — for data engineering. The CocoIndex mental model: Target = F(Source). A persistent-state-driven dataflow where you declare the desired target state and the engine keeps it in sync with the latest source data and code, forever, at low latency and low cost. Source files (.py, .md, .pdf, .ts) flow through your Python transformation F into a live target dots-matrix index; only the Δ is reprocessed on every change, and every target dot traces back to its exact source byte. Four core properties: Python not a DAG (sky), declare target state (yellow bullseye), lineage end-to-end (coral connected dots), and incremental at any scale (mint Δ+1). Your code is as simple as the one-off version — the engine does the rest. Keywords: React for data engineering, declarative ETL, persistent state, data lineage, dataflow, Δ only, incremental indexing, CocoIndex.\" width=\"100%\"/\u003e\n  \u003c/picture\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/either-side-change-dark.svg\"\u003e\n    \u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/either-side-change-light.svg\"\u003e\n    \u003cimg src=\"https://cocoindex.io/blobs/github/homepage/either-side-change-light.svg\" alt=\"What happens when either side changes — CocoIndex tracks per-row provenance so the Δ propagates at minimum cost. Two scenarios shown in one illustration: (top) Source change — one file (b.md) is edited and only one target dot re-syncs (coral pulse). (bottom) Code change — the transformation function F is rewritten from v1 to v2 and only the dots whose outputs depend on the changed code re-run (amber/yellow pulses). Source on the left, F in the center (Python code block), target dots-matrix on the right. Keywords: incremental indexing, change data capture, delta processing, fine-grained invalidation, code-aware caching, hash-of-code invalidation, memoization, reproducible pipelines, incremental recomputation.\" width=\"100%\"/\u003e\n  \u003c/picture\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\u003ca href=\"https://cocoindex.io/react-cocoindex\"\u003e\u003cb\u003eSee the React ↔ CocoIndex mental model →\u003c/b\u003e\u003c/a\u003e\u003c/p\u003e\n\n\n\u003cbr/\u003e\u003cbr/\u003e\n\n\u003ch2 align=\"center\"\u003e\u003cem\u003eIncremental engine\u003c/em\u003e for long-horizon agents\u003c/h2\u003e\n\n\u003cp align=\"center\"\u003e\n  Data transformation for any engineer, designed for AI workloads —\u003cbr/\u003e\n  with a smart incremental engine for \u003cem\u003ealways-fresh, explainable data.\u003c/em\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"https://cocoindex.io/docs/programming_guide/core_concepts\" title=\"Learn the CocoIndex core concepts — sources, targets, flows, incremental engine, lineage\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/learn-concept-btn-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/learn-concept-btn-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/learn-concept-btn-light.svg\" alt=\"Learn the concept — purple button with a lightbulb icon linking to the CocoIndex core-concepts guide: sources, targets, flows, incremental engine, and data lineage\" height=\"44\" align=\"absmiddle\"/\u003e\u003c/picture\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/incremental-engine-dark.svg\"\u003e\n    \u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/incremental-engine-light.svg\"\u003e\n    \u003cimg src=\"https://cocoindex.io/blobs/github/homepage/incremental-engine-light.svg\" alt=\"CocoIndex's Python-native transformation flows connect 8 source categories (Codebases, Meeting Notes, Web · APIs, File System · Blob Stores, Databases, Message Queues, Images · Video, Voice · Transcripts) through the incremental engine out to 6 target stores (Relational DB, Data Warehouse, Vector DB, Graph DB, Message Queue, Feature Store). A flow.py code block (@coco.fn · def f(src): · chunks = split(src) · target.row(embed(chunks))) shows the shared pipeline; only the Δ is reprocessed — unchanged src hits the cache, changed src re-runs split() and Δ → re-embed. The persistent data-pipeline control plane runs eight always-on subsystems: live caching, pipeline catalog, version tracking, continuously learning, lineage, task scheduling, metrics collection, and failure management. Keywords: data pipeline, ETL, source connectors, vector database, graph database, incremental engine, streaming ingestion, caching, lineage, versioning, scheduling, metrics, retries.\" width=\"100%\"/\u003e\n  \u003c/picture\u003e\n\u003c/p\u003e\n\n\n\u003cbr/\u003e\u003cbr/\u003e\n\n\u003ch2 align=\"center\"\u003eWhy \u003cem\u003eincremental?\u003c/em\u003e\u003c/h2\u003e\n\n\u003cp align=\"center\"\u003eYour agents are only as good as the data they see.\u003cbr/\u003eBatch pipelines drift stale. CocoIndex stays live — and only runs the Δ.\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/why-incremental-dark.svg\"\u003e\n    \u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/why-incremental-dark.svg\"\u003e\n    \u003cimg src=\"https://cocoindex.io/blobs/github/homepage/why-incremental-dark.svg\" alt=\"Why incremental? — one illustration combining the four core benefits of CocoIndex's incremental engine. Sub-second fresh (mint): a stopwatch ticking under a second, source changes propagate to the target in under a second so agents see the world as it is, not as it was yesterday. 10× cheaper at scale (yellow): a 10,000-row corpus block where only a thin Δ 0.1% column re-runs and 99.9% stays cached — you skip the other 99.9% of your corpus and pay a fraction of the compute, embedding, and LLM bill. Explainable by default (coral): a lineage thread links a source byte (handbook.md L42) to a target vector — every vector, row, or graph node in the target traces back to its exact source byte for debuggable, auditable, regulator-friendly AI pipelines. Production-grade (purple): a shield stamped with the Rust crab surrounded by retry loops, back-off dots, a DLQ tray, and a no-data-loss check — Rust core with retries, exponential back-off, dead-letter queues, and no-data-loss guarantees, production-ready for long-horizon AI agents. Keywords: incremental indexing, Δ-only reprocessing, sub-second freshness, low-latency RAG, cost-efficient embeddings, data lineage, retrieval-augmented generation, Rust core, retries, back-off, dead letters, no data loss, long-horizon agents.\" width=\"100%\"/\u003e\n  \u003c/picture\u003e\n\u003c/p\u003e\n\n\n\u003cbr/\u003e\u003cbr/\u003e\n\n\u003ch2 align=\"center\"\u003eWhat can you \u003cem\u003ebuild?\u003c/em\u003e\u003c/h2\u003e\n\n\u003cp align=\"center\"\u003e\u003ca href=\"examples\" title=\"Browse all 20+ CocoIndex examples on GitHub — code, PDF, HN, knowledge graph, podcast, CSV-to-Kafka, image, and more\"\u003e\u003cb\u003eSee all 20+ examples · updated every week →\u003c/b\u003e\u003c/a\u003e\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\u003cb\u003eWorking starters from \u003ca href=\"examples\"\u003ethe examples tree\u003c/a\u003e — clone, plug your source, ship.\u003c/b\u003e\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"examples/code_embedding\" title=\"Real-time code index — walk a git repo, chunk source files with an AST-aware splitter, embed with sentence-transformers, and upsert to pgvector / LanceDB. Fully incremental: only files touched by the latest commit re-embed. Good for coding agents, code review, semantic find-by-meaning.\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/example-code.svg\" alt=\"Real-time code index — walk a git repo, AST-chunk source files, embed with sentence-transformers, upsert to pgvector / LanceDB, incremental on every commit. Keywords: code search, code embedding, semantic code retrieval, Python.\" width=\"70%\"/\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"examples/pdf_embedding\" title=\"PDF → RAG index — ingest PDFs from local / S3 / Google Drive, extract text, chunk with a recursive splitter, embed each chunk, and upsert into pgvector / LanceDB with a vector index. Classic RAG stack, incremental — only edited PDFs re-embed.\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/example-pdf.svg\" alt=\"PDF → RAG index — ingest PDFs from local, S3, or GDrive, extract + chunk text, embed chunks, upsert to pgvector / LanceDB. Classic retrieval-augmented-generation stack, incremental. Keywords: RAG, document Q\u0026A, PDF search, vector database.\" width=\"70%\"/\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"examples/hn_trending_topics\" title=\"HN trending topics — fetch Hacker News threads via the Algolia API, recursively pull nested comments, LLM-extract typed topic lists per message with Gemini 2.5 Flash, and rank topics by weighted mention count (thread = 5 points, comment = 1 point).\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/example-hn-trending.svg\" alt=\"HN trending topics — pull Hacker News threads via Algolia, recursively parse comments, LLM-extract topics with Gemini 2.5 Flash, rank by weighted hit count (thread=5, comment=1), store in Postgres. Incremental. Keywords: Hacker News, trending topics, LLM extraction, Gemini, Postgres, news intelligence, topic ranking.\" width=\"70%\"/\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"examples/conversation_to_knowledge\" title=\"Conversation → knowledge graph — pull people, topics, decisions, and action items out of meeting transcripts, Slack, podcasts, or support calls with an LLM extractor, and upsert into Neo4j or Kuzu. Incremental: only changed turns re-extract.\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/example-kg.svg\" alt=\"Conversation → knowledge graph — LLM extracts people, topics, decisions, action items from transcripts and upserts into Neo4j / Kuzu. Live graph, incremental. Keywords: knowledge graph, entity extraction, meeting intelligence, agent memory.\" width=\"70%\"/\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"examples/multi_codebase_summarization\" title=\"Multi-repo summarization — walk N git repositories, extract READMEs / public APIs / modules, LLM-summarize each one, and roll up into a single top-level summary. Incremental: only repos with new commits re-run.\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/example-multicode.svg\" alt=\"Multi-repo summarization — walk N git repos, extract structure, LLM-summarize per-repo + a rolled-up org summary, refresh on every push. Keywords: internal platform, developer experience, monorepo, SDK docs.\" width=\"70%\"/\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"examples/patient_intake_extraction_baml\" title=\"Structured extraction — read messy forms, PDFs, invoices, or free-text and extract typed, schema-validated fields with BAML or DSPy, then write rows into Postgres or a warehouse. Incremental: only changed documents re-extract.\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/example-intake.svg\" alt=\"Structured extraction — BAML / DSPy typed schema extraction from forms, PDFs, intakes, invoices into Postgres / warehouse. Incremental. Keywords: ETL, LLM extraction, schema-first, patient intake, invoice processing, KYC, contracts.\" width=\"70%\"/\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"examples/conversation_to_knowledge\" title=\"Podcast → knowledge graph — download YouTube podcast audio, transcribe with speaker diarization (Whisper / AssemblyAI), LLM-extract structured statements and entities per speaker, resolve duplicates across episodes with embeddings, and store the whole graph (speakers, statements, topics) in SurrealDB or Neo4j. Incremental.\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/example-podcast.svg\" alt=\"Podcast → knowledge graph — transcribe YouTube / Spotify audio with speaker diarization, LLM-extract speakers and statements, resolve entities across episodes, store in SurrealDB / Neo4j. Keywords: podcast, diarization, YouTube, Whisper, SurrealDB, knowledge graph, entity resolution.\" width=\"70%\"/\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"examples/csv_to_kafka\" title=\"CSV → Kafka live — watch a folder of CSV files (local or S3) and publish each row as a JSON message keyed by its primary key to a Kafka topic on StreamNative / Confluent / self-hosted. Sub-second incremental — only changed rows publish.\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/example-csv-kafka.svg\" alt=\"CSV → Kafka live — watch a folder of CSV files, publish each row as a JSON message to a Kafka topic via CocoIndex's Kafka target connector. Incremental, sub-second, no producer loop. Keywords: Kafka, CDC, streaming, StreamNative, Confluent, CSV ingestion, event streaming.\" width=\"70%\"/\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cbr/\u003e\n\n\u003cp align=\"center\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/share-build-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/share-build-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/share-build-light.svg\" alt=\"Share what you build — a banner with a trail of tiny hearts rising from the bottom behind the text, inviting the CocoIndex community to share projects built with the framework\" height=\"36\" draggable=\"false\"/\u003e\u003c/picture\u003e\u003c/p\u003e\n\n\u003cp align=\"center\"\u003eBuilding something with CocoIndex? \u003cb\u003eWe want to see it.\u003c/b\u003e\u003cbr/\u003eTag \u003ca href=\"https://x.com/cocoindex_io\" title=\"Tag @cocoindex_io on X to showcase your CocoIndex project\"\u003e@cocoindex_io\u003c/a\u003e on X or drop a link in \u003ca href=\"https://discord.com/invite/zpA9S2DR7s\" title=\"Share your project in the CocoIndex Discord #showcase channel\"\u003e#showcase\u003c/a\u003e on Discord. We'll boost it. 🥥\u003c/p\u003e\n\n\n\u003cbr/\u003e\u003cbr/\u003e\n\n\u003ch2 align=\"center\"\u003eCommunity\u003c/h2\u003e\n\n\u003ctable width=\"100%\" border=\"0\" cellspacing=\"0\" role=\"presentation\"\u003e\n  \u003ctr\u003e\n    \u003ctd align=\"center\" valign=\"middle\" width=\"25%\"\u003e\n      \u003ca href=\"https://discord.com/invite/zpA9S2DR7s\" title=\"Join the CocoIndex Discord — community chat, showcase, help, release notes\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/comm-discord-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/comm-discord-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/comm-discord-light.svg\" alt=\"Join the CocoIndex Discord community — live chat with maintainers and users, showcase your projects, get help building RAG pipelines and knowledge graphs\" width=\"100%\"/\u003e\u003c/picture\u003e\u003c/a\u003e\n    \u003c/td\u003e\n    \u003ctd align=\"center\" valign=\"middle\" width=\"25%\"\u003e\n      \u003ca href=\"https://www.youtube.com/@cocoindex-io\" title=\"Subscribe to the CocoIndex YouTube channel — live demos, tutorials, and deep dives\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/comm-youtube-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/comm-youtube-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/comm-youtube-light.svg\" alt=\"Subscribe to the CocoIndex YouTube channel — video tutorials, live demos, architecture deep dives, and AI agent recipes\" width=\"100%\" draggable=\"false\"/\u003e\u003c/picture\u003e\u003c/a\u003e\n    \u003c/td\u003e\n    \u003ctd align=\"center\" valign=\"middle\" width=\"25%\"\u003e\n      \u003ca href=\"https://cocoindex.io/blogs/\" title=\"Read the CocoIndex blog — engineering posts, release notes, and tutorials\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/comm-blog-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/comm-blog-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/comm-blog-light.svg\" alt=\"Read the CocoIndex blog — engineering deep dives, release notes, RAG and knowledge graph tutorials, and case studies\" width=\"100%\"/\u003e\u003c/picture\u003e\u003c/a\u003e\n    \u003c/td\u003e\n    \u003ctd align=\"center\" valign=\"middle\" width=\"25%\"\u003e\n      \u003ca href=\"https://x.com/cocoindex_io\" title=\"Follow @cocoindex_io on X (Twitter) for release notes, demos, and updates\"\u003e\u003cpicture\u003e\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/comm-x-dark.svg\"\u003e\u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/comm-x-light.svg\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/comm-x-light.svg\" alt=\"Follow @cocoindex_io on X (formerly Twitter) for release notes, demos, launches, and AI data pipeline updates\" width=\"100%\" draggable=\"false\"/\u003e\u003c/picture\u003e\u003c/a\u003e\n    \u003c/td\u003e\n  \u003c/tr\u003e\n\u003c/table\u003e\n\n\u003cbr/\u003e\u003cbr/\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003cimg src=\"https://cocoindex.io/blobs/github/homepage/we-love-contributors.svg\" alt=\"We love Contributors — section title banner with a pulsing coral heart badge and cream twinkle sparkles. Every typo fix, new connector, and doc tweak makes CocoIndex better. Keywords: open-source contribution, pull request, typo fix, new connector, good first issue, Hacktoberfest, community, coconut heart.\" width=\"620\"/\u003e\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003cb\u003eWe are \u003cem\u003eso\u003c/em\u003e excited to meet you.\u003c/b\u003e\u003cbr/\u003e\n  Every typo fix, new connector, doc tweak, or full-on rewrite makes CocoIndex better.\u003cbr/\u003e\n  Come hang out — big PRs and small ones, both welcome.\n\u003c/p\u003e\n\n\u003cp align=\"center\"\u003e\n  📝 \u003ca href=\"https://cocoindex.io/docs/contributing/guide\"\u003e\u003cb\u003eRead the contributing guide\u003c/b\u003e\u003c/a\u003e \u0026nbsp;·\u0026nbsp;\n  🐛 \u003ca href=\"https://github.com/cocoindex-io/cocoindex/labels/good%20first%20issue\"\u003e\u003cb\u003egood first issues\u003c/b\u003e\u003c/a\u003e \u0026nbsp;·\u0026nbsp;\n  💬 \u003ca href=\"https://discord.com/invite/zpA9S2DR7s\"\u003e\u003cb\u003eSay hi on Discord\u003c/b\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cbr/\u003e\u003cbr/\u003e\n\n\u003ch2 align=\"center\"\u003eCocoIndex \u003cem\u003eEnterprise\u003c/em\u003e\u003c/h2\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003cpicture\u003e\n    \u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://cocoindex.io/blobs/github/homepage/enterprise-scale-dark.svg\"\u003e\n    \u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://cocoindex.io/blobs/github/homepage/enterprise-scale-light.svg\"\u003e\n    \u003cimg src=\"https://cocoindex.io/blobs/github/homepage/enterprise-scale-light.svg\" alt=\"CocoIndex Enterprise — built for enterprise scale. Four headline stats for PB-scale incremental indexing: PB corpus scale incrementally indexed (coral), 10× fewer LLM embedding calls vs. full recompute (yellow), 100% lineage coverage with every byte traceable (mint), Δ only the delta always (sky). Below, a wide 50×8 corpus matrix of 400 dim tiles represents a petabyte-scale store where a single coral Δ slice of 8 tiles re-runs while the other 99.9% stays cached. Keywords: enterprise RAG, petabyte-scale indexing, incremental compute, delta-only, lineage, parallel chunking, zero-copy, failure isolation.\" width=\"100%\"/\u003e\n  \u003c/picture\u003e\n\u003c/p\u003e\n\n\u003ch3 align=\"center\"\u003eLarge corpus — \u003cem\u003ebuilt for enterprise scale.\u003c/em\u003e\u003c/h3\u003e\n\n\u003cp align=\"center\"\u003e\n  Incremental compute is the only way to keep large corpora fresh without re-embedding them every cycle.\u003cbr/\u003e\n  CocoIndex scales from a single repo to petabyte-scale stores — parallel by default, delta-only by design.\n\u003c/p\u003e\n\n\u003cbr/\u003e\n\n\u003ch3 align=\"center\"\u003eProcess once. \u003cem\u003eReconcile forever.\u003c/em\u003e\u003c/h3\u003e\n\n\u003cp align=\"center\"\u003e\n  When a source changes, CocoIndex identifies the affected records, propagates the change\u003cbr/\u003e\n  across joins and lookups, updates the target, and retires stale rows —\u003cbr/\u003e\n  without touching anything that didn't change.\n\u003c/p\u003e\n\n\u003cbr/\u003e\n\n\u003ch3 align=\"center\"\u003eBuilt on a \u003cem\u003eRust engine.\u003c/em\u003e\u003c/h3\u003e\n\n\u003cp align=\"center\"\u003e\n  The core is Rust — production-grade from day zero.\u003cbr/\u003e\n  Parallel chunking, zero-copy transforms where possible, and failure isolation\u003cbr/\u003e\n  so one bad record doesn't stall the flow.\n\u003c/p\u003e\n\n\u003cbr/\u003e\u003cbr/\u003e\n\n\u003cp align=\"center\"\u003e\n  \u003ca href=\"https://cocoindex.io/enterprise/\" title=\"Explore CocoIndex Enterprise — PB-scale incremental data pipelines for AI agents\"\u003e\u003cimg src=\"https://cocoindex.io/blobs/github/homepage/enterprise-btn.svg\" alt=\"Explore CocoIndex Enterprise — bright blue pill button linking to cocoindex.io/enterprise, the PB-scale incremental data pipeline for AI agents\" height=\"44\" align=\"absmiddle\"/\u003e\u003c/a\u003e\n\u003c/p\u003e\n\n\u003cbr/\u003e\u003cbr/\u003e\n\n\u003cp align=\"center\"\u003e\u003csub\u003eApache 2.0 · © CocoIndex contributors 🥥\u003c/sub\u003e\u003c/p\u003e\n\n\u003cimg referrerpolicy=\"no-referrer-when-downgrade\" src=\"https://static.scarf.sh/a.png?x-pxid=7f27e85b-be3a-411a-b612-0b9d53711814\u0026page=README.md\" alt=\"\" width=\"1\" height=\"1\" /\u003e\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fcocoindex-io%2Fcocoindex","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fcocoindex-io%2Fcocoindex","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fcocoindex-io%2Fcocoindex/lists"}