{"id":29248659,"url":"https://github.com/hkuds/rag-anything","last_synced_at":"2026-01-16T06:54:51.352Z","repository":{"id":299444161,"uuid":"997220241","full_name":"HKUDS/RAG-Anything","owner":"HKUDS","description":"\"RAG-Anything: All-in-One RAG System\"","archived":false,"fork":false,"pushed_at":"2025-07-03T08:40:35.000Z","size":2468,"stargazers_count":1033,"open_issues_count":20,"forks_count":93,"subscribers_count":15,"default_branch":"main","last_synced_at":"2025-07-03T09:34:49.902Z","etag":null,"topics":["agent","large-language-model","rag"],"latest_commit_sha":null,"homepage":"","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/HKUDS.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null}},"created_at":"2025-06-06T06:47:29.000Z","updated_at":"2025-07-03T09:29:07.000Z","dependencies_parsed_at":"2025-06-28T23:15:50.661Z","dependency_job_id":null,"html_url":"https://github.com/HKUDS/RAG-Anything","commit_stats":null,"previous_names":["hkuds/rag-anything"],"tags_count":5,"template":false,"template_full_name":null,"purl":"pkg:github/HKUDS/RAG-Anything","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/HKUDS%2FRAG-Anything","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/HKUDS%2FRAG-Anything/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/HKUDS%2FRAG-Anything/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/HKUDS%2FRAG-Anything/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/
HKUDS","download_url":"https://codeload.github.com/HKUDS/RAG-Anything/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/HKUDS%2FRAG-Anything/sbom","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":263421930,"owners_count":23464051,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["agent","large-language-model","rag"],"created_at":"2025-07-04T00:08:24.813Z","updated_at":"2026-01-16T06:54:51.340Z","avatar_url":"https://github.com/HKUDS.png","language":"Python","funding_links":[],"categories":[],"sub_categories":[],"readme":"\u003cdiv align=\"center\"\u003e\n\n\u003cdiv style=\"margin: 20px 0;\"\u003e\n  \u003cimg src=\"./assets/logo.png\" width=\"120\" height=\"120\" alt=\"RAG-Anything Logo\" style=\"border-radius: 20px; box-shadow: 0 8px 32px rgba(0, 217, 255, 0.3);\"\u003e\n\u003c/div\u003e\n\n# 🚀 RAG-Anything: All-in-One RAG Framework\n\n\u003ca href=\"https://trendshift.io/repositories/14959\" target=\"_blank\"\u003e\u003cimg src=\"https://trendshift.io/api/badge/repositories/14959\" alt=\"HKUDS%2FRAG-Anything | Trendshift\" style=\"width: 250px; height: 55px;\" width=\"250\" height=\"55\"/\u003e\u003c/a\u003e\n\n\u003cdiv align=\"center\"\u003e\n  \u003cimg src=\"https://readme-typing-svg.herokuapp.com?font=Orbitron\u0026size=24\u0026duration=3000\u0026pause=1000\u0026color=00D9FF\u0026center=true\u0026vCenter=true\u0026width=600\u0026lines=Welcome+to+RAG-Anything;Next-Gen+Multimodal+RAG+System;Powered+by+Advanced+AI+Technology\" alt=\"Typing Animation\" 
/\u003e\n\u003c/div\u003e\n\n\u003cdiv align=\"center\"\u003e\n  \u003cdiv style=\"background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; padding: 25px; text-align: center;\"\u003e\n    \u003cp\u003e\n      \u003ca href='https://github.com/HKUDS/RAG-Anything'\u003e\u003cimg src='https://img.shields.io/badge/🔥Project-Page-00d9ff?style=for-the-badge\u0026logo=github\u0026logoColor=white\u0026labelColor=1a1a2e'\u003e\u003c/a\u003e\n      \u003ca href='https://arxiv.org/abs/2510.12323'\u003e\u003cimg src='https://img.shields.io/badge/📄arXiv-2510.12323-ff6b6b?style=for-the-badge\u0026logo=arxiv\u0026logoColor=white\u0026labelColor=1a1a2e'\u003e\u003c/a\u003e\n      \u003ca href='https://github.com/HKUDS/LightRAG'\u003e\u003cimg src='https://img.shields.io/badge/⚡Based%20on-LightRAG-4ecdc4?style=for-the-badge\u0026logo=lightning\u0026logoColor=white\u0026labelColor=1a1a2e'\u003e\u003c/a\u003e\n    \u003c/p\u003e\n    \u003cp\u003e\n      \u003ca href=\"https://github.com/HKUDS/RAG-Anything/stargazers\"\u003e\u003cimg src='https://img.shields.io/github/stars/HKUDS/RAG-Anything?color=00d9ff\u0026style=for-the-badge\u0026logo=star\u0026logoColor=white\u0026labelColor=1a1a2e' /\u003e\u003c/a\u003e\n      \u003cimg src=\"https://img.shields.io/badge/🐍Python-3.10-4ecdc4?style=for-the-badge\u0026logo=python\u0026logoColor=white\u0026labelColor=1a1a2e\"\u003e\n      \u003ca href=\"https://pypi.org/project/raganything/\"\u003e\u003cimg src=\"https://img.shields.io/pypi/v/raganything.svg?style=for-the-badge\u0026logo=pypi\u0026logoColor=white\u0026labelColor=1a1a2e\u0026color=ff6b6b\"\u003e\u003c/a\u003e\n      \u003ca href=\"https://github.com/astral-sh/uv\"\u003e\u003cimg src=\"https://img.shields.io/badge/⚡uv-Ready-ff6b6b?style=for-the-badge\u0026logo=python\u0026logoColor=white\u0026labelColor=1a1a2e\"\u003e\u003c/a\u003e\n    \u003c/p\u003e\n    \u003cp\u003e\n      \u003ca href=\"https://discord.gg/yF2MmDJyGJ\"\u003e\u003cimg 
src=\"https://img.shields.io/badge/💬Discord-Community-7289da?style=for-the-badge\u0026logo=discord\u0026logoColor=white\u0026labelColor=1a1a2e\"\u003e\u003c/a\u003e\n      \u003ca href=\"https://github.com/HKUDS/RAG-Anything/issues/7\"\u003e\u003cimg src=\"https://img.shields.io/badge/💬WeChat-Group-07c160?style=for-the-badge\u0026logo=wechat\u0026logoColor=white\u0026labelColor=1a1a2e\"\u003e\u003c/a\u003e\n    \u003c/p\u003e\n    \u003cp\u003e\n      \u003ca href=\"README_zh.md\"\u003e\u003cimg src=\"https://img.shields.io/badge/🇨🇳中文版-1a1a2e?style=for-the-badge\"\u003e\u003c/a\u003e\n      \u003ca href=\"README.md\"\u003e\u003cimg src=\"https://img.shields.io/badge/🇺🇸English-1a1a2e?style=for-the-badge\"\u003e\u003c/a\u003e\n    \u003c/p\u003e\n  \u003c/div\u003e\n\u003c/div\u003e\n\n\u003c/div\u003e\n\n\u003cdiv align=\"center\"\u003e\n  \u003cdiv style=\"width: 100%; height: 2px; margin: 20px 0; background: linear-gradient(90deg, transparent, #00d9ff, transparent);\"\u003e\u003c/div\u003e\n\u003c/div\u003e\n\n\u003cdiv align=\"center\"\u003e\n  \u003ca href=\"#-quick-start\" style=\"text-decoration: none;\"\u003e\n    \u003cimg src=\"https://img.shields.io/badge/Quick%20Start-Get%20Started%20Now-00d9ff?style=for-the-badge\u0026logo=rocket\u0026logoColor=white\u0026labelColor=1a1a2e\"\u003e\n  \u003c/a\u003e\n\u003c/div\u003e\n\n---\n\n## 🎉 News\n- [X] [2025.10]🎯📢 🚀 We have released the technical report of [RAG-Anything](http://arxiv.org/abs/2510.12323). Access it now to explore our latest research findings.\n- [X] [2025.08]🎯📢 🔍 RAG-Anything now features **VLM-Enhanced Query** mode! 
When documents include images, the system seamlessly integrates them into VLM for advanced multimodal analysis, combining visual and textual context for deeper insights.\n- [X] [2025.07]🎯📢 RAG-Anything now features a [context configuration module](docs/context_aware_processing.md), enabling intelligent integration of relevant contextual information to enhance multimodal content processing.\n- [X] [2025.07]🎯📢 🚀 RAG-Anything now supports multimodal query capabilities, enabling enhanced RAG with seamless processing of text, images, tables, and equations.\n- [X] [2025.07]🎯📢 🎉 RAG-Anything has reached 1k🌟 stars on GitHub! Thank you for your incredible support and valuable contributions to the project.\n\n---\n\n## 🌟 System Overview\n\n*Next-Generation Multimodal Intelligence*\n\n\u003cdiv style=\"background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%); border-radius: 15px; padding: 25px; margin: 20px 0; border: 2px solid #00d9ff; box-shadow: 0 0 30px rgba(0, 217, 255, 0.3);\"\u003e\n\nModern documents increasingly contain diverse multimodal content—text, images, tables, equations, charts, and multimedia—that traditional text-focused RAG systems cannot effectively process. **RAG-Anything** addresses this challenge as a comprehensive **All-in-One Multimodal Document Processing RAG system** built on [LightRAG](https://github.com/HKUDS/LightRAG).\n\nAs a unified solution, RAG-Anything **eliminates the need for multiple specialized tools**. It provides **seamless processing and querying across all content modalities** within a single integrated framework. Unlike conventional RAG approaches that struggle with non-textual elements, our all-in-one system delivers **comprehensive multimodal retrieval capabilities**.\n\nUsers can query documents containing **interleaved text**, **visual diagrams**, **structured tables**, and **mathematical formulations** through **one cohesive interface**. 
This consolidated approach makes RAG-Anything particularly valuable for academic research, technical documentation, financial reports, and enterprise knowledge management where rich, mixed-content documents demand a **unified processing framework**.\n\n\u003cimg src=\"assets/rag_anything_framework.png\" alt=\"RAG-Anything\" /\u003e\n\n\u003c/div\u003e\n\n### 🎯 Key Features\n\n\u003cdiv style=\"background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 15px; padding: 25px; margin: 20px 0;\"\u003e\n\n- **🔄 End-to-End Multimodal Pipeline** - Complete workflow from document ingestion and parsing to intelligent multimodal query answering\n- **📄 Universal Document Support** - Seamless processing of PDFs, Office documents, images, and diverse file formats\n- **🧠 Specialized Content Analysis** - Dedicated processors for images, tables, mathematical equations, and heterogeneous content types\n- **🔗 Multimodal Knowledge Graph** - Automatic entity extraction and cross-modal relationship discovery for enhanced understanding\n- **⚡ Adaptive Processing Modes** - Flexible MinerU-based parsing or direct multimodal content injection workflows\n- **📋 Direct Content List Insertion** - Bypass document parsing by directly inserting pre-parsed content lists from external sources\n- **🎯 Hybrid Intelligent Retrieval** - Advanced search capabilities spanning textual and multimodal content with contextual understanding\n\n\u003c/div\u003e\n\n---\n\n## 🏗️ Algorithm \u0026 Architecture\n\n\u003cdiv style=\"background: linear-gradient(135deg, #0f0f23 0%, #1a1a2e 100%); border-radius: 15px; padding: 25px; margin: 20px 0; border-left: 5px solid #00d9ff;\"\u003e\n\n### Core Algorithm\n\n**RAG-Anything** implements an effective **multi-stage multimodal pipeline** that fundamentally extends traditional RAG architectures to seamlessly handle diverse content modalities through intelligent orchestration and cross-modal understanding.\n\n\u003c/div\u003e\n\n\u003cdiv 
align=\"center\"\u003e\n  \u003cdiv style=\"width: 100%; max-width: 600px; margin: 20px auto; padding: 20px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2);\"\u003e\n    \u003cdiv style=\"display: flex; justify-content: space-around; align-items: center; flex-wrap: wrap; gap: 20px;\"\u003e\n      \u003cdiv style=\"text-align: center;\"\u003e\n        \u003cdiv style=\"font-size: 24px; margin-bottom: 10px;\"\u003e📄\u003c/div\u003e\n        \u003cdiv style=\"font-size: 14px; color: #00d9ff;\"\u003eDocument Parsing\u003c/div\u003e\n      \u003c/div\u003e\n      \u003cdiv style=\"font-size: 20px; color: #00d9ff;\"\u003e→\u003c/div\u003e\n      \u003cdiv style=\"text-align: center;\"\u003e\n        \u003cdiv style=\"font-size: 24px; margin-bottom: 10px;\"\u003e🧠\u003c/div\u003e\n        \u003cdiv style=\"font-size: 14px; color: #00d9ff;\"\u003eContent Analysis\u003c/div\u003e\n      \u003c/div\u003e\n      \u003cdiv style=\"font-size: 20px; color: #00d9ff;\"\u003e→\u003c/div\u003e\n      \u003cdiv style=\"text-align: center;\"\u003e\n        \u003cdiv style=\"font-size: 24px; margin-bottom: 10px;\"\u003e🔍\u003c/div\u003e\n        \u003cdiv style=\"font-size: 14px; color: #00d9ff;\"\u003eKnowledge Graph\u003c/div\u003e\n      \u003c/div\u003e\n      \u003cdiv style=\"font-size: 20px; color: #00d9ff;\"\u003e→\u003c/div\u003e\n      \u003cdiv style=\"text-align: center;\"\u003e\n        \u003cdiv style=\"font-size: 24px; margin-bottom: 10px;\"\u003e🎯\u003c/div\u003e\n        \u003cdiv style=\"font-size: 14px; color: #00d9ff;\"\u003eIntelligent Retrieval\u003c/div\u003e\n      \u003c/div\u003e\n    \u003c/div\u003e\n  \u003c/div\u003e\n\u003c/div\u003e\n\n### 1. 
Document Parsing Stage\n\n\u003cdiv style=\"background: linear-gradient(90deg, #1a1a2e 0%, #16213e 100%); border-radius: 10px; padding: 20px; margin: 15px 0; border-left: 4px solid #4ecdc4;\"\u003e\n\nThe system provides high-fidelity document extraction through adaptive content decomposition. It intelligently segments heterogeneous elements while preserving contextual relationships. Universal format compatibility is achieved via specialized optimized parsers.\n\n**Key Components:**\n\n- **⚙️ MinerU Integration**: Leverages [MinerU](https://github.com/opendatalab/MinerU) for high-fidelity document structure extraction and semantic preservation across complex layouts.\n\n- **🧩 Adaptive Content Decomposition**: Automatically segments documents into coherent text blocks, visual elements, structured tables, mathematical equations, and specialized content types while preserving contextual relationships.\n\n- **📁 Universal Format Support**: Provides comprehensive handling of PDFs, Office documents (DOC/DOCX/PPT/PPTX/XLS/XLSX), images, and emerging formats through specialized parsers with format-specific optimization.\n\n\u003c/div\u003e\n\n### 2. Multi-Modal Content Understanding \u0026 Processing\n\n\u003cdiv style=\"background: linear-gradient(90deg, #16213e 0%, #0f3460 100%); border-radius: 10px; padding: 20px; margin: 15px 0; border-left: 4px solid #ff6b6b;\"\u003e\n\nThe system automatically categorizes and routes content through optimized channels. It uses concurrent pipelines for parallel text and multimodal processing. Document hierarchy and relationships are preserved during transformation.\n\n**Key Components:**\n\n- **🎯 Autonomous Content Categorization and Routing**: Automatically identify, categorize, and route different content types through optimized execution channels.\n\n- **⚡ Concurrent Multi-Pipeline Architecture**: Implements concurrent execution of textual and multimodal content through dedicated processing pipelines. 
This approach maximizes throughput efficiency while preserving content integrity.\n\n- **🏗️ Document Hierarchy Extraction**: Extracts and preserves original document hierarchy and inter-element relationships during content transformation.\n\n\u003c/div\u003e\n\n### 3. Multimodal Analysis Engine\n\n\u003cdiv style=\"background: linear-gradient(90deg, #0f3460 0%, #1a1a2e 100%); border-radius: 10px; padding: 20px; margin: 15px 0; border-left: 4px solid #00d9ff;\"\u003e\n\nThe system deploys modality-aware processing units for heterogeneous data modalities:\n\n**Specialized Analyzers:**\n\n- **🔍 Visual Content Analyzer**:\n  - Integrate vision model for image analysis.\n  - Generates context-aware descriptive captions based on visual semantics.\n  - Extracts spatial relationships and hierarchical structures between visual elements.\n\n- **📊 Structured Data Interpreter**:\n  - Performs systematic interpretation of tabular and structured data formats.\n  - Implements statistical pattern recognition algorithms for data trend analysis.\n  - Identifies semantic relationships and dependencies across multiple tabular datasets.\n\n- **📐 Mathematical Expression Parser**:\n  - Parses complex mathematical expressions and formulas with high accuracy.\n  - Provides native LaTeX format support for seamless integration with academic workflows.\n  - Establishes conceptual mappings between mathematical equations and domain-specific knowledge bases.\n\n- **🔧 Extensible Modality Handler**:\n  - Provides configurable processing framework for custom and emerging content types.\n  - Enables dynamic integration of new modality processors through plugin architecture.\n  - Supports runtime configuration of processing pipelines for specialized use cases.\n\n\u003c/div\u003e\n\n### 4. 
Multimodal Knowledge Graph Index\n\n\u003cdiv style=\"background: linear-gradient(90deg, #1a1a2e 0%, #16213e 100%); border-radius: 10px; padding: 20px; margin: 15px 0; border-left: 4px solid #4ecdc4;\"\u003e\n\nThe multi-modal knowledge graph construction module transforms document content into structured semantic representations. It extracts multimodal entities, establishes cross-modal relationships, and preserves hierarchical organization. The system applies weighted relevance scoring for optimized knowledge retrieval.\n\n**Core Functions:**\n\n- **🔍 Multi-Modal Entity Extraction**: Transforms significant multimodal elements into structured knowledge graph entities. The process includes semantic annotations and metadata preservation.\n\n- **🔗 Cross-Modal Relationship Mapping**: Establishes semantic connections and dependencies between textual entities and multimodal components. This is achieved through automated relationship inference algorithms.\n\n- **🏗️ Hierarchical Structure Preservation**: Maintains original document organization through \"belongs_to\" relationship chains. These chains preserve logical content hierarchy and sectional dependencies.\n\n- **⚖️ Weighted Relationship Scoring**: Assigns quantitative relevance scores to relationship types. Scoring is based on semantic proximity and contextual significance within the document structure.\n\n\u003c/div\u003e\n\n### 5. Modality-Aware Retrieval\n\n\u003cdiv style=\"background: linear-gradient(90deg, #16213e 0%, #0f3460 100%); border-radius: 10px; padding: 20px; margin: 15px 0; border-left: 4px solid #ff6b6b;\"\u003e\n\nThe hybrid retrieval system combines vector similarity search with graph traversal algorithms for comprehensive content retrieval. 
It implements modality-aware ranking mechanisms and maintains relational coherence between retrieved elements to ensure contextually integrated information delivery.\n\n**Retrieval Mechanisms:**\n\n- **🔀 Vector-Graph Fusion**: Integrates vector similarity search with graph traversal algorithms. This approach leverages both semantic embeddings and structural relationships for comprehensive content retrieval.\n\n- **📊 Modality-Aware Ranking**: Implements adaptive scoring mechanisms that weight retrieval results based on content type relevance. The system adjusts rankings according to query-specific modality preferences.\n\n- **🔗 Relational Coherence Maintenance**: Maintains semantic and structural relationships between retrieved elements. This ensures coherent information delivery and contextual integrity.\n\n\u003c/div\u003e\n\n---\n\n## 🚀 Quick Start\n\n*Initialize Your AI Journey*\n\n\u003cdiv align=\"center\"\u003e\n  \u003cimg src=\"https://user-images.githubusercontent.com/74038190/212284158-e840e285-664b-44d7-b79b-e264b5e54825.gif\" width=\"400\"\u003e\n\u003c/div\u003e\n\n### Installation\n\n#### Option 1: Install from PyPI (Recommended)\n\n```bash\n# Basic installation\npip install raganything\n\n# With optional dependencies for extended format support:\npip install 'raganything[all]'              # All optional features\npip install 'raganything[image]'            # Image format conversion (BMP, TIFF, GIF, WebP)\npip install 'raganything[text]'             # Text file processing (TXT, MD)\npip install 'raganything[image,text]'       # Multiple features\n```\n\n#### Option 2: Install from Source\n```bash\n# Install uv (if not already installed)\ncurl -LsSf https://astral.sh/uv/install.sh | sh\n\n# Clone and setup the project with uv\ngit clone https://github.com/HKUDS/RAG-Anything.git\ncd RAG-Anything\n\n# Install the package and dependencies in a virtual environment\nuv sync\n\n# If you encounter network timeouts (especially for opencv packages):\n# 
UV_HTTP_TIMEOUT=120 uv sync\n\n# Run commands directly with uv (recommended approach)\nuv run python examples/raganything_example.py --help\n\n# Install with optional dependencies\nuv sync --extra image --extra text  # Specific extras\nuv sync --all-extras                 # All optional features\n```\n\n#### Optional Dependencies\n\n- **`[image]`** - Enables processing of BMP, TIFF, GIF, WebP image formats (requires Pillow)\n- **`[text]`** - Enables processing of TXT and MD files (requires ReportLab)\n- **`[all]`** - Includes all Python optional dependencies\n\n\u003e **⚠️ Office Document Processing Requirements:**\n\u003e - Office documents (.doc, .docx, .ppt, .pptx, .xls, .xlsx) require **LibreOffice** installation\n\u003e - Download from [LibreOffice official website](https://www.libreoffice.org/download/download/)\n\u003e - **Windows**: Download installer from official website\n\u003e - **macOS**: `brew install --cask libreoffice`\n\u003e - **Ubuntu/Debian**: `sudo apt-get install libreoffice`\n\u003e - **CentOS/RHEL**: `sudo yum install libreoffice`\n\n**Check MinerU installation:**\n\n```bash\n# Verify installation\nmineru --version\n\n# Check if properly configured\npython -c \"from raganything import RAGAnything; rag = RAGAnything(); print('✅ MinerU installed properly' if rag.check_parser_installation() else '❌ MinerU installation issue')\"\n```\n\nModels are downloaded automatically on first use. For manual download, refer to [MinerU Model Source Configuration](https://github.com/opendatalab/MinerU/blob/master/README.md#22-model-source-configuration).\n\n### Usage Examples\n\n#### 1. 
End-to-End Document Processing\n\n```python\nimport asyncio\nfrom raganything import RAGAnything, RAGAnythingConfig\nfrom lightrag.llm.openai import openai_complete_if_cache, openai_embed\nfrom lightrag.utils import EmbeddingFunc\n\nasync def main():\n    # Set up API configuration\n    api_key = \"your-api-key\"\n    base_url = \"your-base-url\"  # Optional\n\n    # Create RAGAnything configuration\n    config = RAGAnythingConfig(\n        working_dir=\"./rag_storage\",\n        parser=\"mineru\",  # Parser selection: mineru or docling\n        parse_method=\"auto\",  # Parse method: auto, ocr, or txt\n        enable_image_processing=True,\n        enable_table_processing=True,\n        enable_equation_processing=True,\n    )\n\n    # Define LLM model function\n    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):\n        return openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        )\n\n    # Define vision model function for image processing\n    def vision_model_func(\n        prompt, system_prompt=None, history_messages=[], image_data=None, messages=None, **kwargs\n    ):\n        # If messages format is provided (for multimodal VLM enhanced query), use it directly\n        if messages:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                history_messages=[],\n                messages=messages,\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Traditional single image format\n        elif image_data:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                
history_messages=[],\n                messages=[\n                    {\"role\": \"system\", \"content\": system_prompt}\n                    if system_prompt\n                    else None,\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\"type\": \"text\", \"text\": prompt},\n                            {\n                                \"type\": \"image_url\",\n                                \"image_url\": {\n                                    \"url\": f\"data:image/jpeg;base64,{image_data}\"\n                                },\n                            },\n                        ],\n                    }\n                    if image_data\n                    else {\"role\": \"user\", \"content\": prompt},\n                ],\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Pure text format\n        else:\n            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)\n\n    # Define embedding function\n    embedding_func = EmbeddingFunc(\n        embedding_dim=3072,\n        max_token_size=8192,\n        func=lambda texts: openai_embed(\n            texts,\n            model=\"text-embedding-3-large\",\n            api_key=api_key,\n            base_url=base_url,\n        ),\n    )\n\n    # Initialize RAGAnything\n    rag = RAGAnything(\n        config=config,\n        llm_model_func=llm_model_func,\n        vision_model_func=vision_model_func,\n        embedding_func=embedding_func,\n    )\n\n    # Process a document\n    await rag.process_document_complete(\n        file_path=\"path/to/your/document.pdf\",\n        output_dir=\"./output\",\n        parse_method=\"auto\"\n    )\n\n    # Query the processed content\n    # Pure text query - for basic knowledge base search\n    text_result = await rag.aquery(\n        \"What are the main findings shown in the figures and 
tables?\",\n        mode=\"hybrid\"\n    )\n    print(\"Text query result:\", text_result)\n\n    # Multimodal query with specific multimodal content\n    multimodal_result = await rag.aquery_with_multimodal(\n    \"Explain this formula and its relevance to the document content\",\n    multimodal_content=[{\n        \"type\": \"equation\",\n        \"latex\": \"P(d|q) = \\\\frac{P(q|d) \\\\cdot P(d)}{P(q)}\",\n        \"equation_caption\": \"Document relevance probability\"\n    }],\n    mode=\"hybrid\"\n)\n    print(\"Multimodal query result:\", multimodal_result)\n\nif __name__ == \"__main__\":\n    asyncio.run(main())\n```\n\n#### 2. Direct Multimodal Content Processing\n\n```python\nimport asyncio\nfrom lightrag import LightRAG\nfrom lightrag.llm.openai import openai_complete_if_cache, openai_embed\nfrom lightrag.utils import EmbeddingFunc\nfrom raganything.modalprocessors import ImageModalProcessor, TableModalProcessor\n\nasync def process_multimodal_content():\n    # Set up API configuration\n    api_key = \"your-api-key\"\n    base_url = \"your-base-url\"  # Optional\n\n    # Initialize LightRAG\n    rag = LightRAG(\n        working_dir=\"./rag_storage\",\n        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        ),\n        embedding_func=EmbeddingFunc(\n            embedding_dim=3072,\n            max_token_size=8192,\n            func=lambda texts: openai_embed(\n                texts,\n                model=\"text-embedding-3-large\",\n                api_key=api_key,\n                base_url=base_url,\n            ),\n        )\n    )\n    await rag.initialize_storages()\n\n    # Process an image\n    image_processor = ImageModalProcessor(\n        lightrag=rag,\n   
     modal_caption_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(\n            \"gpt-4o\",\n            \"\",\n            system_prompt=None,\n            history_messages=[],\n            messages=[\n                {\"role\": \"system\", \"content\": system_prompt} if system_prompt else None,\n                {\"role\": \"user\", \"content\": [\n                    {\"type\": \"text\", \"text\": prompt},\n                    {\"type\": \"image_url\", \"image_url\": {\"url\": f\"data:image/jpeg;base64,{image_data}\"}}\n                ]} if image_data else {\"role\": \"user\", \"content\": prompt}\n            ],\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        ) if image_data else openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        )\n    )\n\n    image_content = {\n        \"img_path\": \"path/to/image.jpg\",\n        \"image_caption\": [\"Figure 1: Experimental results\"],\n        \"image_footnote\": [\"Data collected in 2024\"]\n    }\n\n    description, entity_info = await image_processor.process_multimodal_content(\n        modal_content=image_content,\n        content_type=\"image\",\n        file_path=\"research_paper.pdf\",\n        entity_name=\"Experimental Results Figure\"\n    )\n\n    # Process a table\n    table_processor = TableModalProcessor(\n        lightrag=rag,\n        modal_caption_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        )\n    )\n\n    
table_content = {\n        \"table_body\": \"\"\"\n        | Method | Accuracy | F1-Score |\n        |--------|----------|----------|\n        | RAGAnything | 95.2% | 0.94 |\n        | Baseline | 87.3% | 0.85 |\n        \"\"\",\n        \"table_caption\": [\"Performance Comparison\"],\n        \"table_footnote\": [\"Results on test dataset\"]\n    }\n\n    description, entity_info = await table_processor.process_multimodal_content(\n        modal_content=table_content,\n        content_type=\"table\",\n        file_path=\"research_paper.pdf\",\n        entity_name=\"Performance Results Table\"\n    )\n\nif __name__ == \"__main__\":\n    asyncio.run(process_multimodal_content())\n```\n\n#### 3. Batch Processing\n\n```python\n# Process multiple documents\nawait rag.process_folder_complete(\n    folder_path=\"./documents\",\n    output_dir=\"./output\",\n    file_extensions=[\".pdf\", \".docx\", \".pptx\"],\n    recursive=True,\n    max_workers=4\n)\n```\n\n#### 4. Custom Modal Processors\n\n```python\nfrom raganything.modalprocessors import GenericModalProcessor\n\nclass CustomModalProcessor(GenericModalProcessor):\n    async def process_multimodal_content(self, modal_content, content_type, file_path, entity_name):\n        # Your custom processing logic\n        enhanced_description = await self.analyze_custom_content(modal_content)\n        entity_info = self.create_custom_entity(enhanced_description, entity_name)\n        return await self._create_entity_and_chunk(enhanced_description, entity_info, file_path)\n```\n\n#### 5. 
Query Options\n\nRAG-Anything provides three types of query methods:\n\n**Pure Text Queries** - Direct knowledge base search using LightRAG:\n```python\n# Different query modes for text queries\ntext_result_hybrid = await rag.aquery(\"Your question\", mode=\"hybrid\")\ntext_result_local = await rag.aquery(\"Your question\", mode=\"local\")\ntext_result_global = await rag.aquery(\"Your question\", mode=\"global\")\ntext_result_naive = await rag.aquery(\"Your question\", mode=\"naive\")\n\n# Synchronous version\nsync_text_result = rag.query(\"Your question\", mode=\"hybrid\")\n```\n\n**VLM Enhanced Queries** - Automatically analyze images in retrieved context using VLM:\n```python\n# VLM enhanced query (automatically enabled when vision_model_func is provided)\nvlm_result = await rag.aquery(\n    \"Analyze the charts and figures in the document\",\n    mode=\"hybrid\"\n    # vlm_enhanced=True is automatically set when vision_model_func is available\n)\n\n# Manually control VLM enhancement\nvlm_enabled = await rag.aquery(\n    \"What do the images show in this document?\",\n    mode=\"hybrid\",\n    vlm_enhanced=True  # Force enable VLM enhancement\n)\n\nvlm_disabled = await rag.aquery(\n    \"What do the images show in this document?\",\n    mode=\"hybrid\",\n    vlm_enhanced=False  # Force disable VLM enhancement\n)\n\n# When documents contain images, VLM can see and analyze them directly\n# The system will automatically:\n# 1. Retrieve relevant context containing image paths\n# 2. Load and encode images as base64\n# 3. 
Send both text context and images to VLM for comprehensive analysis\n```\n\n**Multimodal Queries** - Enhanced queries with specific multimodal content analysis:\n```python\n# Query with table data\ntable_result = await rag.aquery_with_multimodal(\n    \"Compare these performance metrics with the document content\",\n    multimodal_content=[{\n        \"type\": \"table\",\n        \"table_data\": \"\"\"Method,Accuracy,Speed\n                        RAGAnything,95.2%,120ms\n                        Traditional,87.3%,180ms\"\"\",\n        \"table_caption\": \"Performance comparison\"\n    }],\n    mode=\"hybrid\"\n)\n\n# Query with equation content\nequation_result = await rag.aquery_with_multimodal(\n    \"Explain this formula and its relevance to the document content\",\n    multimodal_content=[{\n        \"type\": \"equation\",\n        \"latex\": \"P(d|q) = \\\\frac{P(q|d) \\\\cdot P(d)}{P(q)}\",\n        \"equation_caption\": \"Document relevance probability\"\n    }],\n    mode=\"hybrid\"\n)\n```\n\n#### 6. 
Loading Existing LightRAG Instance\n\n```python\nimport asyncio\nfrom raganything import RAGAnything, RAGAnythingConfig\nfrom lightrag import LightRAG\nfrom lightrag.llm.openai import openai_complete_if_cache, openai_embed\nfrom lightrag.kg.shared_storage import initialize_pipeline_status\nfrom lightrag.utils import EmbeddingFunc\nimport os\n\nasync def load_existing_lightrag():\n    # Set up API configuration\n    api_key = \"your-api-key\"\n    base_url = \"your-base-url\"  # Optional\n\n    # First, create or load existing LightRAG instance\n    lightrag_working_dir = \"./existing_lightrag_storage\"\n\n    # Check if previous LightRAG instance exists\n    if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):\n        print(\"✅ Found existing LightRAG instance, loading...\")\n    else:\n        print(\"❌ No existing LightRAG instance found, will create new one\")\n\n    # Create/load LightRAG instance with your configuration\n    lightrag_instance = LightRAG(\n        working_dir=lightrag_working_dir,\n        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        ),\n        embedding_func=EmbeddingFunc(\n            embedding_dim=3072,\n            max_token_size=8192,\n            func=lambda texts: openai_embed(\n                texts,\n                model=\"text-embedding-3-large\",\n                api_key=api_key,\n                base_url=base_url,\n            ),\n        )\n    )\n\n    # Initialize storage (this will load existing data if available)\n    await lightrag_instance.initialize_storages()\n    await initialize_pipeline_status()\n\n    # Define vision model function for image processing\n    def vision_model_func(\n        prompt, 
system_prompt=None, history_messages=[], image_data=None, messages=None, **kwargs\n    ):\n        # If messages format is provided (for multimodal VLM enhanced query), use it directly\n        if messages:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                history_messages=[],\n                messages=messages,\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Traditional single image format\n        elif image_data:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                history_messages=[],\n                messages=[\n                    {\"role\": \"system\", \"content\": system_prompt}\n                    if system_prompt\n                    else None,\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\"type\": \"text\", \"text\": prompt},\n                            {\n                                \"type\": \"image_url\",\n                                \"image_url\": {\n                                    \"url\": f\"data:image/jpeg;base64,{image_data}\"\n                                },\n                            },\n                        ],\n                    }\n                    if image_data\n                    else {\"role\": \"user\", \"content\": prompt},\n                ],\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Pure text format\n        else:\n            return lightrag_instance.llm_model_func(prompt, system_prompt, history_messages, **kwargs)\n\n    # Now use existing LightRAG instance to initialize RAGAnything\n    rag = RAGAnything(\n        lightrag=lightrag_instance,  # Pass existing 
LightRAG instance\n        vision_model_func=vision_model_func,\n        # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance\n    )\n\n    # Query existing knowledge base\n    result = await rag.aquery(\n        \"What data has been processed in this LightRAG instance?\",\n        mode=\"hybrid\"\n    )\n    print(\"Query result:\", result)\n\n    # Add new multimodal document to existing LightRAG instance\n    await rag.process_document_complete(\n        file_path=\"path/to/new/multimodal_document.pdf\",\n        output_dir=\"./output\"\n    )\n\nif __name__ == \"__main__\":\n    asyncio.run(load_existing_lightrag())\n```\n\n#### 7. Direct Content List Insertion\n\nFor scenarios where you already have a pre-parsed content list (e.g., from external parsers or previous processing), you can directly insert it into RAGAnything without document parsing:\n\n```python\nimport asyncio\nfrom raganything import RAGAnything, RAGAnythingConfig\nfrom lightrag.llm.openai import openai_complete_if_cache, openai_embed\nfrom lightrag.utils import EmbeddingFunc\n\nasync def insert_content_list_example():\n    # Set up API configuration\n    api_key = \"your-api-key\"\n    base_url = \"your-base-url\"  # Optional\n\n    # Create RAGAnything configuration\n    config = RAGAnythingConfig(\n        working_dir=\"./rag_storage\",\n        enable_image_processing=True,\n        enable_table_processing=True,\n        enable_equation_processing=True,\n    )\n\n    # Define model functions\n    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):\n        return openai_complete_if_cache(\n            \"gpt-4o-mini\",\n            prompt,\n            system_prompt=system_prompt,\n            history_messages=history_messages,\n            api_key=api_key,\n            base_url=base_url,\n            **kwargs,\n        )\n\n    def vision_model_func(prompt, system_prompt=None, history_messages=[], image_data=None, 
messages=None, **kwargs):\n        # If messages format is provided (for multimodal VLM enhanced query), use it directly\n        if messages:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                history_messages=[],\n                messages=messages,\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Traditional single image format\n        elif image_data:\n            return openai_complete_if_cache(\n                \"gpt-4o\",\n                \"\",\n                system_prompt=None,\n                history_messages=[],\n                messages=[\n                    {\"role\": \"system\", \"content\": system_prompt} if system_prompt else None,\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\"type\": \"text\", \"text\": prompt},\n                            {\"type\": \"image_url\", \"image_url\": {\"url\": f\"data:image/jpeg;base64,{image_data}\"}}\n                        ],\n                    } if image_data else {\"role\": \"user\", \"content\": prompt},\n                ],\n                api_key=api_key,\n                base_url=base_url,\n                **kwargs,\n            )\n        # Pure text format\n        else:\n            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)\n\n    embedding_func = EmbeddingFunc(\n        embedding_dim=3072,\n        max_token_size=8192,\n        func=lambda texts: openai_embed(\n            texts,\n            model=\"text-embedding-3-large\",\n            api_key=api_key,\n            base_url=base_url,\n        ),\n    )\n\n    # Initialize RAGAnything\n    rag = RAGAnything(\n        config=config,\n        llm_model_func=llm_model_func,\n        vision_model_func=vision_model_func,\n        
embedding_func=embedding_func,\n    )\n\n    # Example: Pre-parsed content list from external source\n    content_list = [\n        {\n            \"type\": \"text\",\n            \"text\": \"This is the introduction section of our research paper.\",\n            \"page_idx\": 0  # Page number where this content appears\n        },\n        {\n            \"type\": \"image\",\n            \"img_path\": \"/absolute/path/to/figure1.jpg\",  # IMPORTANT: Use absolute path\n            \"image_caption\": [\"Figure 1: System Architecture\"],\n            \"image_footnote\": [\"Source: Authors' original design\"],\n            \"page_idx\": 1  # Page number where this image appears\n        },\n        {\n            \"type\": \"table\",\n            \"table_body\": \"| Method | Accuracy | F1-Score |\\n|--------|----------|----------|\\n| Ours | 95.2% | 0.94 |\\n| Baseline | 87.3% | 0.85 |\",\n            \"table_caption\": [\"Table 1: Performance Comparison\"],\n            \"table_footnote\": [\"Results on test dataset\"],\n            \"page_idx\": 2  # Page number where this table appears\n        },\n        {\n            \"type\": \"equation\",\n            \"latex\": \"P(d|q) = \\\\frac{P(q|d) \\\\cdot P(d)}{P(q)}\",\n            \"text\": \"Document relevance probability formula\",\n            \"page_idx\": 3  # Page number where this equation appears\n        },\n        {\n            \"type\": \"text\",\n            \"text\": \"In conclusion, our method demonstrates superior performance across all metrics.\",\n            \"page_idx\": 4  # Page number where this content appears\n        }\n    ]\n\n    # Insert the content list directly\n    await rag.insert_content_list(\n        content_list=content_list,\n        file_path=\"research_paper.pdf\",  # Reference file name for citation\n        split_by_character=None,         # Optional text splitting\n        split_by_character_only=False,   # Optional text splitting mode\n        doc_id=None,               
      # Optional custom document ID (will be auto-generated if not provided)\n        display_stats=True               # Show content statistics\n    )\n\n    # Query the inserted content\n    result = await rag.aquery(\n        \"What are the key findings and performance metrics mentioned in the research?\",\n        mode=\"hybrid\"\n    )\n    print(\"Query result:\", result)\n\n    # You can also insert multiple content lists with different document IDs\n    another_content_list = [\n        {\n            \"type\": \"text\",\n            \"text\": \"This is content from another document.\",\n            \"page_idx\": 0  # Page number where this content appears\n        },\n        {\n            \"type\": \"table\",\n            \"table_body\": \"| Feature | Value |\\n|---------|-------|\\n| Speed | Fast |\\n| Accuracy | High |\",\n            \"table_caption\": [\"Feature Comparison\"],\n            \"page_idx\": 1  # Page number where this table appears\n        }\n    ]\n\n    await rag.insert_content_list(\n        content_list=another_content_list,\n        file_path=\"another_document.pdf\",\n        doc_id=\"custom-doc-id-123\"  # Custom document ID\n    )\n\nif __name__ == \"__main__\":\n    asyncio.run(insert_content_list_example())\n```\n\n**Content List Format:**\n\nThe `content_list` should follow the standard format with each item being a dictionary containing:\n\n- **Text content**: `{\"type\": \"text\", \"text\": \"content text\", \"page_idx\": 0}`\n- **Image content**: `{\"type\": \"image\", \"img_path\": \"/absolute/path/to/image.jpg\", \"image_caption\": [\"caption\"], \"image_footnote\": [\"note\"], \"page_idx\": 1}`\n- **Table content**: `{\"type\": \"table\", \"table_body\": \"markdown table\", \"table_caption\": [\"caption\"], \"table_footnote\": [\"note\"], \"page_idx\": 2}`\n- **Equation content**: `{\"type\": \"equation\", \"latex\": \"LaTeX formula\", \"text\": \"description\", \"page_idx\": 3}`\n- **Generic content**: `{\"type\": 
\"custom_type\", \"content\": \"any content\", \"page_idx\": 4}`\n\n**Important Notes:**\n- **`img_path`**: Must be an absolute path to the image file (e.g., `/home/user/images/chart.jpg` or `C:\\Users\\user\\images\\chart.jpg`)\n- **`page_idx`**: Represents the page number where the content appears in the original document (0-based indexing)\n- **Content ordering**: Items are processed in the order they appear in the list\n\nThis method is particularly useful when:\n- You have content from external parsers (non-MinerU/Docling)\n- You want to process programmatically generated content\n- You need to insert content from multiple sources into a single knowledge base\n- You have cached parsing results that you want to reuse\n\n---\n\n## 🛠️ Examples\n\n*Practical Implementation Demos*\n\n\u003cdiv align=\"center\"\u003e\n  \u003cimg src=\"https://user-images.githubusercontent.com/74038190/212257455-13e3e01e-d6a6-45dc-bb92-3ab87b12dfc1.gif\" width=\"300\"\u003e\n\u003c/div\u003e\n\nThe `examples/` directory contains comprehensive usage examples:\n\n- **`raganything_example.py`**: End-to-end document processing with MinerU\n- **`modalprocessors_example.py`**: Direct multimodal content processing\n- **`office_document_test.py`**: Office document parsing test with MinerU (no API key required)\n- **`image_format_test.py`**: Image format parsing test with MinerU (no API key required)\n- **`text_format_test.py`**: Text format parsing test with MinerU (no API key required)\n\n**Run examples:**\n\n```bash\n# End-to-end processing with parser selection\npython examples/raganything_example.py path/to/document.pdf --api-key YOUR_API_KEY --parser mineru\n\n# Direct modal processing\npython examples/modalprocessors_example.py --api-key YOUR_API_KEY\n\n# Office document parsing test (MinerU only)\npython examples/office_document_test.py --file path/to/document.docx\n\n# Image format parsing test (MinerU only)\npython examples/image_format_test.py --file path/to/image.bmp\n\n# Text 
format parsing test (MinerU only)\npython examples/text_format_test.py --file path/to/document.md\n\n# Check LibreOffice installation\npython examples/office_document_test.py --check-libreoffice --file dummy\n\n# Check PIL/Pillow installation\npython examples/image_format_test.py --check-pillow --file dummy\n\n# Check ReportLab installation\npython examples/text_format_test.py --check-reportlab --file dummy\n```\n\n---\n\n## 🔧 Configuration\n\n*System Optimization Parameters*\n\n### Environment Variables\n\nCreate a `.env` file (refer to `.env.example`):\n\n```bash\nOPENAI_API_KEY=your_openai_api_key\nOPENAI_BASE_URL=your_base_url  # Optional\nOUTPUT_DIR=./output             # Default output directory for parsed documents\nPARSER=mineru                   # Parser selection: mineru or docling\nPARSE_METHOD=auto              # Parse method: auto, ocr, or txt\n```\n\n**Note:** For backward compatibility, legacy environment variable names are still supported:\n- `MINERU_PARSE_METHOD` is deprecated; use `PARSE_METHOD` instead\n\n\u003e **Note**: API keys are only required for full RAG processing with LLM integration. 
The parsing test files (`office_document_test.py`, `image_format_test.py`, and `text_format_test.py`) only test parser functionality and do not require API keys.\n\n### Parser Configuration\n\nRAGAnything now supports multiple parsers, each with specific advantages:\n\n#### MinerU Parser\n- Supports PDF, images, Office documents, and more formats\n- Powerful OCR and table extraction capabilities\n- GPU acceleration support\n\n#### Docling Parser\n- Optimized for Office documents and HTML files\n- Better document structure preservation\n- Native support for multiple Office formats\n\n### MinerU Configuration\n\n```bash\n# MinerU 2.0 uses command-line parameters instead of config files\n# Check available options:\nmineru --help\n\n# Common configurations:\nmineru -p input.pdf -o output_dir -m auto    # Automatic parsing mode\nmineru -p input.pdf -o output_dir -m ocr     # OCR-focused parsing\nmineru -p input.pdf -o output_dir -b pipeline --device cuda  # GPU acceleration\n```\n\nYou can also configure parsing through RAGAnything parameters:\n\n```python\n# Basic parsing configuration with parser selection\nawait rag.process_document_complete(\n    file_path=\"document.pdf\",\n    output_dir=\"./output/\",\n    parse_method=\"auto\",          # or \"ocr\", \"txt\"\n    parser=\"mineru\"               # Optional: \"mineru\" or \"docling\"\n)\n\n# Advanced parsing configuration with special parameters\nawait rag.process_document_complete(\n    file_path=\"document.pdf\",\n    output_dir=\"./output/\",\n    parse_method=\"auto\",          # Parsing method: \"auto\", \"ocr\", \"txt\"\n    parser=\"mineru\",              # Parser selection: \"mineru\" or \"docling\"\n\n    # MinerU special parameters - all supported kwargs:\n    lang=\"ch\",                   # Document language for OCR optimization (e.g., \"ch\", \"en\", \"ja\")\n    device=\"cuda:0\",             # Inference device: \"cpu\", \"cuda\", \"cuda:0\", \"npu\", \"mps\"\n    start_page=0,                # Starting page number 
(0-based, for PDF)\n    end_page=10,                 # Ending page number (0-based, for PDF)\n    formula=True,                # Enable formula parsing\n    table=True,                  # Enable table parsing\n    backend=\"pipeline\",          # Parsing backend: pipeline|hybrid-auto-engine|hybrid-http-client|vlm-auto-engine|vlm-http-client.\n    source=\"huggingface\",        # Model source: \"huggingface\", \"modelscope\", \"local\"\n    # vlm_url=\"http://127.0.0.1:3000\" # Service address when using backend=vlm-http-client\n\n    # Standard RAGAnything parameters\n    display_stats=True,          # Display content statistics\n    split_by_character=None,     # Optional character to split text by\n    doc_id=None                  # Optional document ID\n)\n```\n\n\u003e **Note**: MinerU 2.0 no longer uses the `magic-pdf.json` configuration file. All settings are now passed as command-line parameters or function arguments. RAG-Anything now supports multiple document parsers - you can choose between MinerU and Docling based on your needs.\n\n### Processing Requirements\n\nDifferent content types require specific optional dependencies:\n\n- **Office Documents** (.doc, .docx, .ppt, .pptx, .xls, .xlsx): Install [LibreOffice](https://www.libreoffice.org/download/download/)\n- **Extended Image Formats** (.bmp, .tiff, .gif, .webp): Install with `pip install raganything[image]`\n- **Text Files** (.txt, .md): Install with `pip install raganything[text]`\n\n\u003e **📋 Quick Install**: Use `pip install raganything[all]` to enable all format support (Python dependencies only - LibreOffice still needs separate installation)\n\n---\n\n## 🧪 Supported Content Types\n\n### Document Formats\n\n- **PDFs** - Research papers, reports, presentations\n- **Office Documents** - DOC, DOCX, PPT, PPTX, XLS, XLSX\n- **Images** - JPG, PNG, BMP, TIFF, GIF, WebP\n- **Text Files** - TXT, MD\n\n### Multimodal Elements\n\n- **Images** - Photographs, diagrams, charts, screenshots\n- **Tables** - 
Data tables, comparison charts, statistical summaries\n- **Equations** - Mathematical formulas in LaTeX format\n- **Generic Content** - Custom content types via extensible processors\n\n*For installation of format-specific dependencies, see the [Configuration](#-configuration) section.*\n\n---\n\n## 📖 Citation\n\n*Academic Reference*\n\n\u003cdiv align=\"center\"\u003e\n  \u003cdiv style=\"width: 60px; height: 60px; margin: 20px auto; position: relative;\"\u003e\n    \u003cdiv style=\"width: 100%; height: 100%; border: 2px solid #00d9ff; border-radius: 50%; position: relative;\"\u003e\n      \u003cdiv style=\"position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); font-size: 24px; color: #00d9ff;\"\u003e📖\u003c/div\u003e\n    \u003c/div\u003e\n    \u003cdiv style=\"position: absolute; bottom: -5px; left: 50%; transform: translateX(-50%); width: 20px; height: 20px; background: white; border-right: 2px solid #00d9ff; border-bottom: 2px solid #00d9ff; transform: rotate(45deg);\"\u003e\u003c/div\u003e\n  \u003c/div\u003e\n\u003c/div\u003e\n\nIf you find RAG-Anything useful in your research, please cite our paper:\n\n```bibtex\n@misc{guo2025raganythingallinoneragframework,\n      title={RAG-Anything: All-in-One RAG Framework},\n      author={Zirui Guo and Xubin Ren and Lingrui Xu and Jiahao Zhang and Chao Huang},\n      year={2025},\n      eprint={2510.12323},\n      archivePrefix={arXiv},\n      primaryClass={cs.AI},\n      url={https://arxiv.org/abs/2510.12323},\n}\n```\n\n---\n\n## 🔗 Related Projects\n\n*Ecosystem \u0026 Extensions*\n\n\u003cdiv align=\"center\"\u003e\n  \u003ctable\u003e\n    \u003ctr\u003e\n      \u003ctd align=\"center\"\u003e\n        \u003ca href=\"https://github.com/HKUDS/LightRAG\"\u003e\n          \u003cdiv style=\"width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; 
align-items: center; justify-content: center; margin-bottom: 10px;\"\u003e\n            \u003cspan style=\"font-size: 32px;\"\u003e⚡\u003c/span\u003e\n          \u003c/div\u003e\n          \u003cb\u003eLightRAG\u003c/b\u003e\u003cbr\u003e\n          \u003csub\u003eSimple and Fast RAG\u003c/sub\u003e\n        \u003c/a\u003e\n      \u003c/td\u003e\n      \u003ctd align=\"center\"\u003e\n        \u003ca href=\"https://github.com/HKUDS/VideoRAG\"\u003e\n          \u003cdiv style=\"width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; align-items: center; justify-content: center; margin-bottom: 10px;\"\u003e\n            \u003cspan style=\"font-size: 32px;\"\u003e🎥\u003c/span\u003e\n          \u003c/div\u003e\n          \u003cb\u003eVideoRAG\u003c/b\u003e\u003cbr\u003e\n          \u003csub\u003eExtreme Long-Context Video RAG\u003c/sub\u003e\n        \u003c/a\u003e\n      \u003c/td\u003e\n      \u003ctd align=\"center\"\u003e\n        \u003ca href=\"https://github.com/HKUDS/MiniRAG\"\u003e\n          \u003cdiv style=\"width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; align-items: center; justify-content: center; margin-bottom: 10px;\"\u003e\n            \u003cspan style=\"font-size: 32px;\"\u003e✨\u003c/span\u003e\n          \u003c/div\u003e\n          \u003cb\u003eMiniRAG\u003c/b\u003e\u003cbr\u003e\n          \u003csub\u003eExtremely Simple RAG\u003c/sub\u003e\n        \u003c/a\u003e\n      \u003c/td\u003e\n    \u003c/tr\u003e\n  \u003c/table\u003e\n\u003c/div\u003e\n\n---\n\n## ⭐ Star History\n\n*Community Growth Trajectory*\n\n\u003cdiv align=\"center\"\u003e\n  \u003ca href=\"https://star-history.com/#HKUDS/RAG-Anything\u0026Date\"\u003e\n    \u003cpicture\u003e\n      
\u003csource media=\"(prefers-color-scheme: dark)\" srcset=\"https://api.star-history.com/svg?repos=HKUDS/RAG-Anything\u0026type=Date\u0026theme=dark\" /\u003e\n      \u003csource media=\"(prefers-color-scheme: light)\" srcset=\"https://api.star-history.com/svg?repos=HKUDS/RAG-Anything\u0026type=Date\" /\u003e\n      \u003cimg alt=\"Star History Chart\" src=\"https://api.star-history.com/svg?repos=HKUDS/RAG-Anything\u0026type=Date\" style=\"border-radius: 15px; box-shadow: 0 0 30px rgba(0, 217, 255, 0.3);\" /\u003e\n    \u003c/picture\u003e\n  \u003c/a\u003e\n\u003c/div\u003e\n\n---\n\n## 🤝 Contribution\n\n*Join the Innovation*\n\n\u003cdiv align=\"center\"\u003e\n  We thank all our contributors for their valuable contributions.\n\u003c/div\u003e\n\n\u003cdiv align=\"center\"\u003e\n  \u003ca href=\"https://github.com/HKUDS/RAG-Anything/graphs/contributors\"\u003e\n    \u003cimg src=\"https://contrib.rocks/image?repo=HKUDS/RAG-Anything\" style=\"border-radius: 15px; box-shadow: 0 0 20px rgba(0, 217, 255, 0.3);\" /\u003e\n  \u003c/a\u003e\n\u003c/div\u003e\n\n---\n\n\u003cdiv align=\"center\" style=\"background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; padding: 30px; margin: 30px 0;\"\u003e\n  \u003cdiv\u003e\n    \u003cimg src=\"https://user-images.githubusercontent.com/74038190/212284100-561aa473-3905-4a80-b561-0d28506553ee.gif\" width=\"500\"\u003e\n  \u003c/div\u003e\n  \u003cdiv style=\"margin-top: 20px;\"\u003e\n    \u003ca href=\"https://github.com/HKUDS/RAG-Anything\" style=\"text-decoration: none;\"\u003e\n      \u003cimg src=\"https://img.shields.io/badge/⭐%20Star%20us%20on%20GitHub-1a1a2e?style=for-the-badge\u0026logo=github\u0026logoColor=white\"\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/HKUDS/RAG-Anything/issues\" style=\"text-decoration: none;\"\u003e\n      \u003cimg src=\"https://img.shields.io/badge/🐛%20Report%20Issues-ff6b6b?style=for-the-badge\u0026logo=github\u0026logoColor=white\"\u003e\n    
\u003c/a\u003e\n    \u003ca href=\"https://github.com/HKUDS/RAG-Anything/discussions\" style=\"text-decoration: none;\"\u003e\n      \u003cimg src=\"https://img.shields.io/badge/💬%20Discussions-4ecdc4?style=for-the-badge\u0026logo=github\u0026logoColor=white\"\u003e\n    \u003c/a\u003e\n  \u003c/div\u003e\n\u003c/div\u003e\n\n\u003cdiv align=\"center\"\u003e\n  \u003cdiv style=\"width: 100%; max-width: 600px; margin: 20px auto; padding: 20px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2);\"\u003e\n    \u003cdiv style=\"display: flex; justify-content: center; align-items: center; gap: 15px;\"\u003e\n      \u003cspan style=\"font-size: 24px;\"\u003e⭐\u003c/span\u003e\n      \u003cspan style=\"color: #00d9ff; font-size: 18px;\"\u003eThank you for visiting RAG-Anything!\u003c/span\u003e\n      \u003cspan style=\"font-size: 24px;\"\u003e⭐\u003c/span\u003e\n    \u003c/div\u003e\n    \u003cdiv style=\"margin-top: 10px; color: #00d9ff; font-size: 16px;\"\u003eBuilding the Future of Multimodal AI\u003c/div\u003e\n  \u003c/div\u003e\n\u003c/div\u003e\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fhkuds%2Frag-anything","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fhkuds%2Frag-anything","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fhkuds%2Frag-anything/lists"}