{"id":13394818,"url":"https://github.com/ArchiveBox/ArchiveBox","last_synced_at":"2025-03-13T20:31:44.451Z","repository":{"id":37337671,"uuid":"90356372","full_name":"ArchiveBox/ArchiveBox","owner":"ArchiveBox","description":"🗃 Open source self-hosted web archiving. Takes URLs/browser history/bookmarks/Pocket/Pinboard/etc., saves HTML, JS, PDFs, media, and more...","archived":false,"fork":false,"pushed_at":"2024-10-29T07:33:21.000Z","size":10861,"stargazers_count":21957,"open_issues_count":188,"forks_count":1165,"subscribers_count":173,"default_branch":"dev","last_synced_at":"2024-10-29T16:58:29.393Z","etag":null,"topics":["archivebox","backups","bookmark-archiver","browser-bookmarks","chromium","digipres","firefox","headless-browser","internet-archiving","pinboard","pocket","python","rss","self-hosted","singlefile","warc","wayback-machine","web-archiving","wget","youtube-dl"],"latest_commit_sha":null,"homepage":"https://archivebox.io","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/ArchiveBox.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":".github/CONTRIBUTING.md","funding":".github/FUNDING.yml","license":"LICENSE","code_of_conduct":".github/CODE_OF_CONDUCT.md","threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":".github/SECURITY.md","support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null},"funding":{"github":["ArchiveBox","pirate"],"custom":["https://donate.archivebox.io","https://paypal.me/NicholasSweeting"]}},"created_at":"2017-05-05T08:50:14.000Z","updated_at":"2024-10-29T15:52:46.000Z","dependencies_parsed_at":"2023-12-06T15:29:54.708Z","dependency_job_id":"3bcee469-9c9e-462c-989d-271ad0128e3f","html_url":"https://github.com/ArchiveBox/ArchiveBox","commit_stats":{"total_commits":2772,"total_co
mmitters":108,"mean_commits":"25.666666666666668","dds":0.4415584415584416,"last_synced_commit":"f5e631dbfae4c1802abccfd5958b465c805b5afb"},"previous_names":["pirate/archivebox"],"tags_count":77,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ArchiveBox%2FArchiveBox","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ArchiveBox%2FArchiveBox/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ArchiveBox%2FArchiveBox/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/ArchiveBox%2FArchiveBox/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/ArchiveBox","download_url":"https://codeload.github.com/ArchiveBox/ArchiveBox/tar.gz/refs/heads/dev","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":243478207,"owners_count":20297212,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["archivebox","backups","bookmark-archiver","browser-bookmarks","chromium","digipres","firefox","headless-browser","internet-archiving","pinboard","pocket","python","rss","self-hosted","singlefile","warc","wayback-machine","web-archiving","wget","youtube-dl"],"created_at":"2024-07-30T17:01:32.668Z","updated_at":"2025-03-13T20:31:44.438Z","avatar_url":"https://github.com/ArchiveBox.png","language":"Python","readme":"\u003cdiv align=\"center\" style=\"text-align: center; width: 100%\"\u003e\n\u003cimg src=\"https://archivebox.io/icon.png\" 
height=\"90px\"/\u003e\n\u003ch1\u003eArchiveBox\u003cbr/\u003e\u003csub\u003eOpen-source self-hosted web archiving.\u003c/sub\u003e\u003c/h1\u003e\n\n\u003cbr/\u003e\n\n▶️ \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart\"\u003eQuickstart\u003c/a\u003e | \u003ca href=\"https://demo.archivebox.io\"\u003eDemo\u003c/a\u003e | \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox\"\u003eGitHub\u003c/a\u003e | \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki\"\u003eDocumentation\u003c/a\u003e | \u003ca href=\"#background--motivation\"\u003eInfo \u0026 Motivation\u003c/a\u003e | \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community\"\u003eCommunity\u003c/a\u003e\n\n\u003cbr/\u003e\n\n\u003c!--\u003ca href=\"http://webchat.freenode.net?channels=ArchiveBox\u0026uio=d4\"\u003e\u003cimg src=\"https://img.shields.io/badge/Community_chat-IRC-%2328A745.svg\"/\u003e\u003c/a\u003e--\u003e\n\n\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/blob/dev/LICENSE\"\u003e\u003cimg src=\"https://img.shields.io/badge/Open_source-MIT-green.svg?logo=git\u0026logoColor=green\"/\u003e\u003c/a\u003e \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/commits/dev\"\u003e\u003cimg src=\"https://img.shields.io/github/last-commit/ArchiveBox/ArchiveBox.svg?logo=Sublime+Text\u0026logoColor=green\u0026label=Active\"/\u003e\u003c/a\u003e \u0026nbsp; \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox\"\u003e\u003cimg src=\"https://img.shields.io/github/stars/ArchiveBox/ArchiveBox.svg?logo=github\u0026label=Stars\u0026logoColor=blue\"/\u003e\u003c/a\u003e \u0026nbsp; \u003ca href=\"https://hub.docker.com/r/archivebox/archivebox\"\u003e\u003cimg src=\"https://img.shields.io/docker/pulls/archivebox/archivebox.svg?label=Docker+Pulls\"/\u003e\u003c/a\u003e \u003ca href=\"https://pypi.org/project/archivebox/\"\u003e\u003cimg 
src=\"https://img.shields.io/pypi/dm/archivebox?label=PyPI%20Installs\u0026color=%235f7dae\"/\u003e\u003c/a\u003e \u003ca href=\"https://chromewebstore.google.com/detail/archivebox-exporter/habonpimjphpdnmcfkaockjnffodikoj\"\u003e\u003cimg src=\"https://img.shields.io/chrome-web-store/users/habonpimjphpdnmcfkaockjnffodikoj?label=Chrome%20Store\u0026color=%231973e8\"/\u003e\u003c/a\u003e\n\n\u003c!--\u003cpre lang=\"bash\" align=\"left\"\u003e\u003ccode style=\"white-space: pre-line; text-align: left\" align=\"left\"\u003e\ncurl -fsSL 'https://get.archivebox.io' | bash    # (or see pip/brew/Docker instructions below)\n\u003c/code\u003e\u003c/pre\u003e--\u003e\n\n\u003c/div\u003e\n\u003chr/\u003e\n\u003cbr/\u003e\n\n**ArchiveBox is a self-hosted app that lets you preserve content from websites in a variety of formats.**\n\nWe aim to make your data immediately useful, and kept in formats that other programs can read directly. As output, we save standard HTML, PNG, PDF, TXT, JSON, WARC, SQLite, all guaranteed to be readable for decades to come. ArchiveBox also has a CLI, REST API, and webhooks so you can set up integrations with other services.\n\nWithout active preservation effort, everything on the internet eventually disappears or degrades.\n\n*ArchiveBox is an open source tool that lets organizations \u0026 individuals archive both public \u0026 private web content while retaining control over their data. It can be used to save copies of bookmarks, preserve evidence for legal cases, backup photos from FB/Insta/Flickr or media from YT/Soundcloud/etc., save research papers, and more...*\n\u003cbr/\u003e\n\n\u003e ➡️ Get ArchiveBox with `pip install archivebox` on [Linux](#quickstart)/[macOS](#quickstart), or via **[Docker](#quickstart)** ⭐️ on any OS.  
\n\n*Once installed, you can interact with it through the: [Browser Extension](https://github.com/ArchiveBox/archivebox-browser-extension), [CLI](#usage), [self-hosted web interface](https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive), [Python API](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#python-shell-usage), or [filesystem](#static-archive-exporting).*\n\n\u003cbr/\u003e\n\u003chr/\u003e\n\u003cbr/\u003e\n\n📥 **You can feed ArchiveBox URLs one at a time, or schedule regular imports** from your bookmarks or history, social media feeds or RSS, link-saving services like Pocket/Pinboard, our [Browser Extension](https://github.com/ArchiveBox/archivebox-browser-extension), and more.  \n\u003csub\u003eSee \u003ca href=\"#input-formats\"\u003eInput Formats\u003c/a\u003e for a full list of supported input formats...\u003c/sub\u003e\n\n\u003cbr/\u003e\n\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/90f1ce3c-75bb-401d-88ed-6297694b76ae\" alt=\"snapshot detail page\" align=\"right\" width=\"190px\" style=\"float: right\"/\u003e\n\n**It saves snapshots of the URLs you feed it in several redundant formats.**  \nIt also detects any content featured *inside* pages \u0026 extracts it out into a folder:\n- 🌐 **HTML**/**Any websites** ➡️ `original HTML+CSS+JS`, `singlefile HTML`, `screenshot PNG`, `PDF`, `WARC`, `title`, `article text`, `favicon`, `headers`, ...\n- 🎥 **Social Media**/**News** ➡️ `post content TXT`, `comments`, `title`, `author`, `images`, ...\n- 🎬 **YouTube**/**SoundCloud**/etc. ➡️ `MP3/MP4`s, `subtitles`, `metadata`, `thumbnail`, ...\n- 💾 **Github**/**Gitlab**/etc. links ➡️ `clone of GIT source code`, `README`, `images`, ...\n- ✨ *and more, see [Output Formats](#output-formats) below...*\n\nYou can run ArchiveBox as a Docker web app to manage these snapshots, or continue accessing the same collection using the `pip`-installed CLI, Python API, and SQLite3 APIs. 
\nAll the ways of using it are equivalent, and provide matching features like adding tags, scheduling regular crawls, viewing logs, and more...\n\n\u003cbr/\u003e\n\u003chr/\u003e\n\n🛠️ ArchiveBox uses [standard tools](#dependencies) like Chrome, [`wget`](https://www.gnu.org/software/wget/), \u0026 [`yt-dlp`](https://github.com/yt-dlp/yt-dlp), and stores data in [ordinary files \u0026 folders](#archive-layout).  \n*(no complex proprietary formats, all data is readable without needing to run ArchiveBox)*\n\nThe goal is to sleep soundly knowing the part of the internet you care about will be automatically preserved in durable, easily accessible formats [for decades](#background--motivation) after it goes down.\n\n\n\u003chr/\u003e\n\u003cbr/\u003e\n\n\n**📦\u0026nbsp; Install ArchiveBox using your preferred method: `docker` / `pip` / `apt` / etc. ([see full Quickstart below](#quickstart)).**\n\n\n\u003cdetails\u003e\n\u0026nbsp; \u003csummary\u003e\u003ci\u003eExpand for quick copy-pastable install commands...\u003c/i\u003e \u0026nbsp; ⤵️\u003c/summary\u003e\n\u003cbr/\u003e\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e# Option A: Get ArchiveBox with Docker Compose (recommended):\nmkdir -p ~/archivebox/data \u0026\u0026 cd ~/archivebox\ncurl -fsSL 'https://docker-compose.archivebox.io' \u003e docker-compose.yml   # edit options in this file as-needed\ndocker compose run archivebox init --setup\n# docker compose run archivebox add 'https://example.com'\n# docker compose run archivebox help\n# docker compose up\n\u003cbr/\u003e\n\u003cbr/\u003e\n# Option B: Or use it as a plain Docker container:\nmkdir -p ~/archivebox/data \u0026\u0026 cd ~/archivebox/data\ndocker run -it -v $PWD:/data archivebox/archivebox init --setup\n# docker run -it -v $PWD:/data archivebox/archivebox add 'https://example.com'\n# docker run -it -v $PWD:/data archivebox/archivebox help\n# docker run -it -v $PWD:/data -p 8000:8000 
archivebox/archivebox\n\u003cbr/\u003e\n\u003cbr/\u003e\n# Option C: Or install it with your preferred pkg manager (see Quickstart below for apt, brew, and more)\npip install archivebox\nmkdir -p ~/archivebox/data \u0026\u0026 cd ~/archivebox/data\narchivebox init --setup\n# archivebox add 'https://example.com'\n# archivebox help\n# archivebox server 0.0.0.0:8000\n\u003cbr/\u003e\n\u003cbr/\u003e\n# Option D: Or use the optional auto setup script to install it\ncurl -fsSL 'https://get.archivebox.io' | bash\n\u003c/code\u003e\u003c/pre\u003e\n\u003cbr/\u003e\n\u003csub\u003eOpen \u003ca href=\"http://localhost:8000\"\u003e\u003ccode\u003ehttp://localhost:8000\u003c/code\u003e\u003c/a\u003e to see your server's Web UI ➡️\u003c/sub\u003e\n\u003c/details\u003e\n\u003cbr/\u003e\n\n\n\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/5a7d95f2-6977-4de6-9f08-42851a1fe1d2\" height=\"70px\" alt=\"bookshelf graphic\"\u003e \u0026nbsp; \u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/b2765a33-0d1e-4019-a1db-920c7e00e20e\" height=\"75px\" alt=\"logo\" align=\"top\"/\u003e \u0026nbsp; \u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/5a7d95f2-6977-4de6-9f08-42851a1fe1d2\" height=\"70px\" alt=\"bookshelf graphic\"\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003csmall\u003e\u003ca href=\"https://demo.archivebox.io\"\u003eDemo\u003c/a\u003e | \u003ca href=\"#screenshots\"\u003eScreenshots\u003c/a\u003e | \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage\"\u003eUsage\u003c/a\u003e\u003c/small\u003e\n\u003cbr/\u003e\n\u003csub\u003e. . . . . . . . . . . . . . . . . . . . . . . . . . . 
.\u003c/sub\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/8d67382c-e0ce-4286-89f7-7915f09b930c\" width=\"22%\" alt=\"cli init screenshot\" align=\"top\"\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/dad2bc51-e7e5-484e-bb26-f956ed692d16\" width=\"22%\" alt=\"cli init screenshot\" align=\"top\"\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/e8e0b6f8-8fdf-4b7f-8124-c10d8699bdb2\" width=\"22%\" alt=\"server snapshot admin screenshot\" align=\"top\"\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/ace0954a-ddac-4520-9d18-1c77b1ec50b2\" width=\"28.6%\" alt=\"server snapshot details page screenshot\" align=\"top\"/\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003c/div\u003e\n\n## Key Features\n\n- [**Free \u0026 open source**](https://github.com/ArchiveBox/ArchiveBox/blob/dev/LICENSE), own your own data \u0026 maintain your privacy by self-hosting\n- [**Powerful CLI**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage) with [modular dependencies](#dependencies) and [support for Google Drive/NFS/SMB/S3/B2/etc.](https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-Up-Storage)\n- [**Comprehensive documentation**](https://github.com/ArchiveBox/ArchiveBox/wiki), [active development](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap), and [rich community](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community)\n- [**Extracts a wide variety of content out-of-the-box**](https://github.com/ArchiveBox/ArchiveBox/issues/51): [media (yt-dlp), articles (readability), code (git), etc.](#output-formats)\n- [**Supports scheduled/realtime importing**](https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving) from [many types of sources](#input-formats)\n- [**Uses standard, durable, long-term formats**](#output-formats) like HTML, JSON, PDF, PNG, MP4, TXT, and WARC\n- [**Usable as a oneshot 
CLI**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage), [**self-hosted web UI**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#UI-Usage), [Python API](https://docs.archivebox.io/en/dev/apidocs/archivebox/archivebox.html) (BETA), [REST API](https://github.com/ArchiveBox/ArchiveBox/issues/496) (ALPHA), or [desktop app](https://github.com/ArchiveBox/electron-archivebox)\n- [**Saves all pages to archive.org as well**](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_archive_dot_org) by default for redundancy (can be [disabled](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#stealth-mode) for local-only mode)\n- Advanced users: support for archiving [content requiring login/paywall/cookies](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#chrome_user_data_dir) (see wiki security caveats!)\n- Planned: support for running [JS during archiving](https://github.com/ArchiveBox/ArchiveBox/issues/51) to adblock, [autoscroll](https://github.com/ArchiveBox/ArchiveBox/issues/80), [modal-hide](https://github.com/ArchiveBox/ArchiveBox/issues/175), [thread-expand](https://github.com/ArchiveBox/ArchiveBox/issues/345)\n\n\u003cbr/\u003e\n\n## 🤝 Professional Integration\n\nArchiveBox is free for everyone to self-host, but we also provide support, security review, and custom integrations to help NGOs, governments, and other organizations [run ArchiveBox professionally](https://zulip.archivebox.io/#narrow/stream/167-enterprise/topic/welcome/near/1191102):\n\n- **Journalists:**\n  `crawling during research`, `preserving cited pages`, `fact-checking \u0026 review`  \n- **Lawyers:**\n  `collecting \u0026 preserving evidence`, `detecting changes`, `tagging \u0026 review`  \n- **Researchers:**\n  `analyzing social media trends`, `getting LLM training data`, `crawling pipelines`\n- **Individuals:**\n  `saving bookmarks`, `preserving portfolio content`, `legacy / memoirs archival`\n- **Governments:**\n  `snapshotting public service 
sites`, `recordkeeping compliance`\n\n\u003e ***[Contact us](https://zulip.archivebox.io/#narrow/stream/167-enterprise/topic/welcome/near/1191102)** if your org wants help using ArchiveBox professionally.*  \n\u003e We offer: setup \u0026 support, CAPTCHA/ratelimit unblocking, SSO, audit logging/chain-of-custody, and more  \n\u003e *ArchiveBox is a 🏛️ 501(c)(3) [nonprofit FSP](https://hackclub.com/hcb/) and all our work supports open-source development.* \n\n\u003cbr/\u003e\n\n\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\n\u003cbr/\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/0db52ea7-4a2c-441d-b47f-5553a5d8fe96\" width=\"49%\" alt=\"grass\"/\u003e\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/0db52ea7-4a2c-441d-b47f-5553a5d8fe96\" width=\"49%\" alt=\"grass\"/\u003e\n\u003c/div\u003e\n\n\u003ca name=\"install\"\u003e\u003c/a\u003e\n\n# Quickstart\n\n**🖥\u0026nbsp; [Supported OSs](https://github.com/ArchiveBox/ArchiveBox/wiki/Install#supported-systems):** Linux/BSD, macOS, Windows (Docker) \u0026nbsp; **👾\u0026nbsp; CPUs:** `amd64` (`x86_64`), `arm64`, `arm7` \u003csup\u003e(raspi\u003e=3)\u003c/sup\u003e\u003cbr/\u003e\n\n\u003cbr/\u003e\n\n#### ✳️\u0026nbsp; Easy Setup\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003e\u003cimg src=\"https://user-images.githubusercontent.com/511499/117447182-29758200-af0b-11eb-97bd-58723fee62ab.png\" alt=\"Docker\" height=\"28px\" align=\"top\"/\u003e \u003ccode\u003edocker-compose\u003c/code\u003e\u003c/b\u003e  (macOS/Linux/Windows) \u0026nbsp; \u003cb\u003e👈\u0026nbsp; recommended\u003c/b\u003e \u0026nbsp; \u003ci\u003e(click to expand)\u003c/i\u003e\u003c/summary\u003e\n\u003cbr/\u003e\n\u003ci\u003e👍 Docker Compose is recommended for the easiest install/update UX + best security + all \u003ca href=\"#dependencies\"\u003eextras\u003c/a\u003e out-of-the-box.\u003c/i\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003col\u003e\n\u003cli\u003eInstall \u003ca 
href=\"https://docs.docker.com/get-docker/\"\u003eDocker\u003c/a\u003e on your system (if not already installed).\u003c/li\u003e\n\u003cli\u003eDownload the \u003ca href=\"https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/dev/docker-compose.yml\" download\u003e\u003ccode\u003edocker-compose.yml\u003c/code\u003e\u003c/a\u003e file into a new empty directory (can be anywhere).\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003emkdir -p ~/archivebox/data \u0026\u0026 cd ~/archivebox\n# Read and edit docker-compose.yml options as-needed after downloading\ncurl -fsSL 'https://docker-compose.archivebox.io' \u003e docker-compose.yml\n\u003c/code\u003e\u003c/pre\u003e\u003c/li\u003e\n\u003cli\u003eRun the initial setup to create an admin user (or set ADMIN_USER/PASS in docker-compose.yml)\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003edocker compose run archivebox init --setup\n\u003c/code\u003e\u003c/pre\u003e\u003c/li\u003e\n\u003cli\u003eNext steps: Start the server then login to the Web UI \u003ca href=\"http://127.0.0.1:8000\"\u003ehttp://127.0.0.1:8000\u003c/a\u003e ⇢ Admin.\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003edocker compose up\n# completely optional, CLI can always be used without running a server\n# docker compose run [-T] archivebox [subcommand] [--help]\ndocker compose run archivebox add 'https://example.com'\ndocker compose run archivebox help\n\u003c/code\u003e\u003c/pre\u003e\n\u003ci\u003eFor more info, see \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Install#option-a-docker--docker-compose-setup-%EF%B8%8F\"\u003eInstall: Docker Compose\u003c/a\u003e in the Wiki. 
➡️\u003c/i\u003e\n\u003c/li\u003e\n\u003c/ol\u003e\n\nSee \u003ca href=\"#%EF%B8%8F-cli-usage\"\u003ebelow\u003c/a\u003e for more usage examples using the CLI, Web UI, or \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#sql-shell-usage\"\u003efilesystem/SQL/Python\u003c/a\u003e to manage your archive.\n\u003cbr/\u003e\u003cbr/\u003e\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003e\u003cimg src=\"https://user-images.githubusercontent.com/511499/117447182-29758200-af0b-11eb-97bd-58723fee62ab.png\" alt=\"Docker\" height=\"28px\" align=\"top\"/\u003e \u003ccode\u003edocker run\u003c/code\u003e\u003c/b\u003e  (macOS/Linux/Windows)\u003c/summary\u003e\n\u003cbr/\u003e\n\u003col\u003e\n\u003cli\u003eInstall \u003ca href=\"https://docs.docker.com/get-docker/\"\u003eDocker\u003c/a\u003e on your system (if not already installed).\u003c/li\u003e\n\u003cli\u003eCreate a new empty directory and initialize your collection (can be anywhere).\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003emkdir -p ~/archivebox/data \u0026\u0026 cd ~/archivebox/data\ndocker run -v $PWD:/data -it archivebox/archivebox init --setup\n\u003c/code\u003e\u003c/pre\u003e\n\u003c/li\u003e\n\u003cli\u003eOptional: Start the server then login to the Web UI \u003ca href=\"http://127.0.0.1:8000\"\u003ehttp://127.0.0.1:8000\u003c/a\u003e ⇢ Admin.\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003edocker run -v $PWD:/data -p 8000:8000 archivebox/archivebox\n# completely optional, CLI can always be used without running a server\n# docker run -v $PWD:/data -it archivebox/archivebox [subcommand] [--help]\ndocker run -v $PWD:/data -it archivebox/archivebox help\n\u003c/code\u003e\u003c/pre\u003e\n\u003ci\u003eFor more info, see \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Install#option-a-docker--docker-compose-setup-%EF%B8%8F\"\u003eInstall: Docker Compose\u003c/a\u003e in the Wiki. 
➡️\u003c/i\u003e\n\u003c/li\u003e\n\u003c/ol\u003e\n\nSee \u003ca href=\"#%EF%B8%8F-cli-usage\"\u003ebelow\u003c/a\u003e for more usage examples using the CLI, Web UI, or filesystem/SQL/Python to manage your archive.\n\u003cbr/\u003e\u003cbr/\u003e\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003e\u003cimg src=\"https://user-images.githubusercontent.com/511499/117456282-08665e80-af16-11eb-91a1-8102eff54091.png\" alt=\"curl sh automatic setup script\" height=\"28px\" align=\"top\"/\u003e \u003ccode\u003ebash\u003c/code\u003e auto-setup script\u003c/b\u003e  (macOS/Linux)\u003c/summary\u003e\n\u003cbr/\u003e\n\u003col\u003e\n\u003cli\u003eInstall \u003ca href=\"https://docs.docker.com/get-docker/\"\u003eDocker\u003c/a\u003e on your system (optional, highly recommended but not required).\u003c/li\u003e\n\u003cli\u003eRun the automatic setup script.\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003ecurl -fsSL 'https://get.archivebox.io' | bash\u003c/code\u003e\u003c/pre\u003e\n\u003ci\u003eFor more info, see \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Install#option-b-automatic-setup-script\"\u003eInstall: Bare Metal\u003c/a\u003e in the Wiki. 
➡️\u003c/i\u003e\n\u003c/li\u003e\n\u003c/ol\u003e\n\nSee \u003ca href=\"#%EF%B8%8F-cli-usage\"\u003ebelow\u003c/a\u003e for more usage examples using the CLI, Web UI, or filesystem/SQL/Python to manage your archive.\u003cbr/\u003e\nSee \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/blob/dev/bin/setup.sh\"\u003e\u003ccode\u003esetup.sh\u003c/code\u003e\u003c/a\u003e for the source code of the auto-install script.\u003cbr/\u003e\nSee \u003ca href=\"https://docs.sweeting.me/s/against-curl-sh\"\u003e\"Against curl | sh as an install method\"\u003c/a\u003e blog post for my thoughts on the shortcomings of this install method.\n\u003cbr/\u003e\u003cbr/\u003e\n\u003c/details\u003e\n\n\u003cbr/\u003e\n\n#### 🛠\u0026nbsp; Package Manager Setup\n\n\u003ca name=\"Manual-Setup\"\u003e\u003c/a\u003e\n\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003e\u003cimg src=\"https://user-images.githubusercontent.com/511499/117447613-ba4c5d80-af0b-11eb-8f89-1d98e31b6a79.png\" alt=\"Pip\" height=\"28px\" align=\"top\"/\u003e \u003ccode\u003epip\u003c/code\u003e\u003c/b\u003e (macOS/Linux/BSD)\u003c/summary\u003e\n\u003cbr/\u003e\n\u003col\u003e\n\n\u003cli\u003eInstall \u003ca href=\"https://realpython.com/installing-python/\"\u003ePython \u003e= v3.10\u003c/a\u003e and \u003ca href=\"https://nodejs.org/en/download/package-manager/\"\u003eNode \u003e= v18\u003c/a\u003e on your system (if not already installed).\u003c/li\u003e\n\u003cli\u003eInstall the ArchiveBox package using \u003ccode\u003epip3\u003c/code\u003e (or \u003ca href=\"https://docs.astral.sh/uv/guides/tools/#running-tools\"\u003e\u003ccode\u003euvx\u003c/code\u003e\u003c/a\u003e).\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003epip3 install --upgrade archivebox yt-dlp playwright\nplaywright install --with-deps chromium\narchivebox version\n# install any missing extras shown using apt/brew/pkg/etc. 
see Wiki for instructions\n#    python@3.10 node curl wget git ripgrep ...\n\u003c/code\u003e\u003c/pre\u003e\n\u003ci\u003eSee the \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Install\"\u003eInstall: Bare Metal\u003c/a\u003e Wiki for full install instructions for each OS...\u003c/i\u003e\n\u003c/li\u003e\n\u003cli\u003eCreate a new empty directory and initialize your collection (can be anywhere).\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003emkdir -p ~/archivebox/data \u0026\u0026 cd ~/archivebox/data   # for example\narchivebox init --setup   # initialize a new collection\n# (--setup auto-installs and links JS dependencies: singlefile, readability, mercury, etc.)\n\u003c/code\u003e\u003c/pre\u003e\n\u003c/li\u003e\n\u003cli\u003eOptional: Start the server then login to the Web UI \u003ca href=\"http://127.0.0.1:8000\"\u003ehttp://127.0.0.1:8000\u003c/a\u003e ⇢ Admin.\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003earchivebox server 0.0.0.0:8000\n# completely optional, CLI can always be used without running a server\n# archivebox [subcommand] [--help]\narchivebox help\n\u003c/code\u003e\u003c/pre\u003e\n\u003c/li\u003e\n\u003c/ol\u003e\n\nSee \u003ca href=\"#%EF%B8%8F-cli-usage\"\u003ebelow\u003c/a\u003e for more usage examples using the CLI, Web UI, or filesystem/SQL/Python to manage your archive.\u003cbr/\u003e\n\u003cbr/\u003e\n\u003csub\u003eSee the \u003ca href=\"https://github.com/ArchiveBox/pip-archivebox\"\u003e\u003ccode\u003epip-archivebox\u003c/code\u003e\u003c/a\u003e repo for more details about this distribution.\u003c/sub\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003c/details\u003e\n\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003e\u003cimg src=\"https://user-images.githubusercontent.com/511499/117448075-49597580-af0c-11eb-91ba-f34fff10096b.png\" alt=\"aptitude\" height=\"28px\" align=\"top\"/\u003e \u003ccode\u003eapt\u003c/code\u003e\u003c/b\u003e 
(Ubuntu/Debian/etc.)\u003c/summary\u003e\n\u003cbr/\u003e\nSee the \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Install#option-c-bare-metal-setup\"\u003eInstall: Bare Metal\u003c/a\u003e Wiki for instructions. ➡️\n\u003c!--\u003col\u003e\n\u003cli\u003eAdd the ArchiveBox repository to your sources.\u003cbr/\u003e\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003eecho \"deb http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main\" | sudo tee /etc/apt/sources.list.d/archivebox.list\nsudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C258F79DCC02E369\nsudo apt update\n\u003c/code\u003e\u003c/pre\u003e\n\u003c/li\u003e\n\u003cli\u003eInstall the ArchiveBox package using \u003ccode\u003eapt\u003c/code\u003e.\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003esudo apt install archivebox\n# update to newest version with pip (sometimes apt package is outdated)\npip install --upgrade --ignore-installed archivebox yt-dlp playwright\nplaywright install --with-deps chromium    # install chromium and its system dependencies\narchivebox version                         # make sure all dependencies are installed\n\u003c/code\u003e\u003c/pre\u003e\n\u003c/li\u003e\n\u003cli\u003eCreate a new empty directory and initialize your collection (can be anywhere).\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003emkdir -p ~/archivebox/data \u0026\u0026 cd ~/archivebox/data\narchivebox init --setup\n\u003c/code\u003e\u003c/pre\u003e\n\u003cbr/\u003e\n\u003c/li\u003e\n\u003cli\u003eOptional: Start the server then login to the Web UI \u003ca href=\"http://127.0.0.1:8000\"\u003ehttp://127.0.0.1:8000\u003c/a\u003e ⇢ Admin.\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003earchivebox server 0.0.0.0:8000\n# completely optional, CLI can always be used without running a server\n# archivebox [subcommand] [--help]\narchivebox 
help\n\u003c/code\u003e\u003c/pre\u003e\n\u003c/li\u003e\n\u003c/ol\u003e\nSee \u003ca href=\"#%EF%B8%8F-cli-usage\"\u003ebelow\u003c/a\u003e for more usage examples using the CLI, Web UI, or filesystem/SQL/Python to manage your archive.\u003cbr/\u003e\n\u003csub\u003eSee the \u003ca href=\"https://github.com/ArchiveBox/debian-archivebox\"\u003e\u003ccode\u003edebian-archivebox\u003c/code\u003e\u003c/a\u003e repo for more details about this distribution.\u003c/sub\u003e--\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003e\u003cimg src=\"https://user-images.githubusercontent.com/511499/117447803-f2ec3700-af0b-11eb-87d3-671d114f011d.png\" alt=\"homebrew\" height=\"28px\" align=\"top\"/\u003e \u003ccode\u003ebrew\u003c/code\u003e\u003c/b\u003e (macOS only)\u003c/summary\u003e\n\u003cbr/\u003e\n\u003col\u003e\n\u003cli\u003eInstall \u003ca href=\"https://brew.sh/#install\"\u003eHomebrew\u003c/a\u003e on your system (if not already installed).\u003c/li\u003e\n\u003cli\u003eInstall the ArchiveBox package using \u003ccode\u003ebrew\u003c/code\u003e.\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003ebrew tap archivebox/archivebox\nbrew install archivebox\n# update to newest version with pip (sometimes brew package is outdated)\npip install --upgrade --ignore-installed archivebox yt-dlp playwright\nplaywright install --with-deps chromium    # install chromium and its system dependencies\narchivebox version                         # make sure all dependencies are installed\n\u003c/code\u003e\u003c/pre\u003e\n\u003ci\u003eSee the \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Install#option-c-bare-metal-setup\"\u003eInstall: Bare Metal\u003c/a\u003e Wiki for more granular instructions for macOS... 
➡️\u003c/i\u003e\n\u003c/li\u003e\n\u003cli\u003eCreate a new empty directory and initialize your collection (can be anywhere).\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003emkdir -p ~/archivebox/data \u0026\u0026 cd ~/archivebox/data\narchivebox init --setup\n\u003c/code\u003e\u003c/pre\u003e\n\u003c/li\u003e\n\u003cli\u003eOptional: Start the server then login to the Web UI \u003ca href=\"http://127.0.0.1:8000\"\u003ehttp://127.0.0.1:8000\u003c/a\u003e ⇢ Admin.\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003earchivebox server 0.0.0.0:8000\n# completely optional, CLI can always be used without running a server\n# archivebox [subcommand] [--help]\narchivebox help\n\u003c/code\u003e\u003c/pre\u003e\u003cbr/\u003e\n\u003c/li\u003e\n\u003c/ol\u003e\n\nSee \u003ca href=\"#%EF%B8%8F-cli-usage\"\u003ebelow\u003c/a\u003e for more usage examples using the CLI, Web UI, or filesystem/SQL/Python to manage your archive.\u003cbr/\u003e\n\u003csub\u003eSee the \u003ca href=\"https://github.com/ArchiveBox/homebrew-archivebox\"\u003e\u003ccode\u003ehomebrew-archivebox\u003c/code\u003e\u003c/a\u003e repo for more details about this distribution.\u003c/sub\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cimg src=\"https://user-images.githubusercontent.com/511499/118077361-f0616580-b381-11eb-973c-ee894a3349fb.png\" alt=\"Arch\" height=\"28px\" align=\"top\"/\u003e \u003ccode\u003epacman\u003c/code\u003e / \u003cimg src=\"https://user-images.githubusercontent.com/511499/118077946-29e6a080-b383-11eb-94f0-d4871da08c3f.png\" alt=\"FreeBSD\" height=\"28px\" align=\"top\"/\u003e \u003ccode\u003epkg\u003c/code\u003e / \u003cimg src=\"https://user-images.githubusercontent.com/511499/118077861-002d7980-b383-11eb-86a7-5936fad9190f.png\" alt=\"Nix\" height=\"28px\" align=\"top\"/\u003e \u003ccode\u003enix\u003c/code\u003e 
(Arch/FreeBSD/NixOS/more)\u003c/summary\u003e\n\u003cbr/\u003e\n\n\u003e *Warning: These are contributed by external volunteers and may lag behind the official `pip` channel.*\n\n\u003cul\u003e\n\u003cli\u003eArch: \u003ca href=\"https://aur.archlinux.org/packages/archivebox/\"\u003e\u003ccode\u003eyay -S archivebox\u003c/code\u003e\u003c/a\u003e (contributed by \u003ca href=\"https://github.com/imlonghao\"\u003e\u003ccode\u003e@imlonghao\u003c/code\u003e\u003c/a\u003e)\u003c/li\u003e\n\u003cli\u003eFreeBSD: \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox#%EF%B8%8F-easy-setup\"\u003e\u003ccode\u003ecurl -fsSL 'https://get.archivebox.io' | bash\u003c/code\u003e\u003c/a\u003e (uses \u003ccode\u003epkg\u003c/code\u003e + \u003ccode\u003epip3\u003c/code\u003e under-the-hood)\u003c/li\u003e\n\u003cli\u003eNix: \u003ca href=\"https://github.com/NixOS/nixpkgs/blob/master/pkgs/applications/misc/archivebox/default.nix\"\u003e\u003ccode\u003enix-env --install archivebox\u003c/code\u003e\u003c/a\u003e (contributed by \u003ca href=\"https://github.com/siraben\"\u003e\u003ccode\u003e@siraben\u003c/code\u003e\u003c/a\u003e)\u003c/li\u003e\n\u003cli\u003eGuix: \u003ca href=\"https://packages.guix.gnu.org/packages/archivebox/\"\u003e\u003ccode\u003eguix install archivebox\u003c/code\u003e\u003c/a\u003e (contributed by \u003ca href=\"https://github.com/rakino\"\u003e\u003ccode\u003e@rakino\u003c/code\u003e\u003c/a\u003e)\u003c/li\u003e\n\u003cli\u003eMore: \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/issues/new\"\u003e\u003ci\u003econtribute another distribution...!\u003c/i\u003e\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\nSee \u003ca href=\"#%EF%B8%8F-cli-usage\"\u003ebelow\u003c/a\u003e for usage examples using the CLI, Web UI, or filesystem/SQL/Python to manage your archive.\n\u003cbr/\u003e\u003cbr/\u003e\n\u003c/details\u003e\n\n\u003cbr/\u003e\n\n#### 🎗\u0026nbsp; Other Options\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003e\u003cimg 
src=\"https://user-images.githubusercontent.com/511499/117447182-29758200-af0b-11eb-97bd-58723fee62ab.png\" alt=\"Docker\" height=\"28px\" align=\"top\"/\u003e \u003ccode\u003edocker\u003c/code\u003e + \u003cimg src=\"https://user-images.githubusercontent.com/511499/117447263-4316c980-af0b-11eb-928d-eaf1292ac646.png\" alt=\"Electron\" height=\"28px\" align=\"top\"/\u003e \u003ccode\u003eelectron\u003c/code\u003e Desktop App\u003c/b\u003e (macOS/Linux/Windows)\u003c/summary\u003e\n\u003cbr/\u003e\n\u003col\u003e\n\u003cli\u003eInstall \u003ca href=\"https://docs.docker.com/get-docker/\"\u003eDocker\u003c/a\u003e on your system (if not already installed).\u003c/li\u003e\n\u003cli\u003eDownload a binary release for your OS or build the native app from source\u003cbr/\u003e\n\u003cul\u003e\n\u003cli\u003emacOS: \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/releases/download/v0.6.2/Electron-ArchiveBox-macOS-x64-0.6.2.app.zip\" download\u003e\u003ccode\u003eArchiveBox.app.zip\u003c/code\u003e\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eLinux: \u003ccode\u003eArchiveBox.deb\u003c/code\u003e (alpha: \u003ca href=\"https://github.com/ArchiveBox/electron-archivebox#quickstart\"\u003ebuild manually\u003c/a\u003e)\u003c/li\u003e\n\u003cli\u003eWindows: \u003ccode\u003eArchiveBox.exe\u003c/code\u003e (beta: \u003ca href=\"https://github.com/ArchiveBox/electron-archivebox#quickstart\"\u003ebuild manually\u003c/a\u003e)\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003c/ol\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/575ef92f-bb3e-4a7c-a4ba-986c1fd76ecf\" width=\"320px\"\u003e\n\u003cbr/\u003e\n\u003ci\u003e✨ Alpha (contributors wanted!)\u003c/i\u003e: for more info, see the: \u003ca href=\"https://github.com/ArchiveBox/electron-archivebox\"\u003eElectron ArchiveBox\u003c/a\u003e repo.\n\u003cbr/\u003e\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cimg 
src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/0c46e949-00fe-49c8-a613-ee14501c014c\" alt=\"Self-hosting Platforms\" height=\"28px\" align=\"top\"/\u003e\u003cb\u003e TrueNAS / UNRAID / YunoHost / Cloudron / etc.\u003c/b\u003e (self-hosting solutions)\u003c/summary\u003e\n\u003cbr/\u003e\n\n\u003e *Warning: These are contributed by external volunteers and may lag behind the official `pip` channel.*\n\n\u003cul\u003e\n\u003cli\u003e\u003cs\u003eTrueNAS: \u003ca href=\"https://truecharts.org/charts/stable/archivebox/\"\u003eOfficial ArchiveBox TrueChart\u003c/a\u003e / \u003ca href=\"https://dev.to/finloop/setting-up-archivebox-on-truenas-scale-1788\"\u003eCustom App Guide\u003c/a\u003e\u003c/s\u003e (\u003ca href=\"https://truecharts.org/news/scale-deprecation/\"\u003eTrueCharts is discontinued\u003c/a\u003e, wait for \u003ca href=\"https://forums.truenas.com/t/the-future-of-electric-eel-and-apps/5409/\"\u003eElectric Eel\u003c/a\u003e)\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://unraid.net/community/apps?q=archivebox#r\"\u003eUnRaid\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/YunoHost-Apps/archivebox_ynh\"\u003eYunohost\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://www.cloudron.io/store/io.archivebox.cloudronapp.html\"\u003eCloudron\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://docs.saltbox.dev/sandbox/apps/archivebox/\"\u003eSaltbox\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://portainer-templates.as93.net/archivebox\"\u003ePortainer\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/pull/922/files#diff-00f0606e18b2618c3cc1667ca7c2b703b537af690ca71eba1330633587dcb1ee\"\u003eAppImage\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca 
href=\"https://runtipi.io/docs/apps-available#:~:text=for%20AI%20Chats.-,ArchiveBox,Open%20source%20self%2Dhosted%20web%20archiving.,-Atuin%20Server\"\u003eRuntipi\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/issues/986\"\u003eUmbrel\u003c/a\u003e (need contributors...)\u003c/li\u003e\n\n\u003cli\u003eMore: \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/issues/new\"\u003e\u003ci\u003econtribute another distribution...!\u003c/i\u003e\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\nSee \u003ca href=\"#%EF%B8%8F-cli-usage\"\u003ebelow\u003c/a\u003e for usage examples using the CLI, Web UI, or filesystem/SQL/Python to manage your archive.\n\u003cbr/\u003e\u003cbr/\u003e\n\u003c/details\u003e\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cimg src=\"https://user-images.githubusercontent.com/511499/117448723-1663b180-af0d-11eb-837f-d43959227810.png\" alt=\"paid\" height=\"27px\" align=\"top\"/\u003e Paid hosting solutions (cloud VPS)\u003c/summary\u003e\n\u003cbr/\u003e\n\u003cul\u003e\n\u003cli\u003e\u003ca href=\"https://zulip.archivebox.io/#narrow/stream/167-enterprise/topic/welcome/near/1191102\"\u003e\n \u003cimg src=\"https://img.shields.io/badge/Custom_Development-ArchiveBox.io-%231a1a1a.svg?style=flat\" height=\"22px\"/\u003e\n\u003c/a\u003e (\u003ca href=\"https://zulip.archivebox.io/#narrow/stream/167-enterprise/topic/welcome/near/1191102\"\u003eget hosting, support, and feature customization directly from us\u003c/a\u003e)\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://monadical.com\"\u003e\n \u003cimg src=\"https://img.shields.io/badge/General_Dev_Consulting-Monadical.com-%231a1a1a.svg?style=flat\" height=\"22px\"/\u003e\n\u003c/a\u003e (\u003ca href=\"https://monadical.com/contact-us.html\"\u003egeneralist consultancy that has ArchiveBox experience\u003c/a\u003e)\u003c/li\u003e\n\u003cbr/\u003e\nOther providers of paid ArchiveBox hosting (not officially 
endorsed):\u003cbr/\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003cli\u003e\u003ca href=\"https://elest.io/open-source/archivebox\"\u003e\u003cimg src=\"https://img.shields.io/badge/Managed_Hosting-Elest.io-%23193f7e.svg?style=flat\" height=\"22px\"/\u003e\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://www.stellarhosted.com/archivebox/\"\u003e\u003cimg src=\"https://img.shields.io/badge/Semi_Managed_Hosting-StellarHosted.com-%23193f7e.svg?style=flat\" height=\"22px\"/\u003e\u003c/a\u003e (USD $29-250/mo, \u003ca href=\"https://www.stellarhosted.com/archivebox/#pricing\"\u003epricing\u003c/a\u003e)\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://www.pikapods.com/pods?run=archivebox\"\u003e\u003cimg src=\"https://img.shields.io/badge/Semi_Managed_Hosting-PikaPods.com-%2343a047.svg?style=flat\" height=\"22px\"/\u003e\u003c/a\u003e (from USD $2.6/mo)\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://m.do.co/c/cbc4c0c17840\"\u003e\n \u003cimg src=\"https://img.shields.io/badge/Unmanaged_VPS-DigitalOcean.com-%232f7cf7.svg?style=flat\" height=\"22px\"/\u003e\n\u003c/a\u003e (USD $5-50+/mo, \u003ca href=\"https://m.do.co/c/cbc4c0c17840\"\u003e🎗\u0026nbsp; referral link\u003c/a\u003e, \u003ca href=\"https://www.digitalocean.com/community/tutorials/how-to-install-and-use-docker-compose-on-ubuntu-20-04\"\u003einstructions\u003c/a\u003e)\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://www.vultr.com/?ref=7130289\"\u003e\n \u003cimg src=\"https://img.shields.io/badge/Unmanaged_VPS-Vultr.com-%232337a8.svg?style=flat\" height=\"22px\"/\u003e\n\u003c/a\u003e (USD $2.5-50+/mo, \u003ca href=\"https://www.vultr.com/?ref=7130289\"\u003e🎗\u0026nbsp; referral link\u003c/a\u003e, \u003ca href=\"https://www.vultr.com/docs/install-docker-compose-on-ubuntu-20-04\"\u003einstructions\u003c/a\u003e)\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://fly.io/\"\u003e\n \u003cimg src=\"https://img.shields.io/badge/Unmanaged_App-Fly.io-%239a2de6.svg?style=flat\" 
height=\"22px\"/\u003e\n\u003c/a\u003e (USD $10-50+/mo, \u003ca href=\"https://fly.io/docs/hands-on/start/\"\u003einstructions\u003c/a\u003e)\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://railway.app/template/2Vvhmy\"\u003e\n \u003cimg src=\"https://img.shields.io/badge/Unmanaged_App-Railway-%23A11BE6.svg?style=flat\" height=\"22px\"/\u003e\n\u003c/a\u003e (USD $0-5+/mo)\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://aws.amazon.com/marketplace/pp/Linnovate-Open-Source-Innovation-Support-For-Archi/B08RVW6MJ2\"\u003e\u003cimg src=\"https://img.shields.io/badge/Unmanaged_VPS-AWS-%23ee8135.svg?style=flat\" height=\"22px\"/\u003e\u003c/a\u003e (USD $60-200+/mo)\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/meanio.archivebox?ocid=gtmrewards_whatsnewblog_archivebox_vol118\"\u003e\u003cimg src=\"https://img.shields.io/badge/Unmanaged_VPS-Azure-%237cb300.svg?style=flat\" height=\"22px\"/\u003e\u003c/a\u003e (USD $60-200+/mo)\u003c/li\u003e\n\u003cbr/\u003e\n\u003csub\u003e\u003ci\u003eReferral links marked 🎗 provide $5-10 of free credit for new users and help pay for our \u003ca href=\"https://demo.archivebox.io\"\u003edemo server\u003c/a\u003e hosting costs.\u003c/i\u003e\u003c/sub\u003e\n\u003c/ul\u003e\n\nFor more discussion on managed and paid hosting options see here: \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/issues/531\"\u003eIssue #531\u003c/a\u003e.\n\n\u003c/details\u003e\n\n\u003cbr/\u003e\n\n#### ➡️\u0026nbsp; Next Steps\n\n- Import URLs from some of the supported [Input Formats](#input-formats) or view the supported [Output Formats](#output-formats)...\n- Tweak your UI or archiving behavior [Configuration](#configuration), read about some of the [Caveats](#caveats), or [Troubleshoot](https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting)\n- Read about the [Dependencies](#dependencies) used for archiving, the [Upgrading 
Process](https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives), or the [Archive Layout](#archive-layout) on disk...\n- Or check out our full [Documentation](#documentation) or [Community Wiki](#internet-archiving-ecosystem)...\n\n\u003cbr/\u003e\n\n### Usage\n\n#### ⚡️\u0026nbsp; \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#cli-usage\"\u003eCLI Usage\u003c/a\u003e\n\nArchiveBox commands can be run in a terminal [directly on your host](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#cli-usage), or via [Docker](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage-1)/[Docker Compose](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage).  \n\u003csup\u003e(depending on how you chose to install it above)\u003c/sup\u003e\n\n```bash\nmkdir -p ~/archivebox/data   # create a new data dir anywhere\ncd ~/archivebox/data         # IMPORTANT: cd into the directory\n\n# archivebox [subcommand] [--help]\narchivebox version\narchivebox help\n\n# equivalent: docker compose run archivebox [subcommand] [--help]\ndocker compose run archivebox help\n\n# equivalent: docker run -it -v $PWD:/data archivebox/archivebox [subcommand] [--help]\ndocker run -it -v $PWD:/data archivebox/archivebox help\n```\n\n#### ArchiveBox Subcommands\n\n- `archivebox` `help`/`version` to see the list of available subcommands / currently installed version info\n- `archivebox` `setup`/`init`/`config`/`status`/`shell`/`manage` to administer your collection\n- `archivebox` `add`/`oneshot`/`schedule` to pull in fresh URLs from [bookmarks/history/RSS/etc.](#input-formats)\n- `archivebox` `list`/`update`/`remove` to manage existing Snapshots in your collection\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003cimg src=\"https://user-images.githubusercontent.com/511499/117456282-08665e80-af16-11eb-91a1-8102eff54091.png\" alt=\"curl sh automatic setup script\" height=\"22px\" align=\"top\"/\u003e \u003cb\u003eCLI Usage Examples: 
non-Docker\u003c/b\u003e\u003c/summary\u003e\n\u003cbr/\u003e\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e\n# make sure you have pip-installed ArchiveBox and it's available in your $PATH first  \n\u003cbr/\u003e\n# archivebox [subcommand] [--help]\narchivebox init --setup      # safe to run init multiple times (also how you update versions)\narchivebox version           # get archivebox version info + check dependencies\narchivebox help              # get list of archivebox subcommands that can be run\narchivebox add --depth=1 'https://news.ycombinator.com'\n\u003c/code\u003e\u003c/pre\u003e\n\u003ci\u003eFor more info, see our \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#cli-usage\"\u003eUsage: CLI Usage\u003c/a\u003e wiki. ➡️\u003c/i\u003e\n\u003c/details\u003e\n\n\u003cbr/\u003e\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cimg src=\"https://user-images.githubusercontent.com/511499/117447182-29758200-af0b-11eb-97bd-58723fee62ab.png\" alt=\"Docker\" height=\"22px\" align=\"top\"/\u003e \u003cb\u003eCLI Usage Examples: Docker Compose\u003c/b\u003e\u003c/summary\u003e\n\u003cbr/\u003e\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e\n# make sure you have `docker-compose.yml` from the Quickstart instructions first\n\u003cbr/\u003e\n# docker compose run archivebox [subcommand] [--help]\ndocker compose run archivebox init --setup\ndocker compose run archivebox version\ndocker compose run archivebox help\ndocker compose run archivebox add --depth=1 'https://news.ycombinator.com'\n# to start webserver: docker compose up\n\u003c/code\u003e\u003c/pre\u003e\n\u003ci\u003eFor more info, see our \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage\"\u003eUsage: Docker Compose CLI\u003c/a\u003e wiki. 
➡️\u003c/i\u003e\n\u003c/details\u003e\n\n\u003cbr/\u003e\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cimg src=\"https://user-images.githubusercontent.com/511499/117447182-29758200-af0b-11eb-97bd-58723fee62ab.png\" alt=\"Docker\" height=\"22px\" align=\"top\"/\u003e \u003cb\u003eCLI Usage Examples: Docker\u003c/b\u003e\u003c/summary\u003e\n\u003cbr/\u003e\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e\n# make sure you create and cd into a new empty directory first  \n\u003cbr/\u003e\n# docker run -it -v $PWD:/data archivebox/archivebox [subcommand] [--help]\ndocker run -v $PWD:/data -it archivebox/archivebox init --setup\ndocker run -v $PWD:/data -it archivebox/archivebox version\ndocker run -v $PWD:/data -it archivebox/archivebox help\ndocker run -v $PWD:/data -it archivebox/archivebox add --depth=1 'https://news.ycombinator.com'\n# to start webserver: docker run -v $PWD:/data -it -p 8000:8000 archivebox/archivebox\n\u003c/code\u003e\u003c/pre\u003e\n\u003ci\u003eFor more info, see our \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage-1\"\u003eUsage: Docker CLI\u003c/a\u003e wiki. 
➡️\u003c/i\u003e\n\u003c/details\u003e\n\n\u003cbr/\u003e\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003e🗄\u0026nbsp; SQL/Python/Filesystem Usage\u003c/b\u003e\u003c/summary\u003e\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e\narchivebox shell           # explore the Python library API in a REPL\nsqlite3 ./index.sqlite3    # run SQL queries directly on your index\nls ./archive/*/index.html  # or inspect snapshot data directly on the filesystem\n\u003c/code\u003e\u003c/pre\u003e\n\u003ci\u003eFor more info, see our \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#python-shell-usage\"\u003ePython Shell\u003c/a\u003e, \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#sql-shell-usage\"\u003eSQL API\u003c/a\u003e, and \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox#archive-layout\"\u003eDisk Layout\u003c/a\u003e wikis. ➡️\u003c/i\u003e\n\u003c/details\u003e\n\n\n\u003cbr/\u003e\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003cb\u003e🖥\u0026nbsp; Web UI \u0026 API Usage\u003c/b\u003e\u003c/summary\u003e\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e\n# Start the server on bare metal (pip/apt/brew/etc):\narchivebox manage createsuperuser              # create a new admin user via CLI\narchivebox server 0.0.0.0:8000                 # start the server\n\u003cbr/\u003e\n# Or with Docker Compose:\nnano docker-compose.yml                        # setup initial ADMIN_USERNAME \u0026 ADMIN_PASSWORD\ndocker compose up                              # start the server\n\u003cbr/\u003e\n# Or with a Docker container:\ndocker run -v $PWD:/data -it archivebox/archivebox archivebox manage createsuperuser\ndocker run -v $PWD:/data -it -p 8000:8000 archivebox/archivebox\n\u003c/code\u003e\u003c/pre\u003e\n\n\u003csup\u003eOpen \u003ca href=\"http://localhost:8000\"\u003e\u003ccode\u003ehttp://localhost:8000\u003c/code\u003e\u003c/a\u003e to see your server's Web UI 
➡️\u003c/sup\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003ci\u003eFor more info, see our \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#ui-usage\"\u003eUsage: Web UI\u003c/a\u003e wiki. ➡️\u003c/i\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003cb\u003eOptional: Change permissions to allow non-logged-in users\u003c/b\u003e\n\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e\narchivebox config --set PUBLIC_ADD_VIEW=True   # allow guests to submit URLs \narchivebox config --set PUBLIC_SNAPSHOTS=True  # allow guests to see snapshot content\narchivebox config --set PUBLIC_INDEX=True      # allow guests to see list of all snapshots\n# or\ndocker compose run archivebox config --set ...\n\n# restart the server to apply any config changes\n\u003c/code\u003e\u003c/pre\u003e\n\u003c/details\u003e\n\n\u003cbr/\u003e\n\u003cbr/\u003e\n\n\u003e [!TIP]\n\u003e Whether in Docker or not, ArchiveBox commands work the same way, and can be used to access the same data on-disk.\n\u003e For example, you could run the Web UI in Docker Compose, and run one-off commands with `pip`-installed ArchiveBox.\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eExpand to show comparison...\u003c/i\u003e\u003c/summary\u003e\u003cbr/\u003e\n\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e\narchivebox add --depth=1 'https://example.com'                     # add a URL with pip-installed archivebox on the host\ndocker compose run archivebox add --depth=1 'https://example.com'                       # or w/ Docker Compose\ndocker run -it -v $PWD:/data archivebox/archivebox add --depth=1 'https://example.com'  # or w/ Docker, all equivalent\n\u003c/code\u003e\u003c/pre\u003e\n\n\u003ci\u003eFor more info, see our \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Docker\"\u003eDocker\u003c/a\u003e wiki. 
➡️\u003c/i\u003e\n\n\u003c/details\u003e\n\n\n\u003cbr/\u003e\n\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/65f82532-18dd-49c5-86f1-02b1f3100e1e\" width=\"49%\" alt=\"grass\"/\u003e\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/65f82532-18dd-49c5-86f1-02b1f3100e1e\" width=\"49%\" alt=\"grass\"/\u003e\n\u003c/div\u003e\n\u003cbr/\u003e\n\n\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\n\u003csub\u003e. . . . . . . . . . . . . . . . . . . . . . . . . . . .\u003c/sub\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003ca href=\"https://demo.archivebox.io\"\u003eDEMO: \u003ccode\u003ehttps://demo.archivebox.io\u003c/code\u003e\u003c/a\u003e\u003cbr/\u003e\n\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage\"\u003eUsage\u003c/a\u003e | \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration\"\u003eConfiguration\u003c/a\u003e | \u003ca href=\"#Caveats\"\u003eCaveats\u003c/a\u003e\n\u003cbr/\u003e\n\u003c/div\u003e\n\n\u003cbr/\u003e\n\n---\n\n\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/ac1f897a-8baa-4f8b-8ee8-7443611f258b\" width=\"96%\" alt=\"lego\"/\u003e\n\u003c/div\u003e\n\n\u003cbr/\u003e\n\n# Overview\n\n\u003ca name=\"input-formats\"\u003e\u003c/a\u003e\n\n##  Input Formats: How to pass URLs into ArchiveBox for saving\n\n\n- \u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/ff20d251-5347-4b85-ae9b-83037d0ac01e\" height=\"28px\"/\u003e \u003cb\u003eFrom the official \u003ca href=\"https://github.com/ArchiveBox/archivebox-extension\"\u003eArchiveBox Browser Extension\u003c/a\u003e\u003c/b\u003e  \n  \u003ci\u003eProvides realtime archiving of browsing history or selected pages from Chrome/Chromium/Firefox browsers.\u003c/i\u003e\n\n- \u003cimg 
src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/64078483-21d7-4eb1-aa6e-9ad55afe45b8\" height=\"22px\"/\u003e From manual imports of URLs from RSS, JSON, CSV, TXT, SQL, HTML, Markdown, etc. files  \n  \u003ci\u003eArchiveBox supports ingesting URLs in [any text-based format](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Import-a-list-of-URLs-from-a-text-file).\u003c/i\u003e\n\n- \u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/32b494e6-4de1-4984-8d88-dc02f18e5c34\" height=\"22px\"/\u003e From manually exported [browser history](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive) or [browser bookmarks](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive) (in Netscape format)  \n  \u003ci\u003eInstructions: \u003ca href=\"https://support.google.com/chrome/answer/96816?hl=en\"\u003eChrome\u003c/a\u003e, \u003ca href=\"https://support.mozilla.org/en-US/kb/export-firefox-bookmarks-to-backup-or-transfer\"\u003eFirefox\u003c/a\u003e, \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/24ad068e-0fa6-41f4-a7ff-4c26fc91f71a\"\u003eSafari\u003c/a\u003e, \u003ca href=\"https://support.microsoft.com/en-us/help/211089/how-to-import-and-export-the-internet-explorer-favorites-folder-to-a-32-bit-version-of-windows\"\u003eIE\u003c/a\u003e, \u003ca href=\"https://help.opera.com/en/latest/features/#bookmarks:~:text=Click%20the%20import/-,export%20button,-on%20the%20bottom\"\u003eOpera\u003c/a\u003e, \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive\"\u003eand more...\u003c/a\u003e\u003c/i\u003e\n\n- \u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/4f7bd318-265c-4235-ad25-38be89946b12\" height=\"22px\"/\u003e From URLs visited through a [MITM Proxy](https://mitmproxy.org/) with [`archivebox-proxy`](https://github.com/ArchiveBox/archivebox-proxy)  \n  
\u003ci\u003eProvides [realtime archiving](https://github.com/ArchiveBox/ArchiveBox/issues/577) of all traffic from any device going through the proxy.\u003c/i\u003e\n\n- \u003cimg src=\"https://getpocket.com/favicon.ico\" height=\"22px\"/\u003e From bookmarking services or social media (e.g. Twitter bookmarks, Reddit saved posts, etc.)  \n  \u003ci\u003eInstructions: \u003ca href=\"https://getpocket.com/export\"\u003ePocket\u003c/a\u003e, \u003ca href=\"https://pinboard.in/export/\"\u003ePinboard\u003c/a\u003e, \u003ca href=\"https://www.instapaper.com/user\"\u003eInstapaper\u003c/a\u003e, \u003ca href=\"https://shaarli.readthedocs.io/en/master/Usage/#importexport\"\u003eShaarli\u003c/a\u003e, \u003ca href=\"https://www.groovypost.com/howto/howto/export-delicious-bookmarks-xml/\"\u003eDelicious\u003c/a\u003e, \u003ca href=\"https://github.com/csu/export-saved-reddit\"\u003eReddit Saved\u003c/a\u003e, \u003ca href=\"https://doc.wallabag.org/en/user/import/wallabagv2.html\"\u003eWallabag\u003c/a\u003e, \u003ca href=\"http://help.unmark.it/import-export\"\u003eUnmark.it\u003c/a\u003e, \u003ca href=\"https://www.addictivetips.com/web/onetab-save-close-all-chrome-tabs-to-restore-export-or-import/\"\u003eOneTab\u003c/a\u003e, \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/issues/648\"\u003eFirefox Sync\u003c/a\u003e, \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive\"\u003eand more...\u003c/a\u003e\u003c/i\u003e\n\n\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/e1e5bd78-b0b6-45dc-914c-e1046fee4bc4\" width=\"330px\" align=\"right\" style=\"float: right\"/\u003e\n\n\n```bash\n# archivebox add --help\narchivebox add 'https://example.com/some/page'\narchivebox add --parser=generic_rss \u003c ~/Downloads/some_feed.xml\narchivebox add --depth=1 'https://news.ycombinator.com#2020-12-12'\necho 'http://example.com' | archivebox add\necho 'any text with \u003ca 
href=\"https://example.com\"\u003eurls\u003c/a\u003e in it' | archivebox add\n\n# if using Docker, add -i when piping stdin:\n# echo 'https://example.com' | docker run -v $PWD:/data -i archivebox/archivebox add\n# if using Docker Compose, add -T when piping stdin / stdout:\n# echo 'https://example.com' | docker compose run -T archivebox add\n```\n\nSee the [Usage: CLI](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage) page for documentation and examples.\n\nIt also includes a built-in scheduled import feature with `archivebox schedule` and browser bookmarklet, so you can pull in URLs from RSS feeds, websites, or the filesystem regularly/on-demand.\n\n\u003cbr/\u003e\n\n\n\u003ca name=\"output-formats\"\u003e\u003c/a\u003e\n\n## Output Formats: What ArchiveBox saves for each URL\n\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/ace0954a-ddac-4520-9d18-1c77b1ec50b2\" width=\"330px\" align=\"right\" style=\"float: right\"/\u003e\n\n\nFor each web page added, ArchiveBox creates a Snapshot folder and preserves its content as ordinary files inside the folder (e.g. 
HTML, PDF, PNG, JSON, etc.).\n\nIt uses all available methods out-of-the-box, but you can disable extractors and fine-tune the [configuration](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration) as-needed.\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eExpand to see the full list of ways it saves each page...\u003c/i\u003e\u003c/summary\u003e\n\n\n\u003ccode\u003edata/archive/{Snapshot.id}/\u003c/code\u003e\u003cbr/\u003e\n\u003cul\u003e\n\u003cli\u003e\u003cstrong\u003eIndex:\u003c/strong\u003e \u003ccode\u003eindex.html\u003c/code\u003e \u0026amp; \u003ccode\u003eindex.json\u003c/code\u003e HTML and JSON index files containing metadata and details\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eTitle\u003c/strong\u003e, \u003cstrong\u003eFavicon\u003c/strong\u003e, \u003cstrong\u003eHeaders\u003c/strong\u003e Response headers, site favicon, and parsed site title\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eSingleFile:\u003c/strong\u003e \u003ccode\u003esinglefile.html\u003c/code\u003e HTML snapshot rendered with headless Chrome using SingleFile\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eWget Clone:\u003c/strong\u003e \u003ccode\u003eexample.com/page-name.html\u003c/code\u003e wget clone of the site with  \u003ccode\u003ewarc/TIMESTAMP.gz\u003c/code\u003e\u003c/li\u003e\n\u003cli\u003eChrome Headless \u003cul\u003e\n\u003cli\u003e\u003cstrong\u003ePDF:\u003c/strong\u003e \u003ccode\u003eoutput.pdf\u003c/code\u003e Printed PDF of site using headless chrome\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eScreenshot:\u003c/strong\u003e \u003ccode\u003escreenshot.png\u003c/code\u003e 1440x900 screenshot of site using headless chrome\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eDOM Dump:\u003c/strong\u003e \u003ccode\u003eoutput.html\u003c/code\u003e DOM Dump of the HTML after rendering using headless chrome\u003c/li\u003e\n\u003c/ul\u003e\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eArticle Text:\u003c/strong\u003e 
\u003ccode\u003earticle.html/json\u003c/code\u003e Article text extraction using Readability \u0026amp; Mercury\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eArchive.org Permalink:\u003c/strong\u003e \u003ccode\u003earchive.org.txt\u003c/code\u003e A link to the saved site on archive.org\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eAudio \u0026amp; Video:\u003c/strong\u003e \u003ccode\u003emedia/\u003c/code\u003e all audio/video files + playlists, including subtitles \u0026amp; metadata w/ \u003ccode\u003eyt-dlp\u003c/code\u003e\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eSource Code:\u003c/strong\u003e \u003ccode\u003egit/\u003c/code\u003e clone of any repository found on GitHub, Bitbucket, or GitLab links\u003c/li\u003e\n\u003cli\u003e\u003cem\u003eMore coming soon! See the \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap\"\u003eRoadmap\u003c/a\u003e...\u003c/em\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/details\u003e\n\u003cbr/\u003e\n\n## Configuration\n\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/ea672e6b-4df5-49d8-b550-7f450951fd27\" width=\"330px\" align=\"right\" style=\"float: right\"/\u003e\n\nArchiveBox can be configured via environment variables, by using the `archivebox config` CLI, or by editing `./ArchiveBox.conf`.\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eExpand to see examples...\u003c/i\u003e\u003c/summary\u003e\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003earchivebox config                               # view the entire config\narchivebox config --get CHROME_BINARY           # view a specific value\n\u003cbr/\u003e\narchivebox config --set CHROME_BINARY=chromium  # persist a config using CLI\n# OR\necho CHROME_BINARY=chromium \u003e\u003e ArchiveBox.conf  # persist a config using file\n# OR\nenv CHROME_BINARY=chromium archivebox ...       
# run with a one-off config\n\u003c/code\u003e\u003c/pre\u003e\n\u003csub\u003eThese methods also work the same way when run inside Docker, see the \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration\"\u003eDocker Configuration\u003c/a\u003e wiki page for details.\u003c/sub\u003e\n\u003c/details\u003e\u003cbr/\u003e\n\nThe configuration is documented here: **[Configuration Wiki](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration)**, and loaded here: [`archivebox/config.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/config.py).\n\n\u003ca name=\"most-common-options-to-tweak\"\u003e\u003c/a\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eExpand to see the most common options to tweak...\u003c/i\u003e\u003c/summary\u003e\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e\n# e.g. archivebox config --set TIMEOUT=120\n# or   docker compose run archivebox config --set TIMEOUT=120\n\u003cbr/\u003e\nTIMEOUT=240                # default: 60    add more seconds on slower networks\nCHECK_SSL_VALIDITY=False   # default: True  False = allow saving URLs w/ bad SSL\nSAVE_ARCHIVE_DOT_ORG=False # default: True  False = disable Archive.org saving\nMAX_MEDIA_SIZE=1500m       # default: 750m  raise/lower youtubedl output size\n\u003cbr/\u003e\nPUBLIC_INDEX=True          # default: True  whether anon users can view index\nPUBLIC_SNAPSHOTS=True      # default: True  whether anon users can view pages\nPUBLIC_ADD_VIEW=False      # default: False whether anon users can add new URLs\n\u003cbr/\u003e\nCHROME_USER_AGENT=\"Mozilla/5.0 ...\"  # change these to get around bot blocking\nWGET_USER_AGENT=\"Mozilla/5.0 ...\"\nCURL_USER_AGENT=\"Mozilla/5.0 ...\"\n\u003c/code\u003e\u003c/pre\u003e\n\u003c/details\u003e\n\u003cbr/\u003e\n\n## Dependencies\n\nTo achieve high-fidelity archives in as many situations as possible, ArchiveBox depends on a variety of 3rd-party libraries and tools that specialize 
in extracting different types of content.\n\n\u003e Under-the-hood, ArchiveBox uses [Django](https://www.djangoproject.com/start/overview/) to power its [Web UI](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#ui-usage), [Django Ninja](https://django-ninja.dev/) for the REST API, and [SQLite](https://www.sqlite.org/locrsf.html) + the filesystem to provide [fast \u0026 durable metadata storage](https://www.sqlite.org/locrsf.html) w/ [deterministic upgrades](https://stackoverflow.com/a/39976321/2156113).\n\nArchiveBox bundles industry-standard tools like [Google Chrome](https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install), [`wget`, `yt-dlp`, `readability`, etc.](#dependencies) internally, and its operation can be [tuned, secured, and extended](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration) as-needed for many different applications.\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eExpand to learn more about ArchiveBox's internals \u0026 dependencies...\u003c/i\u003e\u003c/summary\u003e\u003cbr/\u003e\n\n\u003cblockquote\u003e\n\u003cp\u003e\u003cem\u003eTIP: For better security while running ArchiveBox, and to avoid polluting your host system with a bunch of sub-dependencies that you need to keep up-to-date, \u003cstrong\u003eit is strongly recommended to use the \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Docker\"\u003e⭐️ official Docker image\u003c/a\u003e\u003c/strong\u003e which provides everything in an easy container with simple one-liner upgrades.\u003c/em\u003e\u003c/p\u003e\n\u003c/blockquote\u003e\n\n\u003cul\u003e\n\u003cli\u003eLanguage: Python \u003ccode\u003e\u0026gt;=3.10\u003c/code\u003e\u003c/li\u003e\n\u003cli\u003eBackend: \u003ca href=\"https://www.djangoproject.com/\"\u003eDjango\u003c/a\u003e + \u003ca href=\"https://django-ninja.dev/\"\u003eDjango-Ninja\u003c/a\u003e for REST API\u003c/li\u003e\n\u003cli\u003eFrontend: \u003ca 
href=\"https://docs.djangoproject.com/en/5.1/ref/contrib/admin/\"\u003eDjango Admin\u003c/a\u003e + Vanilla HTML, CSS, JS\u003c/li\u003e\n\u003cli\u003eWeb Server: \u003ca href=\"https://www.djangoproject.com/\"\u003eDjango\u003c/a\u003e + \u003ca href=\"https://channels.readthedocs.io/en/latest/\"\u003e\u003ccode\u003echannels\u003c/code\u003e\u003c/a\u003e + \u003ca href=\"https://github.com/django/daphne/\"\u003e\u003ccode\u003edaphne\u003c/code\u003e\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eDatabase: \u003ca href=\"https://docs.djangoproject.com/en/5.1/ref/databases/#sqlite-notes\"\u003eDjango ORM\u003c/a\u003e saving to \u003ca href=\"https://www.sqlite.org/mostdeployed.html\"\u003eSQLite3\u003c/a\u003e \u003ccode\u003e./data/index.sqlite3\u003c/code\u003e\u003c/li\u003e\n\u003cli\u003eJob Queue: \u003ca href=\"https://huey.readthedocs.io/\"\u003eHuey\u003c/a\u003e using \u003ccode\u003e./data/queue.sqlite3\u003c/code\u003e under \u003ccode\u003esupervisord\u003c/code\u003e\u003c/li\u003e\n\u003cli\u003eBuild/test/lint: \u003ca href=\"https://github.com/pdm-project/pdm\"\u003e\u003ccode\u003epdm\u003c/code\u003e\u003c/a\u003e / \u003ccode\u003emypy\u003c/code\u003e+\u003ccode\u003epyright\u003c/code\u003e+\u003ccode\u003epytest\u003c/code\u003e / \u003ccode\u003eruff\u003c/code\u003e\u003c/li\u003e\n\u003cli\u003eSubdependencies: \u003ca href=\"https://github.com/ArchiveBox/abx-pkg\"\u003e\u003ccode\u003eabx-pkg\u003c/code\u003e\u003c/a\u003e installs apt/brew/pip/npm pkgs at runtime (e.g. 
\u003ccode\u003eyt-dlp\u003c/code\u003e, \u003ccode\u003esinglefile\u003c/code\u003e, \u003ccode\u003ereadability\u003c/code\u003e, \u003ccode\u003egit\u003c/code\u003e)\u003c/li\u003e\n\u003c/ul\u003e\n\n\nThese optional subdependencies used for archiving sites include:\n\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/62a02155-05d7-4f3e-8de5-75a50a145c4f\" alt=\"archivebox --version CLI output screenshot showing dependencies installed\" width=\"330px\" align=\"right\" style=\"max-width: 100%;\"\u003e\n\n\u003cul\u003e\n\u003cli\u003e\u003ccode\u003echromium\u003c/code\u003e / \u003ccode\u003echrome\u003c/code\u003e (for screenshots, PDF, DOM HTML, and headless JS scripts)\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003enode\u003c/code\u003e \u0026amp; \u003ccode\u003enpm\u003c/code\u003e (for readability, mercury, and singlefile)\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003ewget\u003c/code\u003e (for plain HTML, static files, and WARC saving)\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003ecurl\u003c/code\u003e (for fetching headers, favicon, and posting to Archive.org)\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003eyt-dlp\u003c/code\u003e or \u003ccode\u003eyoutube-dl\u003c/code\u003e (for audio, video, and subtitles)\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003egit\u003c/code\u003e (for cloning git repos)\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003esinglefile\u003c/code\u003e (for saving into a self-contained html file)\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003epostlight/parser\u003c/code\u003e (for discussion threads, forums, and articles)\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003ereadability\u003c/code\u003e (for articles and long text content)\u003c/li\u003e\n\u003cli\u003eand more as we grow...\u003c/li\u003e\n\u003c/ul\u003e\n\nYou don't need to install every dependency to use ArchiveBox. 
ArchiveBox will automatically disable extractors that rely on dependencies that aren't installed, based on what is configured and available in your \u003ccode\u003e$PATH\u003c/code\u003e.\n  \nIf not using Docker, make sure to keep the dependencies up-to-date yourself and check that ArchiveBox isn't reporting any incompatibility with the versions you install.\n\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e#install python3 and archivebox with your system package manager\n# apt/brew/pip/etc install ... (see Quickstart instructions above)\n\u003cbr/\u003e\nwhich -a archivebox    # see where you have installed archivebox\narchivebox setup       # auto install all the extractors and extras\narchivebox --version   # see info and check validity of installed dependencies\n\u003c/code\u003e\u003c/pre\u003e\n  \nInstalling directly on \u003cstrong\u003eWindows without Docker or WSL/WSL2/Cygwin is not officially supported\u003c/strong\u003e (I cannot respond to Windows support tickets), but some advanced users have reported getting it working.\n\n\u003ch4\u003eLearn More\u003c/h4\u003e\n\u003cul\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Install#dependencies\"\u003eWiki: Install (Dependencies)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install\"\u003eWiki: Chromium Install\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\"\u003eWiki: Upgrading or Merging Archives\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#installing\"\u003eWiki: Troubleshooting (Installing)\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\n\u003c/details\u003e\n\u003cbr/\u003e\n\n\n## Archive Layout\n\nAll of ArchiveBox's state (SQLite DB, content, config, logs, etc.) 
is stored in a single folder per collection.\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eExpand to learn more about the layout of Archivebox's data on-disk...\u003c/i\u003e\u003c/summary\u003e\u003cbr/\u003e\n\nData folders can be created anywhere (`~/archivebox/data` or `$PWD/data` as seen in our examples), and you can create as many data folders as you want to hold different collections.\nAll \u003ccode\u003earchivebox\u003c/code\u003e CLI commands are designed to be run from inside an ArchiveBox data folder, starting with \u003ccode\u003earchivebox init\u003c/code\u003e to initialize a new collection inside an empty directory.\n\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003emkdir -p ~/archivebox/data \u0026\u0026 cd ~/archivebox/data   # just an example, can be anywhere\narchivebox init\u003c/code\u003e\u003c/pre\u003e\n\nThe on-disk layout is optimized to be easy to browse by hand and durable long-term. The main index is a standard \u003ccode\u003eindex.sqlite3\u003c/code\u003e database in the root of the data folder (it can also be \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive#2-export-and-host-it-as-static-html\"\u003eexported as static JSON/HTML\u003c/a\u003e), and the archive snapshots are organized by date-added timestamp in the \u003ccode\u003edata/archive/\u003c/code\u003e subfolder.\n\n\u003cimg src=\"https://user-images.githubusercontent.com/511499/117453293-c7b91600-af12-11eb-8a3f-aa48b0f9da3c.png\" width=\"400px\" align=\"right\" style=\"float: right\"/\u003e\n\n\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003edata/\n    index.sqlite3\n    ArchiveBox.conf\n    archive/\n        ...\n        1617687755/\n            index.html\n            index.json\n            screenshot.png\n            media/some_video.mp4\n            warc/1617687755.warc.gz\n            git/somerepo.git\n            
...\n\u003c/code\u003e\u003c/pre\u003e\n\nEach snapshot subfolder \u003ccode\u003edata/archive/TIMESTAMP/\u003c/code\u003e includes a static \u003ccode\u003eindex.json\u003c/code\u003e and \u003ccode\u003eindex.html\u003c/code\u003e describing its contents, and the snapshot extractor outputs are plain files within the folder.\n\n\u003ch4\u003eLearn More\u003c/h4\u003e\n\u003cul\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-Up-Storage\"\u003eWiki: Setting Up Storage (SMB, NFS, S3, B2, Google Drive, etc.)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Disk-Layout\"\u003eWiki: Usage (Disk Layout)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#large-archives\"\u003eWiki: Usage (Large Archives)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#output-folder\"\u003eWiki: Security Overview (Output Folder)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive\"\u003eWiki: Publishing Your Archive\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\"\u003eWiki: Upgrading or Merging Archives\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\n\u003c/details\u003e\n\u003cbr/\u003e\n\n\n## Static Archive Exporting\n\nYou can create one-off archives of individual URLs with `archivebox oneshot`, or export your index as static HTML using `archivebox list` (so you can view it without an ArchiveBox server).\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eExpand to learn how to export your ArchiveBox collection...\u003c/i\u003e\u003c/summary\u003e\u003cbr/\u003e\n\n\u003cblockquote\u003e\n\u003cp\u003e\u003cem\u003eNOTE: These exports are not paginated, exporting 
many URLs or the entire archive at once may be slow. Use the filtering CLI flags on the \u003ccode\u003earchivebox list\u003c/code\u003e command to export specific Snapshots or ranges.\u003c/em\u003e\u003c/p\u003e\n\u003c/blockquote\u003e\n\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e# do a one-off single URL archive without needing a data dir initialized\narchivebox oneshot 'https://example.com'\n\n# archivebox list --help\narchivebox list --html --with-headers \u003e index.html     # export to static html table\narchivebox list --json --with-headers \u003e index.json     # export to json blob\narchivebox list --csv=timestamp,url,title \u003e index.csv  # export to csv spreadsheet\n\n# (if using Docker Compose, add the -T flag when piping)\n# docker compose run -T archivebox list --html 'https://example.com' \u003e index.html\n\u003c/code\u003e\u003c/pre\u003e\n\nThe paths in the static exports are relative; make sure to keep them next to your `./archive` folder when backing them up or viewing them.\n\n\u003ch4\u003eLearn More\u003c/h4\u003e\n\n\u003cul\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive#2-export-and-host-it-as-static-html\"\u003eWiki: Publishing Your Archive (Exporting as Static HTML)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#publishing\"\u003eWiki: Security Overview (Publishing)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#public_index--public_snapshots--public_add_view\"\u003eWiki: Configuration (\u003ccode\u003ePUBLIC_INDEX\u003c/code\u003e, \u003ccode\u003ePUBLIC_SNAPSHOTS\u003c/code\u003e, \u003ccode\u003ePUBLIC_ADD_VIEW\u003c/code\u003e)\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\n\u003c/details\u003e\n\u003cbr/\u003e\n\n\n\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\n\u003cimg 
src=\"https://docs.monadical.com/uploads/upload_b6900afc422ae699bfefa2dcda3306f3.png\" width=\"100%\" alt=\"security graphic\"/\u003e\n\u003c/div\u003e\n\n\n## Caveats\n\n### Archiving Private Content\n\n\u003ca id=\"archiving-private-urls\"\u003e\u003c/a\u003e\n\nIf you're importing pages with private content or URLs containing secret tokens you don't want public (e.g Google Docs, paywalled content, unlisted videos, etc.), **you may want to disable some of the extractor methods to avoid leaking that content to 3rd party APIs or the public**.\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eExpand to learn about privacy, permissions, and user accounts...\u003c/i\u003e\u003c/summary\u003e\n\n\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e# don't save private content to ArchiveBox, e.g.:\narchivebox add 'https://docs.google.com/document/d/12345somePrivateDocument'\narchivebox add 'https://vimeo.com/somePrivateVideo'\n\n# without first disabling saving to Archive.org:\narchivebox config --set SAVE_ARCHIVE_DOT_ORG=False  # disable saving all URLs in Archive.org\n\n# restrict the main index, Snapshot content, and Add Page to authenticated users as-needed:\narchivebox config --set PUBLIC_INDEX=False\narchivebox config --set PUBLIC_SNAPSHOTS=False\narchivebox config --set PUBLIC_ADD_VIEW=False \narchivebox manage createsuperuser\n\n# if extra paranoid or anti-Google:\narchivebox config --set SAVE_FAVICON=False          # disable favicon fetching (it calls a Google API passing the URL's domain part only)\narchivebox config --set CHROME_BINARY=chromium      # ensure it's using Chromium instead of Chrome\n\u003c/code\u003e\u003c/pre\u003e\n\n\u003cblockquote\u003e\n\u003cp\u003e\u003cem\u003eCAUTION: Assume anyone \u003cem\u003eviewing\u003c/em\u003e your archives will be able to see any cookies, session tokens, or private URLs passed to ArchiveBox during archiving.\u003c/em\u003e\n\u003cem\u003eMake sure to secure your 
ArchiveBox data and don't share snapshots with others without stripping out sensitive headers and content first.\u003c/em\u003e\u003c/p\u003e\n\u003c/blockquote\u003e\n\n\u003ch4\u003eLearn More\u003c/h4\u003e\n\n\u003cul\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive\"\u003eWiki: Publishing Your Archive\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview\"\u003eWiki: Security Overview\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#setting-up-a-chromium-user-profile\"\u003eWiki: Chromium Install (Setting Up a User Profile)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#chrome_user_data_dir\"\u003eWiki: Configuration (\u003ccode\u003eCHROME_USER_DATA_DIR\u003c/code\u003e)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#cookies_file\"\u003eWiki: Configuration (\u003ccode\u003eCOOKIES_FILE\u003c/code\u003e)\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\n\u003c/details\u003e\n\u003cbr/\u003e\n\n\n### Security Risks of Viewing Archived JS\n\nBe aware that malicious archived JS can access the contents of other pages in your archive when viewed. Because the Web UI serves all viewed snapshots from a single domain, they share a request context and **typical CSRF/CORS/XSS/CSP protections do not work to prevent cross-site request attacks**. 
See the [Security Overview](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#stealth-mode) page and [Issue #239](https://github.com/ArchiveBox/ArchiveBox/issues/239) for more details.\n\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eExpand to see risks and mitigations...\u003c/i\u003e\u003c/summary\u003e\n\n\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003e# visiting an archived page with malicious JS:\nhttps://127.0.0.1:8000/archive/1602401954/example.com/index.html\n\n# example.com/index.js can now make a request to read everything from:\nhttps://127.0.0.1:8000/index.html\nhttps://127.0.0.1:8000/archive/*\n# then example.com/index.js can send it off to some evil server\n\u003c/code\u003e\u003c/pre\u003e\n\n\u003cblockquote\u003e\n\u003cp\u003e\u003cem\u003eNOTE: Only the \u003ccode\u003ewget\u003c/code\u003e \u0026amp; \u003ccode\u003edom\u003c/code\u003e extractor methods execute archived JS when viewing snapshots, all other archive methods produce static output that does not execute JS on viewing.\u003c/em\u003e\u003cbr/\u003e\n\u003cem\u003eIf you are worried about these issues ^ you should disable these extractors using:\u003cbr/\u003e \u003ccode\u003earchivebox config --set SAVE_WGET=False SAVE_DOM=False\u003c/code\u003e.\u003c/em\u003e\u003c/p\u003e\n\u003c/blockquote\u003e\n\n\u003ch4\u003eLearn More\u003c/h4\u003e\n\u003cul\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview\"\u003eWiki: Security Overview\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/issues/239\"\u003eArchiveBox Github Issue: #239\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/security/advisories/GHSA-cr45-98w9-gwqx\"\u003eSecurity Advisory: \u003ccode\u003eCVE-2023-45815\u003c/code\u003e\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca 
href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#publishing\"\u003eWiki: Security Overview (Publishing)\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\n\u003c/details\u003e\n\u003cbr/\u003e\n\n\n### Working Around Sites that Block Archiving\n\nFor various reasons, many large sites (Reddit, Twitter, Cloudflare, etc.) actively block archiving or bots in general. There are a number of approaches to work around this, and we also provide \u003ca href=\"https://docs.monadical.com/s/archivebox-consulting-services\"\u003econsulting services\u003c/a\u003e to help here.\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eClick to learn how to set up user agents, cookies, and site logins...\u003c/i\u003e\u003c/summary\u003e\n\u003cbr/\u003e\n\n\n\u003cul\u003e\n\u003cli\u003eSet \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#curl_user_agent\"\u003e\u003ccode\u003eCHROME_USER_AGENT\u003c/code\u003e, \u003ccode\u003eWGET_USER_AGENT\u003c/code\u003e, \u003ccode\u003eCURL_USER_AGENT\u003c/code\u003e\u003c/a\u003e to impersonate a real browser (by default, ArchiveBox reveals that it's a bot when using the default user agent settings)\u003c/li\u003e\n\u003cli\u003eSet up a logged-in browser session for archiving using \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#setting-up-a-chromium-user-profile\"\u003e\u003ccode\u003eCHROME_USER_DATA_DIR\u003c/code\u003e \u0026amp; \u003ccode\u003eCOOKIES_FILE\u003c/code\u003e\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eRewrite your URLs before archiving to swap in alternative frontends that are more bot-friendly e.g.\u003cbr\u003e\n\u003ccode\u003ereddit.com/some/url\u003c/code\u003e -\u0026gt; \u003ccode\u003eteddit.net/some/url\u003c/code\u003e: \u003ca href=\"https://github.com/mendel5/alternative-front-ends\"\u003ehttps://github.com/mendel5/alternative-front-ends\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\nIn the future we plan on 
adding support for running JS scripts during archiving to block ads, cookie popups, modals, and fix other issues. Follow here for progress: \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/issues/51\"\u003eIssue #51\u003c/a\u003e.\n\n\u003c/details\u003e\n\u003cbr/\u003e\n\n\n### Saving Multiple Snapshots of a Single URL\n\nArchiveBox appends a hash with the current date `https://example.com#2020-10-24` to differentiate when a single URL is archived multiple times.\n\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eClick to learn how the \u003ccode\u003eRe-Snapshot\u003c/code\u003e feature works...\u003c/i\u003e\u003c/summary\u003e\n\u003cbr/\u003e\n\n\nBecause ArchiveBox uniquely identifies snapshots by URL, it must use a workaround to take multiple snapshots of the same URL (otherwise they would show up as a single Snapshot entry). It makes the URLs of repeated snapshots unique by adding a hash with the archive date at the end:\n\n\u003cpre lang=\"bash\"\u003e\u003ccode style=\"white-space: pre-line\"\u003earchivebox add 'https://example.com#2020-10-24'\n...\narchivebox add 'https://example.com#2020-10-25'\n\u003c/code\u003e\u003c/pre\u003e\n\nThe \u003cimg src=\"https://user-images.githubusercontent.com/511499/115942091-73c02300-a476-11eb-958e-5c1fc04da488.png\" alt=\"Re-Snapshot Button\" height=\"24px\"/\u003e button in the Admin UI is a shortcut for this hash-date multi-snapshotting workaround.\n\nImproved support for saving multiple snapshots of a single URL without this hash-date workaround will be \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/issues/179\"\u003eadded eventually\u003c/a\u003e (along with the ability to view diffs of the changes between runs).\n\n\u003ch4\u003eLearn More\u003c/h4\u003e\n\n\u003cul\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/issues/179\"\u003eArchiveBox Issues: #179\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca 
href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#explanation-of-buttons-in-the-web-ui---admin-snapshots-list\"\u003eWiki: Usage (Explanation of Web UI Buttons)\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\n\u003c/details\u003e\n\u003cbr/\u003e\n\n### Storage Requirements\n\nBecause ArchiveBox is designed to ingest a large volume of URLs with multiple copies of each URL stored by different 3rd-party tools, it can be quite disk-space intensive. There are also some special requirements when using filesystems like NFS/SMB/FUSE.\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eClick to learn more about ArchiveBox's filesystem and hosting requirements...\u003c/i\u003e\u003c/summary\u003e\n\u003cbr/\u003e\n\n\u003cul\u003e\n\u003cli\u003e\u003cstrong\u003eArchiveBox can use anywhere from ~1gb per 1000 Snapshots, to ~50gb per 1000 Snapshots\u003c/strong\u003e, mostly dependent on whether you're saving audio \u0026 video using \u003ccode\u003eSAVE_MEDIA=True\u003c/code\u003e and whether you lower \u003ccode\u003eMEDIA_MAX_SIZE=750m\u003c/code\u003e.\u003c/li\u003e\n\u003cli\u003eDisk usage can be reduced by using a compressed/\u003ca href=\"https://www.ixsystems.com/blog/ixsystems-and-klara-systems-celebrate-valentines-day-with-a-heartfelt-donation-of-fast-dedupe-to-openzfs-and-truenas/\"\u003ededuplicated\u003c/a\u003e filesystem like \u003ca href=\"https://www.reddit.com/r/zfs/comments/t9cexx/a_simple_real_world_zfs_compression_speed_an/\"\u003eZFS\u003c/a\u003e/BTRFS, or by turning off extractor methods you don't need. You can also deduplicate content with a tool like \u003ca href=\"https://github.com/adrianlopezroche/fdupes\"\u003e\u003ccode\u003efdupes\u003c/code\u003e\u003c/a\u003e or \u003ca href=\"https://github.com/pauldreik/rdfind\"\u003e\u003ccode\u003erdfind\u003c/code\u003e\u003c/a\u003e.  
\n\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eDon't store large collections on older filesystems like EXT3/FAT\u003c/strong\u003e as they may not be able to handle more than 50k directory entries in the \u003ccode\u003edata/archive/\u003c/code\u003e folder.\n\u003c/li\u003e\n\u003cli\u003e\u003cstrong\u003eTry to keep the \u003ccode\u003edata/index.sqlite3\u003c/code\u003e file on a local drive (not a network mount)\u003c/strong\u003e or SSD for maximum performance, however the \u003ccode\u003edata/archive/\u003c/code\u003e folder can be on a network mount or slower HDD.\u003c/li\u003e\n\u003cli\u003eIf using Docker or NFS/SMB/FUSE for the \u003ccode\u003edata/archive/\u003c/code\u003e folder, you may need to set \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid\"\u003e\u003ccode\u003ePUID\u003c/code\u003e \u0026 \u003ccode\u003ePGID\u003c/code\u003e\u003c/a\u003e and \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/issues/1304\"\u003edisable \u003ccode\u003eroot_squash\u003c/code\u003e\u003c/a\u003e on your fileshare server.\n\u003c/li\u003e\n\u003c/ul\u003e\n\n\u003ch4\u003eLearn More\u003c/h4\u003e\n\n\u003cul\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Disk-Layout\"\u003eWiki: Usage (Disk Layout)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#output-folder\"\u003eWiki: Security Overview (Output Folder)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#large-archives\"\u003eWiki: Usage (Large Archives)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid\"\u003eWiki: Configuration (\u003ccode\u003ePUID\u003c/code\u003e \u0026 \u003ccode\u003ePGID\u003c/code\u003e)\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca 
href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root\"\u003eWiki: Security Overview (Do Not Run as Root)\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\n\n\u003c/details\u003e\n\u003cbr/\u003e\n\n\n---\n\n\n\u003cbr/\u003e\n\n\n## Screenshots\n\n\u003cdiv align=\"center\" width=\"80%\"\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/0362bcd1-1dfd-43c6-b4ec-db5e78da07b2\" width=\"80%\"/\u003e\n\u003ctable\u003e\n\u003ctbody\u003e\n\u003ctr\u003e\n\u003ctd\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/e9fdcb16-344e-48c8-8be0-efa48ec155d5\" alt=\"brew install archivebox\" width=\"210px\"\u003e\u003cbr/\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/e235c9d8-fda9-499d-a6a5-59b0e6a0efce\" alt=\"archivebox version\" width=\"210px\"\u003e\n\u003c/td\u003e\n\u003ctd\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/0e3da0c7-d2c2-4a71-b096-6caedafd6ef7\" alt=\"archivebox init\" width=\"210px\"\u003e\u003cbr/\u003e\n\u003c/td\u003e\n\u003ctd\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/31c47440-ec14-4a02-99a3-aae8a9078d46\" alt=\"archivebox add\" width=\"210px\"\u003e\n\u003c/td\u003e\n\u003ctd\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/ebcdc21d-e2af-4bf8-ad4b-bc4f3151bbef\" alt=\"archivebox data dir\" width=\"210px\"\u003e\n\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/ea672e6b-4df5-49d8-b550-7f450951fd27\" alt=\"archivebox server\" width=\"210px\"\u003e\n\u003c/td\u003e\n\u003ctd\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/e1e5bd78-b0b6-45dc-914c-e1046fee4bc4\" alt=\"archivebox server add\" width=\"210px\"\u003e\n\u003c/td\u003e\n\u003ctd\u003e\n\u003cimg 
src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/e8e0b6f8-8fdf-4b7f-8124-c10d8699bdb2\" alt=\"archivebox server list\" width=\"210px\"\u003e\n\u003c/td\u003e\n\u003ctd\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/ace0954a-ddac-4520-9d18-1c77b1ec50b2\" alt=\"archivebox server detail\" width=\"210px\"\u003e\n\u003c/td\u003e\n\u003c/tr\u003e\n\u003c/tbody\u003e\n\u003c/table\u003e\n\u003c/div\u003e\n\u003cbr/\u003e\n\n\u003cbr/\u003e\n\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/ca85432e-a2df-40c6-968f-51a1ef99b24e\" width=\"100%\" alt=\"paisley graphic\"\u003e\n\u003c/div\u003e\n\n\n# Background \u0026 Motivation\n\nArchiveBox aims to enable more of the internet to be saved from deterioration by empowering people to self-host their own archives. The intent is for all the web content you care about to be viewable with common software in 50 - 100 years without needing to run ArchiveBox or other specialized software to replay it.\n\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eClick to read more about why archiving is important and how to do it ethically...\u003c/i\u003e\u003c/summary\u003e\n\u003cbr/\u003e\n\n\nVast treasure troves of knowledge are lost every day on the internet to link rot. 
As a society, we have an imperative to preserve some important parts of that treasure, just like we preserve our books, paintings, and music in physical libraries long after the originals go out of print or fade into obscurity.\n\nWhether it's to resist censorship by saving news articles before they get taken down or edited, or just to save a collection of early 2010's flash games you loved to play, having the tools to archive internet content enables you to save the stuff you care most about before it disappears.\n\n\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/71e36bc5-1c94-44e2-92b6-405fa898c734\" width=\"40%\"/\u003e\u003cbr/\u003e\n\u003csup\u003e\u003ci\u003eImage from \u003ca href=\"https://perma.cc/\"\u003ePerma.cc\u003c/a\u003e...\u003c/i\u003e\u003cbr/\u003e\u003c/sup\u003e\n\u003c/div\u003e\n\nThe balance between the permanence and ephemeral nature of content on the internet is part of what makes it beautiful. I don't think everything should be preserved in an automated fashion--making all content permanent and never removable, but I do think people should be able to decide for themselves and effectively archive specific content that they care about, just like libraries do. Without the work of archivists saving physical books, manuscripts, and paintings we wouldn't have any knowledge of our ancestors' history. I believe archiving the web is just as important to provide the same benefit to future generations.\n\nArchiveBox's stance is that duplication of other people's content is only ethical if it:\n\n- A. doesn't deprive the original creators of revenue and\n- B. 
is responsibly curated by an individual/institution.\n\nIn the U.S., \u003ca href=\"https://guides.library.oregonstate.edu/copyright/libraries\"\u003elibraries, researchers, and archivists\u003c/a\u003e are allowed to duplicate copyrighted materials under \u003ca href=\"https://libguides.ala.org/copyright/fairuse\"\u003e\"fair use\"\u003c/a\u003e for \u003ca href=\"https://guides.cuny.edu/cunyfairuse/librarians#:~:text=One%20of%20these%20specified%20conditions,may%20be%20liable%20for%20copyright\"\u003eprivate study, scholarship, or research\u003c/a\u003e. Archive.org's non-profit preservation work is \u003ca href=\"https://blog.archive.org/2024/03/01/fair-use-in-action-at-the-internet-archive/\"\u003ecovered under fair use\u003c/a\u003e in the US, and they properly handle \u003ca href=\"https://cardozoaelj.com/2015/03/20/use-of-copyright-law-to-take-down-revenge-porn/\"\u003eunethical content\u003c/a\u003e/\u003ca href=\"https://help.archive.org/help/rights/\"\u003eDMCA\u003c/a\u003e/\u003ca href=\"https://gdpr.eu/right-to-be-forgotten/#:~:text=An%20individual%20has%20the%20right,that%20individual%20withdraws%20their%20consent.\"\u003eGDPR\u003c/a\u003e removal requests to maintain good standing in the eyes of the law.\n\nAs long as you A. don't try to profit off pirating copyrighted content and B. have processes in place to respond to removal requests, many countries allow you to use software like ArchiveBox to ethically and responsibly archive any web content you can view. That being said, ArchiveBox is not liable for how you choose to operate the software. 
You must research your own local laws and regulations, and get proper legal counsel if you plan to host a public instance (start by putting your DMCA/GDPR contact info in \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#footer_info\"\u003e\u003ccode\u003eFOOTER_INFO\u003c/code\u003e\u003c/a\u003e and changing your instance's branding using \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#custom_templates_dir\"\u003e\u003ccode\u003eCUSTOM_TEMPLATES_DIR\u003c/code\u003e\u003c/a\u003e).\n\n\u003c/details\u003e\n\u003cbr/\u003e\n\n\n## Comparison to Other Projects\n\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/4cac62a9-e8fb-425b-85a3-ca644aa6dd42\" width=\"5%\" align=\"right\" alt=\"comparison\" style=\"float: right\"/\u003e \n\n\n\u003e **Check out our [community wiki](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community) for a list of alternative web archiving tools and orgs.**\n\nArchiveBox gained momentum in the internet archiving industry because it uniquely combines 3 things:\n\n- **it's distributed:** users own their data instead of entrusting it to one big central provider\n- **it's future-proof:** saving in *multiple formats* and extracting out raw TXT, PNG, PDF, MP4, etc. 
files\n- **it's extensible:** with powerful APIs, flexible storage, and a big community adding new extractors regularly\n\n\u003cbr/\u003e\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eExpand for a more direct comparison to Archive.org and specific open-source alternatives...\u003c/i\u003e\u003c/summary\u003e\u003cbr/\u003e\n\nArchiveBox tries to be a robust, set-and-forget archiving solution suitable for archiving RSS feeds, bookmarks, or your entire browsing history (beware, it may be too big to store), including private/authenticated content that you wouldn't otherwise share with a centralized service like Archive.org.\n\n\u003ch3\u003eComparison With Centralized Public Archives\u003c/h3\u003e\n\nNot all content is suitable to be archived on a centralized, publicly accessible platform. Archive.org doesn't offer the ability to save things behind login walls for good reason, as the content may not have been intended for a public audience. ArchiveBox exists to fill that gap by letting everyone save what they have access to on an individual basis, and to encourage decentralized archiving that's less susceptible to censorship or natural disasters.\n\nBy having users store their content locally or within their organizations, we can also save much larger portions of the internet than a centralized service has the disk capacity to handle. 
The eventual goal is to work towards federated archiving where users can share portions of their collections with each other, and with central archives on a case-by-case basis.\n\n\u003ch3\u003eComparison With Other Self-Hosted Archiving Options\u003c/h3\u003e\n\nArchiveBox differentiates itself from [similar self-hosted projects](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#Web-Archiving-Projects) by providing both a comprehensive CLI interface for managing your archive, a Web UI that can be used either independently or together with the CLI, and a simple on-disk data format that can be used without either.\n\n\n*If you want better fidelity for very complex interactive pages with heavy JS/streams/API requests, check out [ArchiveWeb.page](https://archiveweb.page) and [ReplayWeb.page](https://replayweb.page).*\n\n*If you want more bookmark categorization and note-taking features, check out [Archivy](https://archivy.github.io/), [Memex](https://github.com/WorldBrain/Memex), [Polar](https://getpolarized.io/), or [LinkAce](https://www.linkace.org/).*\n\n*If you need more advanced recursive spider/crawling ability beyond `--depth=1`, check out [Browsertrix](https://github.com/webrecorder/browsertrix-crawler), [Photon](https://github.com/s0md3v/Photon), or [Scrapy](https://scrapy.org/) and pipe the outputted URLs into ArchiveBox.*\n\nFor more alternatives, see our [list here](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#Web-Archiving-Projects)...\n\nArchiveBox is neither the highest fidelity nor the simplest tool available for self-hosted archiving, rather it's a jack-of-all-trades that tries to do most things well by default. 
We encourage you to try these other tools made by our friends if ArchiveBox isn't suited to your needs.\n\n\u003c/details\u003e\n\n\u003cbr/\u003e\n\n\u003c!--\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\u003cbr/\u003e\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/04808ac2-3133-44fd-8703-3387e06dc851\" width=\"100%\" alt=\"dependencies graphic\"\u003e\u003c/div\u003e--\u003e\n\n## Internet Archiving Ecosystem\n\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/78d8a725-97f4-47f5-b983-1f62843ddc51\" width=\"14%\" align=\"right\" style=\"float: right\"/\u003e\n\n\u003cdetails\u003e\n\u003csummary\u003e\u003ci\u003eOur \u003cb\u003e\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community\"\u003eCommunity Wiki\u003c/a\u003e\u003c/b\u003e strives to be a comprehensive index of the web archiving industry...\u003c/i\u003e\u003c/summary\u003e\n\u003cbr/\u003e\n\n- [Community Wiki](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community)\n  - [Web Archiving Software](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#web-archiving-projects)  \n    _List of ArchiveBox alternatives and open source projects in the internet archiving space._\n  - [Awesome-Web-Archiving Lists](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#the-master-lists)  \n    _Community-maintained indexes of archiving tools and institutions like `iipc/awesome-web-archiving`._\n  - [Reading List](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#reading-list)  \n    _Articles, posts, and blogs relevant to ArchiveBox and web archiving in general._\n  - [Communities](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#communities)  \n    _A collection of the most active internet archiving communities and initiatives._\n- Check out the ArchiveBox [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap) and 
[Changelog](https://github.com/ArchiveBox/ArchiveBox/wiki/Changelog)\n- Learn why archiving the internet is important by reading the \"[On the Importance of Web Archiving](https://items.ssrc.org/parameters/on-the-importance-of-web-archiving/)\" blog post.\n- Reach out to me for questions and comments via [@ArchiveBoxApp](https://twitter.com/ArchiveBoxApp) or [@theSquashSH](https://twitter.com/thesquashSH) on Twitter\n\n\u003c/details\u003e\n\n\u003cbr/\u003e\n\n**Need help building a custom archiving solution?**\n\n\u003e ✨ **[Hire the team that built Archivebox](https://zulip.archivebox.io/#narrow/stream/167-enterprise/topic/welcome/near/1191102) to solve archiving for your org.** ([@ArchiveBoxApp](https://twitter.com/ArchiveBoxApp))\n\n\u003cbr/\u003e\n\n\n\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/897f7a88-1265-4aab-b80c-b1640afaad1f\" width=\"100%\" alt=\"documentation graphic\"\u003e\n\u003c/div\u003e\n\n# Documentation\n\n\u003cimg src=\"https://read-the-docs-guidelines.readthedocs-hosted.com/_images/logo-dark.png\" width=\"13%\" align=\"right\" style=\"float: right\"/\u003e\n\nWe use the [ArchiveBox GitHub Wiki](https://github.com/ArchiveBox/ArchiveBox/wiki) for documentation.\n\n\u003csub\u003eThere is also a mirror available on \u003ca href=\"https://archivebox.readthedocs.io/en/latest/\"\u003eRead the Docs\u003c/a\u003e (though it's sometimes outdated).\u003c/sub\u003e\n\n\u003e ✏️ You can submit docs changes \u0026 suggestions in our dedicated repo [`ArchiveBox/docs`](https://github.com/ArchiveBox/docs).\n\n## Getting Started\n\n- [Quickstart](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart)\n- [Install](https://github.com/ArchiveBox/ArchiveBox/wiki/Install)\n- [Docker](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker)\n- [Usage](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage)\n- 
[Configuration](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration)\n- [Supported Sources](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive)\n- [Supported Outputs](https://github.com/ArchiveBox/ArchiveBox/wiki#can-save-these-things-for-each-site)\n- [Scheduled Archiving](https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving)\n\n## Advanced\n\n- [Security Overview](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview)\n- [Cookies \u0026 Sessions Setup](https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#setting-up-a-chromium-user-profile) (archiving sites that require logins)\n- [Setting up the Search Backends](https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-up-Search) (choosing ripgrep, Sonic, or FTS5)\n- [Setting up Local/Remote Storages](https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-up-Storage) (S3/B2/Google Drive/SMB/NFS/etc.)\n- [Setting up Authentication \u0026 Permissions](https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-up-Authentication) (SSO/LDAP/OAuth/API Keys/etc.)\n- [Publishing Your Archive](https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive) (sharing your archive server with others)\n- [Chromium Install Options](https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install) (installing and configuring ArchiveBox's Chrome)\n- [Upgrading or Merging Archives](https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives)\n- [Troubleshooting](https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting)\n\n## Developers\n\n- [Developer Documentation](https://github.com/ArchiveBox/ArchiveBox#archivebox-development)\n- [Python API](https://docs.archivebox.io/)\n- [REST API](https://demo.archivebox.io/api) (alpha)\n\n## More Info\n\n- [Bug Tracker](https://github.com/ArchiveBox/ArchiveBox/issues)\n- [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap)\n- 
[Changelog](https://github.com/ArchiveBox/ArchiveBox/releases)\n- [Donations](https://github.com/ArchiveBox/ArchiveBox/wiki/Donations)\n- [Background \u0026 Motivation](https://github.com/ArchiveBox/ArchiveBox#background--motivation)\n- [Web Archiving Community](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community)\n\n\u003cbr/\u003e\n\n---\n\n\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/e895e79f-5c7d-429b-ad8a-7df2cc183ca3\" width=\"100%\" alt=\"development\"\u003e\n\u003c/div\u003e\n\n# ArchiveBox Development\n\nAll contributions to ArchiveBox are welcomed! Check our [issues](https://github.com/ArchiveBox/ArchiveBox/issues) and [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap) for things to work on, and please open an issue to discuss your proposed implementation before working on things! Otherwise we may have to close your PR if it doesn't align with our roadmap.\n\nFor low hanging fruit / easy first tickets, see: \u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc+label%3A%22help+wanted%22\"\u003eArchiveBox/Issues `#good first ticket` `#help wanted`\u003c/a\u003e.\n\n**Python API Documentation:** https://docs.archivebox.io/en/dev/archivebox.html#module-archivebox.main\n\n**Internal Architecture Diagrams:** https://github.com/ArchiveBox/ArchiveBox/wiki/ArchiveBox-Architecture-Diagrams\n\n\n### Setup the dev environment\n\n\u003cdetails\u003e\u003csummary\u003e\u003ci\u003eClick to expand...\u003c/i\u003e\u003c/summary\u003e\n\n#### 1. Clone the main code repo (making sure to pull the submodules as well)\n\n```bash\ngit clone --recurse-submodules https://github.com/ArchiveBox/ArchiveBox\ncd ArchiveBox\ngit checkout dev  # or the branch you want to test\ngit submodule update --init --recursive\ngit pull --recurse-submodules\n```\n\n#### 2. 
Option A: Install the Python, JS, and system dependencies directly on your machine\n\n```bash\n# Install ArchiveBox + python dependencies\npip install uv\n./bin/lock_pkgs.sh         # (aka `uv venv; uv sync;` + generate requirements.txt)\n\n# Install ArchiveBox runtime dependencies\nmkdir -p data \u0026\u0026 cd data\narchivebox install         # on \u003e=v0.8.5 (otherwise `archivebox setup`)\n\n# Run the development server w/ autoreloading (but no bg workers)\narchivebox manage runserver --debug --reload 0.0.0.0:8000\n\n# Run the production server (with bg workers but no autoreloading)\narchivebox server 0.0.0.0:8000\n```\n\n#### 2. Option B: Build the docker container and use that for development instead\n\n```bash\n# Optional: develop via docker by mounting the code dir into the container\n# if you edit e.g. ./archivebox/core/models.py on the docker host, runserver\n# inside the container will reload and pick up your changes\n./bin/build_docker.sh dev\n\ndocker run -it -v $PWD/data:/data archivebox/archivebox:dev init --setup\n\n# Run the development server w/ autoreloading (but no bg workers)\ndocker run -it -v $PWD/data:/data -v $PWD/archivebox:/app/archivebox -p 8000:8000 archivebox/archivebox:dev manage runserver 0.0.0.0:8000 --debug --reload\n\n# Run the production server (with bg workers but no autoreloading)\ndocker run -it -v $PWD/data:/data -v $PWD/archivebox:/app/archivebox -p 8000:8000 archivebox/archivebox:dev server\n\n# (remove the --reload flag and add the --nothreading flag when profiling with the django debug toolbar)\n# When using --reload, make sure any files you create can be read by the user in the Docker container, eg with 'chmod a+rX'.\n```\n\n\u003c/details\u003e\n\n### Common development tasks\n\nSee the `./bin/` folder and read the source of the bash scripts within.\nYou can also run all these in Docker. 
For more examples see the GitHub Actions CI/CD tests that are run: `.github/workflows/*.yaml`.\n\n#### Run in DEBUG mode\n\n\u003cdetails\u003e\u003csummary\u003e\u003ci\u003eClick to expand...\u003c/i\u003e\u003c/summary\u003e\n\n```bash\n# set up persistent DEBUG=True for all runs\narchivebox config --set DEBUG=True\n\n# OR you can run a dev server with DEBUG=True in a few ways:\narchivebox manage runserver --debug --reload 0.0.0.0:8000\n# or\narchivebox server --debug 0.0.0.0:8000\n# or\nenv DEBUG=True daphne -b 0.0.0.0 -p 8000 archivebox.core.asgi:application\n```\n\nhttps://stackoverflow.com/questions/1074212/how-can-i-see-the-raw-sql-queries-django-is-running\n\n\u003c/details\u003e\n\n#### Install and run a specific GitHub branch\n\n\u003cdetails\u003e\u003csummary\u003e\u003ci\u003eClick to expand...\u003c/i\u003e\u003c/summary\u003e\n\n##### Use a Pre-Built Image\n\nIf you're looking for the latest `dev` Docker image, it's often available pre-built on Docker Hub, simply pull and use `archivebox/archivebox:dev`.\n\n```bash\ndocker pull archivebox/archivebox:dev\ndocker run archivebox/archivebox:dev version\n# verify the BUILD_TIME and COMMIT_HASH in the output are recent\n```\n\n##### Build Branch from Source\n  \nYou can also build and run any branch yourself from source, for example to build \u0026 use `dev` locally:\n\n```bash\n# docker-compose.yml:\nservices:\n    archivebox:\n        image: archivebox/archivebox:dev\n        build: 'https://github.com/ArchiveBox/ArchiveBox.git#dev'\n        ...\n\n# or with plain Docker:\ndocker build -t archivebox:dev https://github.com/ArchiveBox/ArchiveBox.git#dev\ndocker run -it -v $PWD:/data archivebox:dev init\n\n# or with pip:\npip install 'git+https://github.com/pirate/ArchiveBox@dev'\nnpm install 'git+https://github.com/ArchiveBox/ArchiveBox.git#dev'\narchivebox install\n```\n\n\u003c/details\u003e\n\n#### Run the linters / tests\n\n\u003cdetails\u003e\u003csummary\u003e\u003ci\u003eClick to 
expand...\u003c/i\u003e\u003c/summary\u003e\n\n```bash\n./bin/lint.sh\n./bin/test.sh\n```\n(uses `flake8`, `mypy`, and `pytest -s`)\n\n\u003c/details\u003e\n\n\n#### Make DB migrations, enter Django shell, other dev helper commands\n\n\u003cdetails\u003e\u003csummary\u003e\u003ci\u003eClick to expand...\u003c/i\u003e\u003c/summary\u003e\n\n```bash\n# generate the database migrations after changes to models.py\ncd archivebox/\n./manage.py makemigrations\n\n# enter a python shell or a SQL shell\ncd path/to/test/data/\narchivebox shell\narchivebox manage dbshell\n\n# generate a graph of the ORM models\nbrew install graphviz\npip install pydot graphviz\narchivebox manage graph_models -a -o orm.png\nopen orm.png\n\n# list all models with field db info and methods\narchivebox manage list_model_info --all --signature --db-type --field-class\n\n# print all django settings\narchivebox manage print_settings\narchivebox manage print_settings --format=yaml    # pip install pyyaml\n\n# autogenerate an admin.py from given app models\narchivebox manage admin_generator core \u003e core/admin.py\n\n# dump db data to a script that re-populates it\narchivebox manage dumpscript core \u003e scripts/testdata.py\narchivebox manage reset core\narchivebox manage runscript testdata\n\n# resetdb and clear all data!\narchivebox manage reset_db\n\n# use django-tui to interactively explore commands\npip install django-tui\n# ensure django-tui is in INSTALLED_APPS: core/settings.py\narchivebox manage tui\n\n# show python and JS package dependency trees\npdm list --tree\nnpm ls --all\n```\n\n\u003cimg src=\"https://github.com/ArchiveBox/ArchiveBox/assets/511499/dc3e9f8c-9544-46e0-a7f0-30f571b72022\" width=\"600px\" alt=\"ArchiveBox ORM models relationship graph\"/\u003e\n\n- https://django-extensions.readthedocs.io/en/latest/command_extensions.html\n- https://stackoverflow.com/questions/1074212/how-can-i-see-the-raw-sql-queries-django-is-running\n- https://github.com/anze3db/django-tui (explore 
`manage.py` commands as TUI)\n- https://github.com/bloomberg/memray (advanced python profiler)\n- https://github.com/laixintao/flameshow (display flamegraphs in terminal)\n- https://github.com/taliraj/django-migrations-tui (explore migrations as TUI)\n\n\u003c/details\u003e\n\n#### Contributing a new extractor\n\n\u003cdetails\u003e\u003csummary\u003e\u003ci\u003eClick to expand...\u003c/i\u003e\u003c/summary\u003e\n\n\u003cbr/\u003e\u003cbr/\u003e\n\nArchiveBox [`extractors`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/extractors/media.py) are external binaries or Python/Node scripts that ArchiveBox runs to archive content on a page.\n\nExtractors take the URL of a page to archive, write their output to the filesystem `data/archive/TIMESTAMP/EXTRACTOR/...`, and return an [`ArchiveResult`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/core/models.py#:~:text=return%20qs-,class%20ArchiveResult,-(models.Model)%3A) entry which is saved to the database (visible on the `Log` page in the UI).\n\n*Check out how we added **[`archivebox/extractors/singlefile.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/extractors/singlefile.py)** as an example of the process: [Issue #399](https://github.com/ArchiveBox/ArchiveBox/issues/399) + [PR #403](https://github.com/ArchiveBox/ArchiveBox/pull/403).*\n\n\u003cbr/\u003e\n\n\n**The process to contribute a new extractor is like this:**\n\n\u003e [!IMPORTANT]\n\u003e This process is getting much easier after v0.8.x, there is a new plugin system under development: https://github.com/ArchiveBox/ArchiveBox/releases/tag/v0.8.4-rc\n\n1. [Open an issue](https://github.com/ArchiveBox/ArchiveBox/issues/new?assignees=\u0026labels=changes%3A+behavior%2Cstatus%3A+idea+phase\u0026template=feature_request.md\u0026title=Feature+Request%3A+...) with your proposed implementation (please link to the pages of any new external dependencies you plan on using)\n2. 
Ensure any dependencies needed are easily installable via a package manager like `apt`, `brew`, `pip3`, `npm`\n   (Ideally, prefer to use external programs available via `pip3` or `npm`, however we do support using any binary installable via package manager that exposes a CLI/Python API and writes output to stdout or the filesystem.)\n3. Create a new file in [`archivebox/extractors/EXTRACTOR.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/extractors) (copy an existing extractor like [`singlefile.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/extractors/singlefile.py) as a template)\n4. Add config settings to enable/disable any new dependencies and the extractor as a whole, e.g. `USE_DEPENDENCYNAME`, `SAVE_EXTRACTORNAME`, `EXTRACTORNAME_SOMEOTHEROPTION` in [`archivebox/config.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/config.py)\n5. Add a preview section to [`archivebox/templates/core/snapshot.html`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/templates/core/snapshot.html) to view the output, and a column to [`archivebox/templates/core/index_row.html`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/templates/core/index_row.html) with an icon for your extractor\n6. Add an integration test for your extractor in [`tests/test_extractors.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/tests/test_extractors.py)\n7. [Submit your PR for review!](https://github.com/ArchiveBox/ArchiveBox/blob/dev/.github/CONTRIBUTING.md) 🎉\n8. 
Once merged, please document it in these places and anywhere else you see info about other extractors:\n  - https://github.com/ArchiveBox/ArchiveBox#output-formats\n  - https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles\n  - https://github.com/ArchiveBox/ArchiveBox/wiki/Install#dependencies\n\n\u003cbr/\u003e\u003cbr/\u003e\n\n\u003c/details\u003e\n\n#### Build the docs, pip package, and docker image\n\n\u003cdetails\u003e\u003csummary\u003e\u003ci\u003eClick to expand...\u003c/i\u003e\u003c/summary\u003e\n\n(Normally CI takes care of this, but these scripts can be run to do it manually)\n```bash\n./bin/build.sh\n\n# or individually:\n./bin/build_docs.sh\n./bin/build_pip.sh\n./bin/build_docker.sh\n```\n\n\u003c/details\u003e\n\n#### Roll a release\n\n\u003cdetails\u003e\u003csummary\u003e\u003ci\u003eClick to expand...\u003c/i\u003e\u003c/summary\u003e\n\n(Normally CI takes care of this, but these scripts can be run to do it manually)\n```bash\n./bin/release.sh\n\n# or individually:\n./bin/release_docs.sh\n./bin/release_pip.sh\n./bin/release_docker.sh\n```\n\n\u003c/details\u003e\n\n---\n\n## Further Reading\n\n\u003cimg src=\"https://raw.githubusercontent.com/Monadical-SAS/redux-time/HEAD/examples/static/jeremy.jpg\" width=\"100px\" align=\"right\"/\u003e\n\n- [ArchiveBox.io Website](https://archivebox.io) / [ArchiveBox Github (Source Code)](https://github.com/ArchiveBox/ArchiveBox) / [ArchiveBox Demo Server](https://demo.archivebox.io)\n- [Documentation (Github Wiki)](https://github.com/ArchiveBox/ArchiveBox/wiki) / [API Reference Docs (ReadTheDocs)](https://docs.archivebox.io) / [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap) / [Changelog](https://github.com/ArchiveBox/ArchiveBox/releases)\n- [Bug Tracker (Github Issues)](https://github.com/ArchiveBox/ArchiveBox/issues) / [Discussions (Github Discussions)](https://github.com/ArchiveBox/ArchiveBox/discussions) / [Community Chat Forum 
(Zulip)](https://zulip.archivebox.io)\n- Find us on social media: [Twitter `@ArchiveBoxApp`](https://twitter.com/ArchiveBoxApp), [LinkedIn](https://www.linkedin.com/company/archivebox/), [YouTube](https://www.youtube.com/@ArchiveBoxApp), [SaaSHub](https://www.saashub.com/archivebox), [Alternative.to](https://alternativeto.net/software/archivebox/about/), [Reddit](https://www.reddit.com/r/ArchiveBox/)\n\n---\n\n\u003cbr/\u003e\n\u003cdiv align=\"center\" style=\"text-align: center\"\u003e\n\u003cb\u003e\u003ca href=\"https://docs.sweeting.me/s/archivebox-consulting-services\"\u003e🏛️ Contact us for professional support 💬\u003c/a\u003e\u003c/b\u003e\u003cbr/\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003ca href=\"https://hcb.hackclub.com/donations/start/archivebox\"\u003e\u003cimg src=\"https://img.shields.io/badge/Donate-Directly-%23DE5D26.svg\"/\u003e\u003c/a\u003e \u0026nbsp;\n\u003ca href=\"https://github.com/sponsors/pirate\"\u003e\u003cimg src=\"https://img.shields.io/badge/Github_Sponsors-%23B7CDFE.svg\"/\u003e\u003c/a\u003e \u0026nbsp;\n\u003ca href=\"https://www.patreon.com/theSquashSH\"\u003e\u003cimg src=\"https://img.shields.io/badge/Patreon-%23DD5D76.svg\"/\u003e\u003c/a\u003e \u0026nbsp;\n\u003ca href=\"https://paypal.me/NicholasSweeting\"\u003e\u003cimg src=\"https://img.shields.io/badge/Paypal-%23FFD141.svg\"/\u003e\u003c/a\u003e \u0026nbsp;\n\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox/wiki/Donations\"\u003e\u003cimg src=\"https://img.shields.io/badge/BTC%5CETH-%231a1a1a.svg\"/\u003e\u003c/a\u003e\n\u003cbr/\u003e\n\u003csup\u003e\u003ci\u003eArchiveBox operates as a US 501(c)(3) nonprofit \u003ca href=\"https://en.wikipedia.org/wiki/Fiscal_sponsorship\"\u003eFSP\u003c/a\u003e (sponsored by \u003ca href=\"https://hackclub.com/hcb?ref=donation\"\u003eHCB\u003c/a\u003e), \u003ca href=\"https://hcb.hackclub.com/donations/start/archivebox\"\u003edirect donations\u003c/a\u003e are 
tax-deductible.\u003c/i\u003e\u003c/sup\u003e\n\u003cbr/\u003e\u003cbr/\u003e\n\u003ca href=\"https://twitter.com/ArchiveBoxApp\"\u003e\u003cimg src=\"https://img.shields.io/badge/Tweet-%40ArchiveBoxApp-blue.svg?style=flat\"/\u003e\u003c/a\u003e\u0026nbsp;\n\u003ca href=\"https://github.com/ArchiveBox/ArchiveBox\"\u003e\u003cimg src=\"https://img.shields.io/github/stars/ArchiveBox/ArchiveBox.svg?style=flat\u0026label=Star+on+Github\"/\u003e\u003c/a\u003e\u0026nbsp;\n\u003ca href=\"https://zulip.archivebox.io/\"\u003e\u003cimg src=\"https://img.shields.io/badge/Join_Our_Community-Zulip_Forum-%23B7EDFE.svg\"/\u003e\u003c/a\u003e\u003cbr/\u003e\u003cbr/\u003e\n\u003chr/\u003e\n\u003ci\u003e✨ Have spare CPU/disk/bandwidth after all your 网站存档爬 and want to help the world?\u003cbr/\u003eCheck out our \u003ca href=\"https://github.com/ArchiveBox/good-karma-kit\"\u003eGood Karma Kit\u003c/a\u003e...\u003c/i\u003e\n\u003c/div\u003e\n","funding_links":["https://github.com/sponsors/ArchiveBox","https://github.com/sponsors/pirate","https://donate.archivebox.io","https://paypal.me/NicholasSweeting","https://www.patreon.com/theSquashSH"],"categories":["Python","Tools \u0026 Software","HarmonyOS","Web","Apps","Overview","Backup tools","网络服务","chromium","firefox","效率 \u003ca name=\"good\"\u003e\u003c/a\u003e","Python (1887)","self-hosted","Onboarding","Web Archiving","Table of Contents"],"sub_categories":["Acquisition","Windows Manager","Availability \u0026 compatability","KnowledgeBase","Other services","Metadata removal","网络服务_其他","Capture Operators \u0026 Services","Uncategorized"],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2FArchiveBox%2FArchiveBox","html_url":"https://awesome.ecosyste.ms/projects/github.com%2FArchiveBox%2FArchiveBox","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2FArchiveBox%2FArchiveBox/lists"}