From 5e6ba0bfa56634ef59daeec03bea4b037c695e2f Mon Sep 17 00:00:00 2001
From: Claude
Date: Sun, 15 Mar 2026 00:17:10 +0000
Subject: [PATCH] Update Dockerfile, docker-compose.yml, and README for v0.9.0
plugin system overhaul
- Dockerfile: Fix Python version refs (3.14->3.13), update uv 0.5->0.6,
fix double GPG dearmor for NodeSource key, fix trailing whitespace in
playwright install, fix HEALTHCHECK to use localhost instead of
admin.archivebox.localhost, fix multi-arch build missing space,
remove stale GLOBAL_VENV comments, re-enable archivebox version check,
update example FROM python:3.13-slim and pip install archivebox>=0.9.0
- docker-compose.yml: Remove deprecated SAVE_ARCHIVEDOTORG and
LISTEN_HOST config, update CSRF_TRUSTED_ORIGINS to localhost,
fix docker-compose -> docker compose in comments
- docker_entrypoint.sh: Guard the PUID root check against PUID being
unset (use ${PUID:-}; it was already quoted), fix "tires" comment typo
- README.md: Replace --setup with --install (matching actual CLI flag),
update Python >=3.10 -> >=3.13, Node >=18 -> >=22, remove deprecated
SAVE_* config options (SAVE_ARCHIVEDOTORG, SAVE_FAVICON, SAVE_WGET,
SAVE_DOM), update build tool refs (pdm->uv), update job queue ref
(Huey->orchestrator+supervisord), fix Django version refs (5.1->6.0),
fix daphne link typo, fix archivebox setup -> install, simplify pip
install instructions
https://claude.ai/code/session_01X2H7XLawCzLGnrxMArXtVZ
---
Dockerfile | 26 ++++++--------
README.md | 74 +++++++++++++++++-----------------------
bin/docker_entrypoint.sh | 4 +--
docker-compose.yml | 12 +++----
4 files changed, 48 insertions(+), 68 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index 36763782..36671225 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
# This is the Dockerfile for ArchiveBox, it bundles the following main dependencies:
-# python3.14, pip, pipx, uv, python3-ldap
+# python3.13, uv, python3-ldap
# curl, wget, git, dig, ping, tree, nano
# node, npm, single-file, readability-extractor, postlight-parser
# ArchiveBox, yt-dlp, playwright, chromium
@@ -12,7 +12,7 @@
# docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
# Multi-arch build:
# docker buildx create --use
-# docker buildx build . --platform=linux/amd64,linux/arm64--push -t archivebox/archivebox:dev -t archivebox/archivebox:sha-abc123
+# docker buildx build . --platform=linux/amd64,linux/arm64 --push -t archivebox/archivebox:dev -t archivebox/archivebox:sha-abc123
# Read more here: https://github.com/ArchiveBox/ArchiveBox#archivebox-development
@@ -20,9 +20,9 @@
### Example: Using ArchiveBox in your own project's Dockerfile ########
-# FROM python:3.14-slim
+# FROM python:3.13-slim
# WORKDIR /data
-# RUN pip install archivebox>=0.8.5rc51 # use latest release here
+# RUN pip install "archivebox>=0.9.0" # use latest release here (quoted so the shell does not treat >= as a redirect)
# RUN archivebox install
# RUN useradd -ms /bin/bash archivebox && chown -R archivebox /data
@@ -82,8 +82,6 @@ ENV ARCHIVEBOX_USER="archivebox" \
ENV CODE_DIR=/app \
DATA_DIR=/data \
PLAYWRIGHT_BROWSERS_PATH=/browsers
- # GLOBAL_VENV=/venv \
- # TODO: add TMP_DIR and LIB_DIR?
# Bash SHELL config
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
@@ -201,7 +199,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
--mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
echo "[+] APT Installing NODE $NODE_VERSION for $TARGETPLATFORM..." \
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
- && curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
+ && curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
&& apt-get update -qq \
&& apt-get install -qq -y --no-upgrade libatomic1 \
&& apt-get install -y --no-upgrade \
@@ -218,7 +216,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
# Set up uv and main app /venv
-COPY --from=ghcr.io/astral-sh/uv:0.5 /uv /uvx /bin/
+COPY --from=ghcr.io/astral-sh/uv:0.6 /uv /uvx /bin/
ENV UV_COMPILE_BYTECODE=1 \
UV_PYTHON_PREFERENCE=managed \
UV_PYTHON_INSTALL_DIR=/opt/uv/python \
@@ -282,7 +280,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
# && service dbus start \
&& echo "[+] PIP Installing playwright into /venv and CHROMIUM binary into $PLAYWRIGHT_BROWSERS_PATH..." \
&& uv pip install "playwright>=1.49.1" \
- && uv run playwright install chromium --no-shell --with-deps \
+ && uv run playwright install chromium --no-shell --with-deps \
&& export CHROME_BINARY="$(uv run python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')" \
&& ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
&& ln -s /browsers/ffmpeg-*/ffmpeg-linux /usr/bin/ffmpeg \
@@ -381,11 +379,9 @@ RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary
&& echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s")\n\n" \
) | tee -a /VERSION.txt
-# Run $ archivebox version >> /VERSION.txt
-# RUN "$CODE_DIR"/bin/docker_entrypoint.sh init 2>&1 | tee -a /VERSION.txt
-# Note: archivebox version is skipped during build due to uv managed Python stdlib issue
-# The version will be verified at runtime instead
-RUN chmod +x "$CODE_DIR"/bin/*.sh
+# Make entrypoint scripts executable (must succeed), then record `archivebox version` output best-effort (only the version check may fail)
+RUN chmod +x "$CODE_DIR"/bin/*.sh \
+ && (archivebox version 2>&1 | tee -a /VERSION.txt || true)
####################################################
@@ -395,7 +391,7 @@ VOLUME "$DATA_DIR"
EXPOSE 8000
HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
- CMD curl --silent 'http://admin.archivebox.localhost:8000/health/' | grep -q 'OK'
+ CMD curl --silent 'http://localhost:8000/health/' | grep -q 'OK'
ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
CMD ["archivebox", "server", "--init", "0.0.0.0:8000"]
diff --git a/README.md b/README.md
index 1804ecf8..6615dce4 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,7 @@ The goal is to sleep soundly knowing the part of the internet you care about wil
# Option A: Get ArchiveBox with Docker Compose (recommended):
mkdir -p ~/archivebox/data && cd ~/archivebox
curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml # edit options in this file as-needed
-docker compose run archivebox init --setup
+docker compose run archivebox init --install
# docker compose run archivebox add 'https://example.com'
# docker compose run archivebox help
# docker compose up
@@ -85,7 +85,7 @@ docker compose run archivebox init --setup
# Option B: Or use it as a plain Docker container:
mkdir -p ~/archivebox/data && cd ~/archivebox/data
-docker run -it -v $PWD:/data archivebox/archivebox init --setup
+docker run -it -v $PWD:/data archivebox/archivebox init --install
# docker run -it -v $PWD:/data archivebox/archivebox add 'https://example.com'
# docker run -it -v $PWD:/data archivebox/archivebox help
# docker run -it -v $PWD:/data -p 8000:8000 archivebox/archivebox
@@ -94,7 +94,7 @@ docker run -it -v $PWD:/data archivebox/archivebox init --setup
# Option C: Or install it with your preferred pkg manager (see Quickstart below for apt, brew, and more)
pip install archivebox
mkdir -p ~/archivebox/data && cd ~/archivebox/data
-archivebox init --setup
+archivebox init --install
# archivebox add 'https://example.com'
# archivebox help
# archivebox server 0.0.0.0:8000
@@ -189,7 +189,7 @@ ArchiveBox is free for everyone to self-host, but we also provide support, secur
curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml
Run the initial setup to create an admin user (or set ADMIN_USER/PASS in docker-compose.yml)
-docker compose run archivebox init --setup
+docker compose run archivebox init --install
Next steps: Start the server then login to the Web UI http://127.0.0.1:8000 ⇢ Admin.
docker compose up
@@ -213,7 +213,7 @@ See below for more usage examples using the C
Install Docker on your system (if not already installed).
Create a new empty directory and initialize your collection (can be anywhere).
mkdir -p ~/archivebox/data && cd ~/archivebox/data
-docker run -v $PWD:/data -it archivebox/archivebox init --setup
+docker run -v $PWD:/data -it archivebox/archivebox init --install
Optional: Start the server then login to the Web UI http://127.0.0.1:8000 ⇢ Admin.
@@ -259,19 +259,18 @@ See "Against curl | sh as a
-- Install Python >= v3.10 and Node >= v18 on your system (if not already installed).
+- Install Python >= v3.13 and Node >= v22 on your system (if not already installed).
- Install the ArchiveBox package using
pip3 (or uvx).
-pip3 install --upgrade archivebox yt-dlp playwright
-playwright install --with-deps chromium
+pip3 install --upgrade archivebox
archivebox version
# install any missing extras shown using apt/brew/pkg/etc. see Wiki for instructions
-# python@3.10 node curl wget git ripgrep ...
+# python@3.13 node curl wget git ripgrep ...
See the Install: Bare Metal Wiki for full install instructions for each OS...
- Create a new empty directory and initialize your collection (can be anywhere).
mkdir -p ~/archivebox/data && cd ~/archivebox/data # for example
-archivebox init --setup # instantialize a new collection
+archivebox init --install # initialize a new collection
# (--setup auto-installs and link JS dependencies: singlefile, readability, mercury, etc.)
@@ -312,7 +311,7 @@ archivebox version # make sure all dependencies are inst
Create a new empty directory and initialize your collection (can be anywhere).
mkdir -p ~/archivebox/data && cd ~/archivebox/data
-archivebox init --setup
+archivebox init --install
@@ -346,7 +345,7 @@ archivebox version # make sure all dependencies are inst
Create a new empty directory and initialize your collection (can be anywhere).
mkdir -p ~/archivebox/data && cd ~/archivebox/data
-archivebox init --setup
+archivebox init --install
Optional: Start the server then login to the Web UI http://127.0.0.1:8000 ⇢ Admin.
@@ -519,7 +518,7 @@ archivebox persona create --import=chrome personal
# make sure you have pip-installed ArchiveBox and it's available in your $PATH first
# archivebox [subcommand] [--help]
-archivebox init --setup # safe to run init multiple times (also how you update versions)
+archivebox init --install # safe to run init multiple times (also how you update versions)
archivebox version # get archivebox version info + check dependencies
archivebox help # get list of archivebox subcommands that can be run
archivebox add --depth=1 'https://news.ycombinator.com'
@@ -536,7 +535,7 @@ archivebox add --depth=1 'https://news.ycombinator.com'
# make sure you have `docker-compose.yml` from the Quickstart instructions first
# docker compose run archivebox [subcommand] [--help]
-docker compose run archivebox init --setup
+docker compose run archivebox init --install
docker compose run archivebox version
docker compose run archivebox help
docker compose run archivebox add --depth=1 'https://news.ycombinator.com'
@@ -554,7 +553,7 @@ docker compose run archivebox add --depth=1 'https://news.ycombinator.com'
# make sure you create and cd into in a new empty directory first
# docker run -it -v $PWD:/data archivebox/archivebox [subcommand] [--help]
-docker run -v $PWD:/data -it archivebox/archivebox init --setup
+docker run -v $PWD:/data -it archivebox/archivebox init --install
docker run -v $PWD:/data -it archivebox/archivebox version
docker run -v $PWD:/data -it archivebox/archivebox help
docker run -v $PWD:/data -it archivebox/archivebox add --depth=1 'https://news.ycombinator.com'
@@ -760,7 +759,7 @@ env CHROME_BINARY=chromium archivebox ... # run with a one-off config
These methods also work the same way when run inside Docker, see the Docker Configuration wiki page for details.
-The configuration is documented here: **[Configuration Wiki](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration)**, and loaded here: [`archivebox/config.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/config.py).
+The configuration is documented here: **[Configuration Wiki](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration)**, and loaded from: [`archivebox/config/`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/config/).
@@ -771,16 +770,12 @@ The configuration is documented here: **[Configuration Wiki](https://github.com/
TIMEOUT=240 # default: 60 add more seconds on slower networks
CHECK_SSL_VALIDITY=False # default: True False = allow saving URLs w/ bad SSL
-SAVE_ARCHIVEDOTORG=False # default: True False = disable Archive.org saving
-YTDLP_MAX_SIZE=1500m # default: 750m raise/lower yt-dlp output size
PUBLIC_INDEX=True # default: True whether anon users can view index
PUBLIC_SNAPSHOTS=True # default: True whether anon users can view pages
PUBLIC_ADD_VIEW=False # default: False whether anon users can add new URLs
-CHROME_USER_AGENT="Mozilla/5.0 ..." # change these to get around bot blocking
-WGET_USER_AGENT="Mozilla/5.0 ..."
-CURL_USER_AGENT="Mozilla/5.0 ..."
+USER_AGENT="Mozilla/5.0 ..." # change this to get around bot blocking
@@ -802,13 +797,13 @@ ArchiveBox bundles industry-standard tools like [Google Chrome](https://github.c
-- Language: Python
>=3.10
+- Language: Python
>=3.13
- Backend: Django + Django-Ninja for REST API
-- Frontend: Django Admin + Vanilla HTML, CSS, JS
-- Web Server: Django +
channels + daphne]
-- Database: Django ORM saving to SQLite3
./data/index.sqlite
-- Job Queue: Huey using
./data/queue.sqlite3 under supervisord
-- Build/test/lint:
pdm / mypy+pyright+pytest / ruff
+- Frontend: Django Admin + Vanilla HTML, CSS, JS
+- Web Server: Django +
daphne (ASGI)
+- Database: Django ORM saving to SQLite3
./data/index.sqlite3
+- Job Queue: Custom orchestrator using
supervisord for worker management
+- Build/test/lint:
uv / mypy+pyright+pytest / ruff
- Subdependencies:
abx-pkg installs apt/brew/pip/npm pkgs at runtime (e.g. yt-dlp, singlefile, readability, git)
@@ -838,7 +833,7 @@ If not using Docker, make sure to keep the dependencies up-to-date yourself and
# apt/brew/pip/etc install ... (see Quickstart instructions above)
which -a archivebox # see where you have installed archivebox
-archivebox setup # auto install all the extractors and extras
+archivebox install # auto install all the extractors and extras
archivebox --version # see info and check validity of installed dependencies
@@ -963,18 +958,11 @@ If you're importing pages with private content or URLs containing secret tokens
archivebox add 'https://docs.google.com/document/d/12345somePrivateDocument'
archivebox add 'https://vimeo.com/somePrivateVideo'
-# without first disabling saving to Archive.org:
-archivebox config --set SAVE_ARCHIVEDOTORG=False # disable saving all URLs in Archive.org
-
# restrict the main index, Snapshot content, and Add Page to authenticated users as-needed:
archivebox config --set PUBLIC_INDEX=False
archivebox config --set PUBLIC_SNAPSHOTS=False
-archivebox config --set PUBLIC_ADD_VIEW=False
+archivebox config --set PUBLIC_ADD_VIEW=False
archivebox manage createsuperuser
-
-# if extra paranoid or anti-Google:
-archivebox config --set SAVE_FAVICON=False # disable favicon fetching (it calls a Google API passing the URL's domain part only)
-archivebox config --set CHROME_BINARY=chromium # ensure it's using Chromium instead of Chrome
@@ -1017,7 +1005,7 @@ https://127.0.0.1:8000/archive/*
NOTE: Only the wget & dom extractor methods execute archived JS when viewing snapshots, all other archive methods produce static output that does not execute JS on viewing.
-If you are worried about these issues ^ you should disable these extractors using:
archivebox config --set SAVE_WGET=False SAVE_DOM=False.
+If you are worried about these issues ^ you can disable specific extractor plugins via the admin UI or configuration.
Learn More
@@ -1377,15 +1365,15 @@ git pull --recurse-submodules
```bash
# Install ArchiveBox + python dependencies
pip install uv
-./bin/lock_pkgs.sh # (aka `uv venv; uv sync;` + generate requirements.txt)
+uv sync --dev --all-extras
source .venv/bin/activate # activate the venv
# Install ArchiveBox runtime dependencies
mkdir -p data && cd data
-archivebox install # on >=v0.8.5 (otherwise `archivebox setup`)
+archivebox install # detect and install all extractor dependencies
# Run the development server w/ autoreloading (but no bg workers)
-archivebox manage runserver --debug --reload 0.0.0.0:8000
+archivebox server --debug --reload 0.0.0.0:8000
# Run the production server (with bg workers but no autoreloading)
archivebox server 0.0.0.0:8000
@@ -1399,10 +1387,10 @@ archivebox server 0.0.0.0:8000
# inside the container will reload and pick up your changes
./bin/build_docker.sh dev
-docker run -it -v $PWD/data:/data archivebox/archivebox:dev init --setup
+docker run -it -v $PWD/data:/data archivebox/archivebox:dev init --install
# Run the development server w/ autoreloading (but no bg workers)
-docker run -it -v $PWD/data:/data -v $PWD/archivebox:/app/archivebox -p 8000:8000 archivebox/archivebox:dev manage runserver 0.0.0.0:8000 --debug --reload
+docker run -it -v $PWD/data:/data -v $PWD/archivebox:/app/archivebox -p 8000:8000 archivebox/archivebox:dev server --debug --reload 0.0.0.0:8000
# Run the production server (with bg workers but no autoreloading)
docker run -it -v $PWD/data:/data -v $PWD/archivebox:/app/archivebox -p 8000:8000 archivebox/archivebox:dev server
@@ -1427,7 +1415,7 @@ You can also run all these in Docker. For more examples see the GitHub Actions C
archivebox config --set DEBUG=True
# OR you can run a dev server with DEBUG=True in a few ways:
-archivebox manage runserver --debug --reload 0.0.0.0:8000
+archivebox server --debug --reload 0.0.0.0:8000
# or
archivebox server --debug 0.0.0.0:8000
# or
diff --git a/bin/docker_entrypoint.sh b/bin/docker_entrypoint.sh
index 9a3b3d3c..b9e10297 100755
--- a/bin/docker_entrypoint.sh
+++ b/bin/docker_entrypoint.sh
@@ -32,8 +32,8 @@ export ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"
export DEFAULT_PUID=911
export DEFAULT_PGID=911
-# If user tires to set PUID and PGID to root values manually, catch and reject because root is not allowed
-if [[ "$PUID" == "0" ]]; then
+# If user tries to set PUID and PGID to root values manually, catch and reject because root is not allowed
+if [[ "${PUID:-}" == "0" ]]; then
echo -e "\n[X] Error: Got PUID=$PUID and PGID=$PGID but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again." > /dev/stderr
echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions," > /dev/stderr
echo -e " leave PUID/PGID unset, disable root_squash, or use values the drive prefers (default is $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr
diff --git a/docker-compose.yml b/docker-compose.yml
index 76b237ea..416a48fc 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,11 +1,10 @@
# Usage:
# mkdir -p ~/archivebox/data && cd ~/archivebox
# curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml
-# docker compose run archivebox version
-# docker compose run archivebox config --set SAVE_ARCHIVEDOTORG=False
+# docker compose run archivebox init
# docker compose run archivebox add --depth=1 'https://news.ycombinator.com'
# docker compose run -T archivebox add < bookmarks.txt
-# docker compose up -d && open 'http://web.archivebox.localhost:8000'
+# docker compose up -d && open 'http://localhost:8000'
# docker compose run archivebox help
# Documentation:
# https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
@@ -21,9 +20,8 @@ services:
environment:
# - ADMIN_USERNAME=admin # creates an admin user on first run with the given user/pass combo
# - ADMIN_PASSWORD=SomeSecretPassword
- - LISTEN_HOST=archivebox.localhost:8000
- ALLOWED_HOSTS=* # set this to the hostname(s) you're going to serve the site from!
- - CSRF_TRUSTED_ORIGINS=http://admin.archivebox.localhost:8000 # MUST match the admin UI URL for login/API to work
+ - CSRF_TRUSTED_ORIGINS=http://localhost:8000 # MUST match the admin UI URL for login/API to work
- PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list
- PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content
- PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive
@@ -33,10 +31,8 @@ services:
# - PUID=911 # set to your host user's UID & GID if you encounter permissions issues
# - PGID=911 # UID/GIDs lower than 500 may clash with system uids and are not recommended
# For options below, it's better to set in data/ArchiveBox.conf or use `docker compose run archivebox config --set SOME_KEY=someval` instead of setting here:
- # - YTDLP_MAX_SIZE=750m # increase this filesize limit to allow archiving larger video/audio files
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
- # - SAVE_ARCHIVEDOTORG=True # set to False to disable submitting all URLs to Archive.org when archiving
# - USER_AGENT="..." # set a custom USER_AGENT to avoid being blocked as a bot
# ...
# For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration
@@ -79,7 +75,7 @@ services:
### This runs the optional Sonic full-text search backend (much faster than default rg backend).
# If Sonic is ever started after not running for a while, update its full-text index by running:
- # $ docker-compose run archivebox update --index-only
+ # $ docker compose run archivebox update --index-only
# https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-up-Search
sonic: