diff --git a/Dockerfile b/Dockerfile index 36763782..36671225 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # This is the Dockerfile for ArchiveBox, it bundles the following main dependencies: -# python3.14, pip, pipx, uv, python3-ldap +# python3.13, uv, python3-ldap # curl, wget, git, dig, ping, tree, nano # node, npm, single-file, readability-extractor, postlight-parser # ArchiveBox, yt-dlp, playwright, chromium @@ -12,7 +12,7 @@ # docker run -v "$PWD/data":/data -p 8000:8000 archivebox server # Multi-arch build: # docker buildx create --use -# docker buildx build . --platform=linux/amd64,linux/arm64--push -t archivebox/archivebox:dev -t archivebox/archivebox:sha-abc123 +# docker buildx build . --platform=linux/amd64,linux/arm64 --push -t archivebox/archivebox:dev -t archivebox/archivebox:sha-abc123 # Read more here: https://github.com/ArchiveBox/ArchiveBox#archivebox-development @@ -20,9 +20,9 @@ ### Example: Using ArchiveBox in your own project's Dockerfile ######## -# FROM python:3.14-slim +# FROM python:3.13-slim # WORKDIR /data -# RUN pip install archivebox>=0.8.5rc51 # use latest release here +# RUN pip install 'archivebox>=0.9.0' # use latest release here (quoted so the shell does not treat >= as a redirect) # RUN archivebox install # RUN useradd -ms /bin/bash archivebox && chown -R archivebox /data @@ -82,8 +82,6 @@ ENV ARCHIVEBOX_USER="archivebox" \ ENV CODE_DIR=/app \ DATA_DIR=/data \ PLAYWRIGHT_BROWSERS_PATH=/browsers - # GLOBAL_VENV=/venv \ - # TODO: add TMP_DIR and LIB_DIR? # Bash SHELL config # http://redsymbol.net/articles/unofficial-bash-strict-mode/ @@ -201,7 +199,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \ echo "[+] APT Installing NODE $NODE_VERSION for $TARGETPLATFORM..." 
\ && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \ - && curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ + && curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ && apt-get update -qq \ && apt-get install -qq -y --no-upgrade libatomic1 \ && apt-get install -y --no-upgrade \ @@ -218,7 +216,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T # Set up uv and main app /venv -COPY --from=ghcr.io/astral-sh/uv:0.5 /uv /uvx /bin/ +COPY --from=ghcr.io/astral-sh/uv:0.6 /uv /uvx /bin/ ENV UV_COMPILE_BYTECODE=1 \ UV_PYTHON_PREFERENCE=managed \ UV_PYTHON_INSTALL_DIR=/opt/uv/python \ @@ -282,7 +280,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T # && service dbus start \ && echo "[+] PIP Installing playwright into /venv and CHROMIUM binary into $PLAYWRIGHT_BROWSERS_PATH..." \ && uv pip install "playwright>=1.49.1" \ - && uv run playwright install chromium --no-shell --with-deps \ + && uv run playwright install chromium --no-shell --with-deps \ && export CHROME_BINARY="$(uv run python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')" \ && ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \ && ln -s /browsers/ffmpeg-*/ffmpeg-linux /usr/bin/ffmpeg \ @@ -381,11 +379,9 @@ RUN (echo -e "\n\n[√] Finished Docker build succesfully. 
Saving build summary && echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s")\n\n" \ ) | tee -a /VERSION.txt -# Run $ archivebox version >> /VERSION.txt -# RUN "$CODE_DIR"/bin/docker_entrypoint.sh init 2>&1 | tee -a /VERSION.txt -# Note: archivebox version is skipped during build due to uv managed Python stdlib issue -# The version will be verified at runtime instead -RUN chmod +x "$CODE_DIR"/bin/*.sh +# Verify ArchiveBox is installed and print version info +RUN chmod +x "$CODE_DIR"/bin/*.sh \ + && archivebox version 2>&1 | tee -a /VERSION.txt || true #################################################### @@ -395,7 +391,7 @@ VOLUME "$DATA_DIR" EXPOSE 8000 HEALTHCHECK --interval=30s --timeout=20s --retries=15 \ - CMD curl --silent 'http://admin.archivebox.localhost:8000/health/' | grep -q 'OK' + CMD curl --silent 'http://localhost:8000/health/' | grep -q 'OK' ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"] CMD ["archivebox", "server", "--init", "0.0.0.0:8000"] diff --git a/README.md b/README.md index 1804ecf8..6615dce4 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ The goal is to sleep soundly knowing the part of the internet you care about wil
# Option A: Get ArchiveBox with Docker Compose (recommended):
mkdir -p ~/archivebox/data && cd ~/archivebox
curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml # edit options in this file as-needed
-docker compose run archivebox init --setup
+docker compose run archivebox init --install
# docker compose run archivebox add 'https://example.com'
# docker compose run archivebox help
# docker compose up
@@ -85,7 +85,7 @@ docker compose run archivebox init --setup
# Option B: Or use it as a plain Docker container:
mkdir -p ~/archivebox/data && cd ~/archivebox/data
-docker run -it -v $PWD:/data archivebox/archivebox init --setup
+docker run -it -v $PWD:/data archivebox/archivebox init --install
# docker run -it -v $PWD:/data archivebox/archivebox add 'https://example.com'
# docker run -it -v $PWD:/data archivebox/archivebox help
# docker run -it -v $PWD:/data -p 8000:8000 archivebox/archivebox
@@ -94,7 +94,7 @@ docker run -it -v $PWD:/data archivebox/archivebox init --setup
# Option C: Or install it with your preferred pkg manager (see Quickstart below for apt, brew, and more)
pip install archivebox
mkdir -p ~/archivebox/data && cd ~/archivebox/data
-archivebox init --setup
+archivebox init --install
# archivebox add 'https://example.com'
# archivebox help
# archivebox server 0.0.0.0:8000
@@ -189,7 +189,7 @@ ArchiveBox is free for everyone to self-host, but we also provide support, secur
curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml
-docker compose run archivebox init --setup
+docker compose run archivebox init --install
docker compose up
@@ -213,7 +213,7 @@ See below for more usage examples using the C
Install Docker on your system (if not already installed).
Create a new empty directory and initialize your collection (can be anywhere).
mkdir -p ~/archivebox/data && cd ~/archivebox/data
-docker run -v $PWD:/data -it archivebox/archivebox init --setup
+docker run -v $PWD:/data -it archivebox/archivebox init --install
Optional: Start the server then login to the Web UI http://127.0.0.1:8000 ⇢ Admin.
@@ -259,19 +259,18 @@ See "Against curl | sh as a
-- Install Python >= v3.10 and Node >= v18 on your system (if not already installed).
+- Install Python >= v3.13 and Node >= v22 on your system (if not already installed).
- Install the ArchiveBox package using
pip3 (or uvx).
-pip3 install --upgrade archivebox yt-dlp playwright
-playwright install --with-deps chromium
+pip3 install --upgrade archivebox
archivebox version
# install any missing extras shown using apt/brew/pkg/etc. see Wiki for instructions
-# python@3.10 node curl wget git ripgrep ...
+# python@3.13 node curl wget git ripgrep ...
See the Install: Bare Metal Wiki for full install instructions for each OS...
- Create a new empty directory and initialize your collection (can be anywhere).
mkdir -p ~/archivebox/data && cd ~/archivebox/data # for example
-archivebox init --setup # instantialize a new collection
+archivebox init --install # instantiate a new collection
-# (--setup auto-installs and link JS dependencies: singlefile, readability, mercury, etc.)
+# (--install auto-installs and links JS dependencies: singlefile, readability, mercury, etc.)
@@ -312,7 +311,7 @@ archivebox version # make sure all dependencies are inst
Create a new empty directory and initialize your collection (can be anywhere).
mkdir -p ~/archivebox/data && cd ~/archivebox/data
-archivebox init --setup
+archivebox init --install
@@ -346,7 +345,7 @@ archivebox version # make sure all dependencies are inst
mkdir -p ~/archivebox/data && cd ~/archivebox/data
-archivebox init --setup
+archivebox init --install
>=3.10>=3.13channels + daphne]./data/index.sqlite./data/queue.sqlite3 under supervisordpdm / mypy+pyright+pytest / ruffdaphne (ASGI)./data/index.sqlite3supervisord for worker managementuv / mypy+pyright+pytest / ruffabx-pkg installs apt/brew/pip/npm pkgs at runtime (e.g. yt-dlp, singlefile, readability, git)
@@ -1017,7 +1005,7 @@ https://127.0.0.1:8000/archive/*
NOTE: Only the wget & dom extractor methods execute archived JS when viewing snapshots, all other archive methods produce static output that does not execute JS on viewing.
-If you are worried about these issues ^ you should disable these extractors using:
+If you are worried about these issues ^ you can disable specific extractor plugins via the admin UI or configuration.
archivebox config --set SAVE_WGET=False SAVE_DOM=False.
Learn More
@@ -1377,15 +1365,15 @@ git pull --recurse-submodules ```bash # Install ArchiveBox + python dependencies pip install uv -./bin/lock_pkgs.sh # (aka `uv venv; uv sync;` + generate requirements.txt) +uv sync --dev --all-extras source .venv/bin/activate # activate the venv # Install ArchiveBox runtime dependencies mkdir -p data && cd data -archivebox install # on >=v0.8.5 (otherwise `archivebox setup`) +archivebox install # detect and install all extractor dependencies # Run the development server w/ autoreloading (but no bg workers) -archivebox manage runserver --debug --reload 0.0.0.0:8000 +archivebox server --debug --reload 0.0.0.0:8000 # Run the production server (with bg workers but no autoreloading) archivebox server 0.0.0.0:8000 @@ -1399,10 +1387,10 @@ archivebox server 0.0.0.0:8000 # inside the container will reload and pick up your changes ./bin/build_docker.sh dev -docker run -it -v $PWD/data:/data archivebox/archivebox:dev init --setup +docker run -it -v $PWD/data:/data archivebox/archivebox:dev init --install # Run the development server w/ autoreloading (but no bg workers) -docker run -it -v $PWD/data:/data -v $PWD/archivebox:/app/archivebox -p 8000:8000 archivebox/archivebox:dev manage runserver 0.0.0.0:8000 --debug --reload +docker run -it -v $PWD/data:/data -v $PWD/archivebox:/app/archivebox -p 8000:8000 archivebox/archivebox:dev server --debug --reload 0.0.0.0:8000 # Run the production server (with bg workers but no autoreloading) docker run -it -v $PWD/data:/data -v $PWD/archivebox:/app/archivebox -p 8000:8000 archivebox/archivebox:dev server @@ -1427,7 +1415,7 @@ You can also run all these in Docker. 
For more examples see the GitHub Actions C archivebox config --set DEBUG=True # OR you can run a dev server with DEBUG=True in a few ways: -archivebox manage runserver --debug --reload 0.0.0.0:8000 +archivebox server --debug --reload 0.0.0.0:8000 # or archivebox server --debug 0.0.0.0:8000 # or diff --git a/bin/docker_entrypoint.sh b/bin/docker_entrypoint.sh index 9a3b3d3c..b9e10297 100755 --- a/bin/docker_entrypoint.sh +++ b/bin/docker_entrypoint.sh @@ -32,8 +32,8 @@ export ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}" export DEFAULT_PUID=911 export DEFAULT_PGID=911 -# If user tires to set PUID and PGID to root values manually, catch and reject because root is not allowed -if [[ "$PUID" == "0" ]]; then +# If user tries to set PUID and PGID to root values manually, catch and reject because root is not allowed +if [[ "${PUID:-}" == "0" ]]; then echo -e "\n[X] Error: Got PUID=$PUID and PGID=$PGID but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again." > /dev/stderr echo -e " Hint: some NFS/SMB/FUSE/etc. 
filesystems force-remap/ignore all permissions," > /dev/stderr echo -e " leave PUID/PGID unset, disable root_squash, or use values the drive prefers (default is $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr diff --git a/docker-compose.yml b/docker-compose.yml index 76b237ea..416a48fc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,11 +1,10 @@ # Usage: # mkdir -p ~/archivebox/data && cd ~/archivebox # curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml -# docker compose run archivebox version -# docker compose run archivebox config --set SAVE_ARCHIVEDOTORG=False +# docker compose run archivebox init # docker compose run archivebox add --depth=1 'https://news.ycombinator.com' # docker compose run -T archivebox add < bookmarks.txt -# docker compose up -d && open 'http://web.archivebox.localhost:8000' +# docker compose up -d && open 'http://localhost:8000' # docker compose run archivebox help # Documentation: # https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose @@ -21,9 +20,8 @@ services: environment: # - ADMIN_USERNAME=admin # creates an admin user on first run with the given user/pass combo # - ADMIN_PASSWORD=SomeSecretPassword - - LISTEN_HOST=archivebox.localhost:8000 - ALLOWED_HOSTS=* # set this to the hostname(s) you're going to serve the site from! 
- - CSRF_TRUSTED_ORIGINS=http://admin.archivebox.localhost:8000 # MUST match the admin UI URL for login/API to work + - CSRF_TRUSTED_ORIGINS=http://localhost:8000 # MUST match the admin UI URL for login/API to work - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive @@ -33,10 +31,8 @@ services: # - PUID=911 # set to your host user's UID & GID if you encounter permissions issues # - PGID=911 # UID/GIDs lower than 500 may clash with system uids and are not recommended # For options below, it's better to set in data/ArchiveBox.conf or use `docker compose run archivebox config --set SOME_KEY=someval` instead of setting here: - # - YTDLP_MAX_SIZE=750m # increase this filesize limit to allow archiving larger video/audio files # - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out # - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs) - # - SAVE_ARCHIVEDOTORG=True # set to False to disable submitting all URLs to Archive.org when archiving # - USER_AGENT="..." # set a custom USER_AGENT to avoid being blocked as a bot # ... # For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration @@ -79,7 +75,7 @@ services: ### This runs the optional Sonic full-text search backend (much faster than default rg backend). # If Sonic is ever started after not running for a while, update its full-text index by running: - # $ docker-compose run archivebox update --index-only + # $ docker compose run archivebox update --index-only # https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-up-Search sonic: