test fixes

This commit is contained in:
Nick Sweeting
2026-03-23 04:12:31 -07:00
parent b749b26c5d
commit 1d94645abd
5 changed files with 120 additions and 48 deletions

View File

@@ -26,26 +26,14 @@ def is_redacted_env_key(key: str) -> bool:
def redact_env(env: dict[str, Any] | None) -> dict[str, Any]:
if not isinstance(env, dict):
return {}
return {
str(key): value
for key, value in env.items()
if key is not None and not is_redacted_env_key(str(key))
}
return {str(key): value for key, value in env.items() if key is not None and not is_redacted_env_key(str(key))}
def env_to_dotenv_text(env: dict[str, Any] | None) -> str:
    """Render *env* as newline-separated ``KEY=value`` lines.

    The mapping is first passed through ``redact_env``. Remaining entries are
    emitted in sorted key order, entries whose value is ``None`` are skipped,
    and each value is stringified with ``stringify_env_value`` and then
    shell-quoted with ``shlex.quote``.
    """
    redacted_env = redact_env(env)
    return "\n".join(
        f"{key}={shlex.quote(stringify_env_value(value))}"
        for key, value in sorted(redacted_env.items())
        if value is not None
    )
def env_to_shell_exports(env: dict[str, Any] | None) -> str:
    """Render *env* as space-separated ``KEY=value`` shell assignments.

    The mapping is first passed through ``redact_env``. Remaining entries are
    emitted in sorted key order, entries whose value is ``None`` are skipped,
    and each value is stringified with ``stringify_env_value`` and then
    shell-quoted with ``shlex.quote``.
    """
    redacted_env = redact_env(env)
    return " ".join(
        f"{key}={shlex.quote(stringify_env_value(value))}"
        for key, value in sorted(redacted_env.items())
        if value is not None
    )

View File

@@ -1,8 +1,9 @@
"""archivebox/tests/conftest.py - Pytest fixtures for CLI tests."""
import os
import sys
import secrets
import subprocess
import sys
import tempfile
import textwrap
import time
@@ -12,12 +13,35 @@ from typing import Any
import pytest
from archivebox.uuid_compat import uuid7
pytest_plugins = ["archivebox.tests.fixtures"]
REPO_ROOT = Path(__file__).resolve().parents[2]
SESSION_DATA_DIR = Path(tempfile.mkdtemp(prefix="archivebox-pytest-session-")).resolve()
os.environ.setdefault("DATA_DIR", str(SESSION_DATA_DIR))
# Force ArchiveBox imports to see a temp DATA_DIR and cwd during test collection.
os.environ["DATA_DIR"] = str(SESSION_DATA_DIR)
os.environ.pop("CRAWL_DIR", None)
os.environ.pop("SNAP_DIR", None)
os.chdir(SESSION_DATA_DIR)
def _is_repo_path(path: Path) -> bool:
    """Return True when *path* is ``REPO_ROOT`` itself or lies beneath it.

    The path is user-expanded and resolved non-strictly, so it does not need
    to exist on disk.
    """
    candidate = path.expanduser().resolve(strict=False)
    if candidate == REPO_ROOT:
        return True
    return REPO_ROOT in candidate.parents
def _assert_not_repo_path(path: Path, *, label: str) -> None:
    """Raise ``AssertionError`` if *path* points inside the repo root.

    *label* names the setting being checked (e.g. ``"cwd"``) and is included
    in the error message.
    """
    if not _is_repo_path(path):
        return
    raise AssertionError(f"{label} must not point inside the repo root during tests: {path}")
def _assert_safe_runtime_paths(*, cwd: Path | None = None, env: dict[str, str] | None = None) -> None:
    """Check that neither *cwd* nor the data-dir env vars point into the repo.

    *cwd* is checked when it is not ``None``. In *env*, the keys ``DATA_DIR``,
    ``CRAWL_DIR`` and ``SNAP_DIR`` are checked when present with a non-empty
    value. Raises ``AssertionError`` (via ``_assert_not_repo_path``) on the
    first offending path.
    """
    if cwd is not None:
        _assert_not_repo_path(cwd, label="cwd")

    # A missing or empty env means there is nothing to check.
    env_map = env or {}
    for name in ("DATA_DIR", "CRAWL_DIR", "SNAP_DIR"):
        configured = env_map.get(name)
        if not configured:
            continue
        _assert_not_repo_path(Path(configured), label=name)
# =============================================================================
@@ -47,6 +71,7 @@ def run_archivebox_cmd(
"""
cmd = [sys.executable, "-m", "archivebox"] + args
_assert_not_repo_path(data_dir, label="DATA_DIR")
base_env = os.environ.copy()
base_env["DATA_DIR"] = str(data_dir)
base_env["USE_COLOR"] = "False"
@@ -71,6 +96,7 @@ def run_archivebox_cmd(
if env:
base_env.update(env)
_assert_safe_runtime_paths(cwd=data_dir, env=base_env)
result = subprocess.run(
cmd,
input=stdin,
@@ -90,7 +116,7 @@ def run_archivebox_cmd(
@pytest.fixture(autouse=True)
def isolate_test_runtime(tmp_path):
def isolate_test_runtime(tmp_path, monkeypatch):
"""
Run each pytest test from an isolated temp cwd and restore env mutations.
@@ -104,14 +130,35 @@ def isolate_test_runtime(tmp_path):
seed a separate session-scoped temp ``DATA_DIR`` above so any ArchiveBox
config imported before this fixture runs never points at the repo root.
"""
_assert_not_repo_path(tmp_path, label="tmp_path")
original_cwd = Path.cwd()
original_env = os.environ.copy()
original_chdir = os.chdir
original_popen = subprocess.Popen
os.chdir(tmp_path)
os.environ.pop("DATA_DIR", None)
os.environ.pop("CRAWL_DIR", None)
os.environ.pop("SNAP_DIR", None)
def guarded_chdir(path: os.PathLike[str] | str) -> None:
_assert_not_repo_path(Path(path), label="cwd")
original_chdir(path)
def guarded_popen(*args: Any, **kwargs: Any):
cwd = kwargs.get("cwd")
env = kwargs.get("env")
if cwd is not None:
_assert_not_repo_path(Path(cwd), label="cwd")
_assert_safe_runtime_paths(cwd=Path(cwd) if cwd is not None else None, env=env)
return original_popen(*args, **kwargs)
monkeypatch.setattr(os, "chdir", guarded_chdir)
monkeypatch.setattr(subprocess, "Popen", guarded_popen)
try:
_assert_safe_runtime_paths(cwd=Path.cwd(), env=os.environ)
yield
finally:
os.chdir(original_cwd)
original_chdir(original_cwd)
os.environ.clear()
os.environ.update(original_env)
@@ -166,14 +213,18 @@ def run_archivebox_cmd_cwd(
"""
cmd = [sys.executable, "-m", "archivebox"] + args
_assert_not_repo_path(cwd, label="cwd")
base_env = os.environ.copy()
base_env.pop("DATA_DIR", None)
base_env.pop("CRAWL_DIR", None)
base_env.pop("SNAP_DIR", None)
base_env["USE_COLOR"] = "False"
base_env["SHOW_PROGRESS"] = "False"
if env:
base_env.update(env)
_assert_safe_runtime_paths(cwd=cwd, env=base_env)
result = subprocess.run(
cmd,
input=stdin,
@@ -202,8 +253,12 @@ def run_python_cwd(
cwd: Path,
timeout: int = 60,
) -> tuple[str, str, int]:
_assert_not_repo_path(cwd, label="cwd")
base_env = os.environ.copy()
base_env.pop("DATA_DIR", None)
base_env.pop("CRAWL_DIR", None)
base_env.pop("SNAP_DIR", None)
_assert_safe_runtime_paths(cwd=cwd, env=base_env)
result = subprocess.run(
[sys.executable, "-"],
input=script,
@@ -446,7 +501,7 @@ def assert_record_has_fields(record: dict[str, Any], required_fields: list[str])
def create_test_url(domain: str = "example.com", path: str | None = None) -> str:
"""Generate unique test URL."""
path = path or uuid7().hex[:8]
path = path or secrets.token_hex(4)
return f"https://{domain}/{path}"

View File

@@ -305,6 +305,10 @@ create_release() {
if [[ "${version}" == *rc* ]]; then
prerelease_args+=(--prerelease)
fi
if gh release view "${TAG_PREFIX}${version}" --repo "${slug}" >/dev/null 2>&1; then
echo "GitHub release ${TAG_PREFIX}${version} already exists"
return 0
fi
gh release create "${TAG_PREFIX}${version}" \
--repo "${slug}" \
@@ -318,13 +322,17 @@ publish_artifacts() {
local version="$1"
local pypi_token="${UV_PUBLISH_TOKEN:-${PYPI_TOKEN:-${PYPI_PAT_SECRET:-}}}"
if [[ -n "${pypi_token}" ]]; then
UV_PUBLISH_TOKEN="${pypi_token}" uv publish --username=__token__ dist/*
elif [[ -n "${GITHUB_ACTIONS:-}" ]]; then
uv publish --trusted-publishing always dist/*
if curl -fsSL "https://pypi.org/pypi/${PYPI_PACKAGE}/json" | jq -e --arg version "${version}" '.releases[$version] | length > 0' >/dev/null 2>&1; then
echo "${PYPI_PACKAGE} ${version} already published on PyPI"
else
echo "Missing PyPI credentials: set UV_PUBLISH_TOKEN or PYPI_TOKEN" >&2
return 1
if [[ -n "${pypi_token}" ]]; then
UV_PUBLISH_TOKEN="${pypi_token}" uv publish --username=__token__ dist/*
elif [[ -n "${GITHUB_ACTIONS:-}" ]]; then
uv publish --trusted-publishing always dist/*
else
echo "Missing PyPI credentials: set UV_PUBLISH_TOKEN or PYPI_TOKEN" >&2
return 1
fi
fi
wait_for_pypi "${PYPI_PACKAGE}" "${version}"
@@ -347,15 +355,35 @@ main() {
return 1
fi
update_internal_dependencies
version="$(bump_version)"
run_checks
version="$(current_version)"
latest="$(latest_release_version "${slug}")"
if [[ -z "${latest}" ]]; then
relation="gt"
else
relation="$(compare_versions "${version}" "${latest}")"
fi
git add -A
git commit -m "release: ${TAG_PREFIX}${version}"
git push origin "${branch}"
if [[ "${relation}" == "eq" ]]; then
update_internal_dependencies
version="$(bump_version)"
run_checks
wait_for_runs "${slug}" push "$(git rev-parse HEAD)" "push"
git add -A
git commit -m "release: ${TAG_PREFIX}${version}"
git push origin "${branch}"
wait_for_runs "${slug}" push "$(git rev-parse HEAD)" "push"
elif [[ "${relation}" == "gt" ]]; then
if [[ -n "$(git status --short)" ]]; then
echo "Refusing to publish existing unreleased version ${version} with a dirty worktree" >&2
return 1
fi
run_checks
wait_for_runs "${slug}" push "$(git rev-parse HEAD)" "push"
else
echo "Current version ${version} is behind latest GitHub release ${latest}" >&2
return 1
fi
publish_artifacts "${version}"
create_release "${slug}" "${version}"

View File

@@ -26,8 +26,8 @@ services:
- PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list
- PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content
- PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive
- SEARCH_BACKEND_ENGINE=sonic # tells ArchiveBox to use sonic container below for fast full-text search
- SEARCH_BACKEND_HOST_NAME=sonic
- SEARCH_BACKEND_ENGINE=sonic # tells ArchiveBox to use its built-in Sonic worker for fast full-text search
# - SEARCH_BACKEND_HOST_NAME=127.0.0.1
- SEARCH_BACKEND_PASSWORD=SomeSecretPassword
# - PUID=911 # set to your host user's UID & GID if you encounter permissions issues
# - PGID=911 # UID/GIDs lower than 500 may clash with system uids and are not recommended
@@ -54,20 +54,21 @@ services:
# https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving
### This runs the optional Sonic full-text search backend (much faster than default rg backend).
### ArchiveBox now starts and uses Sonic automatically when SEARCH_BACKEND_ENGINE=sonic,
# so the old standalone docker sidecar below is no longer necessary.
# If Sonic is ever started after not running for a while, update its full-text index by running:
# $ docker compose run archivebox update --index-only
# https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-up-Search
sonic:
image: archivebox/sonic:latest
expose:
- 1491
environment:
- SEARCH_BACKEND_PASSWORD=SomeSecretPassword
volumes:
#- ./sonic.cfg:/etc/sonic.cfg:ro # mount to customize: https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/etc/sonic.cfg
- ./data/sonic:/var/lib/sonic/store
# sonic:
# image: archivebox/sonic:latest
# expose:
# - 1491
# environment:
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
# volumes:
# #- ./sonic.cfg:/etc/sonic.cfg:ro # mount to customize: https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/etc/sonic.cfg
# - ./data/sonic:/var/lib/sonic/store
### This optional container runs xvfb+noVNC so you can watch the ArchiveBox browser as it archives things,

2
uv.lock generated
View File

@@ -130,7 +130,7 @@ dev = [{ name = "prek", specifier = ">=0.3.6" }]
[[package]]
name = "abxbus"
version = "2.4.7"
version = "2.4.8"
source = { editable = "../abxbus" }
dependencies = [
{ name = "aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },