mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 23:37:58 +10:00
test fixes
This commit is contained in:
@@ -26,26 +26,14 @@ def is_redacted_env_key(key: str) -> bool:
|
||||
def redact_env(env: dict[str, Any] | None) -> dict[str, Any]:
|
||||
if not isinstance(env, dict):
|
||||
return {}
|
||||
return {
|
||||
str(key): value
|
||||
for key, value in env.items()
|
||||
if key is not None and not is_redacted_env_key(str(key))
|
||||
}
|
||||
return {str(key): value for key, value in env.items() if key is not None and not is_redacted_env_key(str(key))}
|
||||
|
||||
|
||||
def env_to_dotenv_text(env: dict[str, Any] | None) -> str:
|
||||
redacted_env = redact_env(env)
|
||||
return "\n".join(
|
||||
f"{key}={shlex.quote(stringify_env_value(value))}"
|
||||
for key, value in sorted(redacted_env.items())
|
||||
if value is not None
|
||||
)
|
||||
return "\n".join(f"{key}={shlex.quote(stringify_env_value(value))}" for key, value in sorted(redacted_env.items()) if value is not None)
|
||||
|
||||
|
||||
def env_to_shell_exports(env: dict[str, Any] | None) -> str:
|
||||
redacted_env = redact_env(env)
|
||||
return " ".join(
|
||||
f"{key}={shlex.quote(stringify_env_value(value))}"
|
||||
for key, value in sorted(redacted_env.items())
|
||||
if value is not None
|
||||
)
|
||||
return " ".join(f"{key}={shlex.quote(stringify_env_value(value))}" for key, value in sorted(redacted_env.items()) if value is not None)
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
"""archivebox/tests/conftest.py - Pytest fixtures for CLI tests."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import secrets
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import textwrap
|
||||
import time
|
||||
@@ -12,12 +13,35 @@ from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from archivebox.uuid_compat import uuid7
|
||||
|
||||
pytest_plugins = ["archivebox.tests.fixtures"]
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
SESSION_DATA_DIR = Path(tempfile.mkdtemp(prefix="archivebox-pytest-session-")).resolve()
|
||||
os.environ.setdefault("DATA_DIR", str(SESSION_DATA_DIR))
|
||||
# Force ArchiveBox imports to see a temp DATA_DIR and cwd during test collection.
|
||||
os.environ["DATA_DIR"] = str(SESSION_DATA_DIR)
|
||||
os.environ.pop("CRAWL_DIR", None)
|
||||
os.environ.pop("SNAP_DIR", None)
|
||||
os.chdir(SESSION_DATA_DIR)
|
||||
|
||||
|
||||
def _is_repo_path(path: Path) -> bool:
|
||||
resolved = path.expanduser().resolve(strict=False)
|
||||
return resolved == REPO_ROOT or REPO_ROOT in resolved.parents
|
||||
|
||||
|
||||
def _assert_not_repo_path(path: Path, *, label: str) -> None:
|
||||
if _is_repo_path(path):
|
||||
raise AssertionError(f"{label} must not point inside the repo root during tests: {path}")
|
||||
|
||||
|
||||
def _assert_safe_runtime_paths(*, cwd: Path | None = None, env: dict[str, str] | None = None) -> None:
|
||||
if cwd is not None:
|
||||
_assert_not_repo_path(cwd, label="cwd")
|
||||
|
||||
for key in ("DATA_DIR", "CRAWL_DIR", "SNAP_DIR"):
|
||||
value = (env or {}).get(key)
|
||||
if value:
|
||||
_assert_not_repo_path(Path(value), label=key)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -47,6 +71,7 @@ def run_archivebox_cmd(
|
||||
"""
|
||||
cmd = [sys.executable, "-m", "archivebox"] + args
|
||||
|
||||
_assert_not_repo_path(data_dir, label="DATA_DIR")
|
||||
base_env = os.environ.copy()
|
||||
base_env["DATA_DIR"] = str(data_dir)
|
||||
base_env["USE_COLOR"] = "False"
|
||||
@@ -71,6 +96,7 @@ def run_archivebox_cmd(
|
||||
if env:
|
||||
base_env.update(env)
|
||||
|
||||
_assert_safe_runtime_paths(cwd=data_dir, env=base_env)
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
input=stdin,
|
||||
@@ -90,7 +116,7 @@ def run_archivebox_cmd(
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def isolate_test_runtime(tmp_path):
|
||||
def isolate_test_runtime(tmp_path, monkeypatch):
|
||||
"""
|
||||
Run each pytest test from an isolated temp cwd and restore env mutations.
|
||||
|
||||
@@ -104,14 +130,35 @@ def isolate_test_runtime(tmp_path):
|
||||
seed a separate session-scoped temp ``DATA_DIR`` above so any ArchiveBox
|
||||
config imported before this fixture runs never points at the repo root.
|
||||
"""
|
||||
_assert_not_repo_path(tmp_path, label="tmp_path")
|
||||
original_cwd = Path.cwd()
|
||||
original_env = os.environ.copy()
|
||||
original_chdir = os.chdir
|
||||
original_popen = subprocess.Popen
|
||||
os.chdir(tmp_path)
|
||||
os.environ.pop("DATA_DIR", None)
|
||||
os.environ.pop("CRAWL_DIR", None)
|
||||
os.environ.pop("SNAP_DIR", None)
|
||||
|
||||
def guarded_chdir(path: os.PathLike[str] | str) -> None:
|
||||
_assert_not_repo_path(Path(path), label="cwd")
|
||||
original_chdir(path)
|
||||
|
||||
def guarded_popen(*args: Any, **kwargs: Any):
|
||||
cwd = kwargs.get("cwd")
|
||||
env = kwargs.get("env")
|
||||
if cwd is not None:
|
||||
_assert_not_repo_path(Path(cwd), label="cwd")
|
||||
_assert_safe_runtime_paths(cwd=Path(cwd) if cwd is not None else None, env=env)
|
||||
return original_popen(*args, **kwargs)
|
||||
|
||||
monkeypatch.setattr(os, "chdir", guarded_chdir)
|
||||
monkeypatch.setattr(subprocess, "Popen", guarded_popen)
|
||||
try:
|
||||
_assert_safe_runtime_paths(cwd=Path.cwd(), env=os.environ)
|
||||
yield
|
||||
finally:
|
||||
os.chdir(original_cwd)
|
||||
original_chdir(original_cwd)
|
||||
os.environ.clear()
|
||||
os.environ.update(original_env)
|
||||
|
||||
@@ -166,14 +213,18 @@ def run_archivebox_cmd_cwd(
|
||||
"""
|
||||
cmd = [sys.executable, "-m", "archivebox"] + args
|
||||
|
||||
_assert_not_repo_path(cwd, label="cwd")
|
||||
base_env = os.environ.copy()
|
||||
base_env.pop("DATA_DIR", None)
|
||||
base_env.pop("CRAWL_DIR", None)
|
||||
base_env.pop("SNAP_DIR", None)
|
||||
base_env["USE_COLOR"] = "False"
|
||||
base_env["SHOW_PROGRESS"] = "False"
|
||||
|
||||
if env:
|
||||
base_env.update(env)
|
||||
|
||||
_assert_safe_runtime_paths(cwd=cwd, env=base_env)
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
input=stdin,
|
||||
@@ -202,8 +253,12 @@ def run_python_cwd(
|
||||
cwd: Path,
|
||||
timeout: int = 60,
|
||||
) -> tuple[str, str, int]:
|
||||
_assert_not_repo_path(cwd, label="cwd")
|
||||
base_env = os.environ.copy()
|
||||
base_env.pop("DATA_DIR", None)
|
||||
base_env.pop("CRAWL_DIR", None)
|
||||
base_env.pop("SNAP_DIR", None)
|
||||
_assert_safe_runtime_paths(cwd=cwd, env=base_env)
|
||||
result = subprocess.run(
|
||||
[sys.executable, "-"],
|
||||
input=script,
|
||||
@@ -446,7 +501,7 @@ def assert_record_has_fields(record: dict[str, Any], required_fields: list[str])
|
||||
|
||||
def create_test_url(domain: str = "example.com", path: str | None = None) -> str:
|
||||
"""Generate unique test URL."""
|
||||
path = path or uuid7().hex[:8]
|
||||
path = path or secrets.token_hex(4)
|
||||
return f"https://{domain}/{path}"
|
||||
|
||||
|
||||
|
||||
@@ -305,6 +305,10 @@ create_release() {
|
||||
if [[ "${version}" == *rc* ]]; then
|
||||
prerelease_args+=(--prerelease)
|
||||
fi
|
||||
if gh release view "${TAG_PREFIX}${version}" --repo "${slug}" >/dev/null 2>&1; then
|
||||
echo "GitHub release ${TAG_PREFIX}${version} already exists"
|
||||
return 0
|
||||
fi
|
||||
|
||||
gh release create "${TAG_PREFIX}${version}" \
|
||||
--repo "${slug}" \
|
||||
@@ -318,13 +322,17 @@ publish_artifacts() {
|
||||
local version="$1"
|
||||
local pypi_token="${UV_PUBLISH_TOKEN:-${PYPI_TOKEN:-${PYPI_PAT_SECRET:-}}}"
|
||||
|
||||
if [[ -n "${pypi_token}" ]]; then
|
||||
UV_PUBLISH_TOKEN="${pypi_token}" uv publish --username=__token__ dist/*
|
||||
elif [[ -n "${GITHUB_ACTIONS:-}" ]]; then
|
||||
uv publish --trusted-publishing always dist/*
|
||||
if curl -fsSL "https://pypi.org/pypi/${PYPI_PACKAGE}/json" | jq -e --arg version "${version}" '.releases[$version] | length > 0' >/dev/null 2>&1; then
|
||||
echo "${PYPI_PACKAGE} ${version} already published on PyPI"
|
||||
else
|
||||
echo "Missing PyPI credentials: set UV_PUBLISH_TOKEN or PYPI_TOKEN" >&2
|
||||
return 1
|
||||
if [[ -n "${pypi_token}" ]]; then
|
||||
UV_PUBLISH_TOKEN="${pypi_token}" uv publish --username=__token__ dist/*
|
||||
elif [[ -n "${GITHUB_ACTIONS:-}" ]]; then
|
||||
uv publish --trusted-publishing always dist/*
|
||||
else
|
||||
echo "Missing PyPI credentials: set UV_PUBLISH_TOKEN or PYPI_TOKEN" >&2
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
|
||||
wait_for_pypi "${PYPI_PACKAGE}" "${version}"
|
||||
@@ -347,15 +355,35 @@ main() {
|
||||
return 1
|
||||
fi
|
||||
|
||||
update_internal_dependencies
|
||||
version="$(bump_version)"
|
||||
run_checks
|
||||
version="$(current_version)"
|
||||
latest="$(latest_release_version "${slug}")"
|
||||
if [[ -z "${latest}" ]]; then
|
||||
relation="gt"
|
||||
else
|
||||
relation="$(compare_versions "${version}" "${latest}")"
|
||||
fi
|
||||
|
||||
git add -A
|
||||
git commit -m "release: ${TAG_PREFIX}${version}"
|
||||
git push origin "${branch}"
|
||||
if [[ "${relation}" == "eq" ]]; then
|
||||
update_internal_dependencies
|
||||
version="$(bump_version)"
|
||||
run_checks
|
||||
|
||||
wait_for_runs "${slug}" push "$(git rev-parse HEAD)" "push"
|
||||
git add -A
|
||||
git commit -m "release: ${TAG_PREFIX}${version}"
|
||||
git push origin "${branch}"
|
||||
|
||||
wait_for_runs "${slug}" push "$(git rev-parse HEAD)" "push"
|
||||
elif [[ "${relation}" == "gt" ]]; then
|
||||
if [[ -n "$(git status --short)" ]]; then
|
||||
echo "Refusing to publish existing unreleased version ${version} with a dirty worktree" >&2
|
||||
return 1
|
||||
fi
|
||||
run_checks
|
||||
wait_for_runs "${slug}" push "$(git rev-parse HEAD)" "push"
|
||||
else
|
||||
echo "Current version ${version} is behind latest GitHub release ${latest}" >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
publish_artifacts "${version}"
|
||||
create_release "${slug}" "${version}"
|
||||
|
||||
@@ -26,8 +26,8 @@ services:
|
||||
- PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list
|
||||
- PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content
|
||||
- PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive
|
||||
- SEARCH_BACKEND_ENGINE=sonic # tells ArchiveBox to use sonic container below for fast full-text search
|
||||
- SEARCH_BACKEND_HOST_NAME=sonic
|
||||
- SEARCH_BACKEND_ENGINE=sonic # tells ArchiveBox to use its built-in Sonic worker for fast full-text search
|
||||
# - SEARCH_BACKEND_HOST_NAME=127.0.0.1
|
||||
- SEARCH_BACKEND_PASSWORD=SomeSecretPassword
|
||||
# - PUID=911 # set to your host user's UID & GID if you encounter permissions issues
|
||||
# - PGID=911 # UID/GIDs lower than 500 may clash with system uids and are not recommended
|
||||
@@ -54,20 +54,21 @@ services:
|
||||
# https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving
|
||||
|
||||
|
||||
### This runs the optional Sonic full-text search backend (much faster than default rg backend).
|
||||
### ArchiveBox now starts and uses Sonic automatically when SEARCH_BACKEND_ENGINE=sonic,
|
||||
# so the old standalone docker sidecar below is no longer necessary.
|
||||
# If Sonic is ever started after not running for a while, update its full-text index by running:
|
||||
# $ docker compose run archivebox update --index-only
|
||||
# https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-up-Search
|
||||
|
||||
sonic:
|
||||
image: archivebox/sonic:latest
|
||||
expose:
|
||||
- 1491
|
||||
environment:
|
||||
- SEARCH_BACKEND_PASSWORD=SomeSecretPassword
|
||||
volumes:
|
||||
#- ./sonic.cfg:/etc/sonic.cfg:ro # mount to customize: https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/etc/sonic.cfg
|
||||
- ./data/sonic:/var/lib/sonic/store
|
||||
# sonic:
|
||||
# image: archivebox/sonic:latest
|
||||
# expose:
|
||||
# - 1491
|
||||
# environment:
|
||||
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
|
||||
# volumes:
|
||||
# #- ./sonic.cfg:/etc/sonic.cfg:ro # mount to customize: https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/etc/sonic.cfg
|
||||
# - ./data/sonic:/var/lib/sonic/store
|
||||
|
||||
|
||||
### This optional container runs xvfb+noVNC so you can watch the ArchiveBox browser as it archives things,
|
||||
|
||||
2
uv.lock
generated
2
uv.lock
generated
@@ -130,7 +130,7 @@ dev = [{ name = "prek", specifier = ">=0.3.6" }]
|
||||
|
||||
[[package]]
|
||||
name = "abxbus"
|
||||
version = "2.4.7"
|
||||
version = "2.4.8"
|
||||
source = { editable = "../abxbus" }
|
||||
dependencies = [
|
||||
{ name = "aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
|
||||
Reference in New Issue
Block a user