wip

2026-04-06 07:47:53 +10:00 · 2026-03-23 03:58:32 -07:00
parent 268856bcfb
commit b749b26c5d
286 changed files with 21704 additions and 13480 deletions
--- a/archivebox/tests/conftest.py
+++ b/archivebox/tests/conftest.py
@@ -8,7 +8,7 @@ import textwrap
 import time
 import shutil
 from pathlib import Path
-from typing import List, Dict, Any, Optional, Tuple
+from typing import Any

 import pytest

@@ -24,13 +24,14 @@ os.environ.setdefault("DATA_DIR", str(SESSION_DATA_DIR))
 # CLI Helpers (defined before fixtures that use them)
 # =============================================================================

+
 def run_archivebox_cmd(
-    args: List[str],
+    args: list[str],
    data_dir: Path,
-    stdin: Optional[str] = None,
+    stdin: str | None = None,
    timeout: int = 60,
-    env: Optional[Dict[str, str]] = None,
-) -> Tuple[str, str, int]:
+    env: dict[str, str] | None = None,
+) -> tuple[str, str, int]:
    """
    Run archivebox command via subprocess, return (stdout, stderr, returncode).

@@ -44,28 +45,28 @@ def run_archivebox_cmd(
    Returns:
        Tuple of (stdout, stderr, returncode)
    """
-    cmd = [sys.executable, '-m', 'archivebox'] + args
+    cmd = [sys.executable, "-m", "archivebox"] + args

    base_env = os.environ.copy()
-    base_env['DATA_DIR'] = str(data_dir)
-    base_env['USE_COLOR'] = 'False'
-    base_env['SHOW_PROGRESS'] = 'False'
+    base_env["DATA_DIR"] = str(data_dir)
+    base_env["USE_COLOR"] = "False"
+    base_env["SHOW_PROGRESS"] = "False"
    # Disable slow extractors for faster tests
-    base_env['SAVE_ARCHIVEDOTORG'] = 'False'
-    base_env['SAVE_TITLE'] = 'False'
-    base_env['SAVE_FAVICON'] = 'False'
-    base_env['SAVE_WGET'] = 'False'
-    base_env['SAVE_WARC'] = 'False'
-    base_env['SAVE_PDF'] = 'False'
-    base_env['SAVE_SCREENSHOT'] = 'False'
-    base_env['SAVE_DOM'] = 'False'
-    base_env['SAVE_SINGLEFILE'] = 'False'
-    base_env['SAVE_READABILITY'] = 'False'
-    base_env['SAVE_MERCURY'] = 'False'
-    base_env['SAVE_GIT'] = 'False'
-    base_env['SAVE_YTDLP'] = 'False'
-    base_env['SAVE_HEADERS'] = 'False'
-    base_env['SAVE_HTMLTOTEXT'] = 'False'
+    base_env["SAVE_ARCHIVEDOTORG"] = "False"
+    base_env["SAVE_TITLE"] = "False"
+    base_env["SAVE_FAVICON"] = "False"
+    base_env["SAVE_WGET"] = "False"
+    base_env["SAVE_WARC"] = "False"
+    base_env["SAVE_PDF"] = "False"
+    base_env["SAVE_SCREENSHOT"] = "False"
+    base_env["SAVE_DOM"] = "False"
+    base_env["SAVE_SINGLEFILE"] = "False"
+    base_env["SAVE_READABILITY"] = "False"
+    base_env["SAVE_MERCURY"] = "False"
+    base_env["SAVE_GIT"] = "False"
+    base_env["SAVE_YTDLP"] = "False"
+    base_env["SAVE_HEADERS"] = "False"
+    base_env["SAVE_HTMLTOTEXT"] = "False"

    if env:
        base_env.update(env)
@@ -87,6 +88,7 @@ def run_archivebox_cmd(
 # Fixtures
 # =============================================================================

+
@pytest.fixture(autouse=True)
 def isolate_test_runtime(tmp_path):
    """
@@ -117,6 +119,7 @@ def isolate_test_runtime(tmp_path):
 def pytest_sessionfinish(session, exitstatus):
    shutil.rmtree(SESSION_DATA_DIR, ignore_errors=True)

+
@pytest.fixture
 def isolated_data_dir(tmp_path):
    """
@@ -124,7 +127,7 @@ def isolated_data_dir(tmp_path):

    Uses tmp_path for complete isolation.
    """
-    data_dir = tmp_path / 'archivebox_data'
+    data_dir = tmp_path / "archivebox_data"
    data_dir.mkdir()
    return data_dir

@@ -137,7 +140,7 @@ def initialized_archive(isolated_data_dir):
    Runs `archivebox init` via subprocess to set up database and directories.
    """
    stdout, stderr, returncode = run_archivebox_cmd(
-        ['init', '--quick'],
+        ["init", "--quick"],
        data_dir=isolated_data_dir,
        timeout=60,
    )
@@ -149,23 +152,24 @@ def initialized_archive(isolated_data_dir):
 # CWD-based CLI Helpers (no DATA_DIR env)
 # =============================================================================

+
 def run_archivebox_cmd_cwd(
-    args: List[str],
+    args: list[str],
    cwd: Path,
-    stdin: Optional[str] = None,
+    stdin: str | None = None,
    timeout: int = 60,
-    env: Optional[Dict[str, str]] = None,
-) -> Tuple[str, str, int]:
+    env: dict[str, str] | None = None,
+) -> tuple[str, str, int]:
    """
    Run archivebox command via subprocess using cwd as DATA_DIR (no DATA_DIR env).
    Returns (stdout, stderr, returncode).
    """
-    cmd = [sys.executable, '-m', 'archivebox'] + args
+    cmd = [sys.executable, "-m", "archivebox"] + args

    base_env = os.environ.copy()
-    base_env.pop('DATA_DIR', None)
-    base_env['USE_COLOR'] = 'False'
-    base_env['SHOW_PROGRESS'] = 'False'
+    base_env.pop("DATA_DIR", None)
+    base_env["USE_COLOR"] = "False"
+    base_env["SHOW_PROGRESS"] = "False"

    if env:
        base_env.update(env)
@@ -183,7 +187,7 @@ def run_archivebox_cmd_cwd(
    return result.stdout, result.stderr, result.returncode


-def stop_process(proc: subprocess.Popen[str]) -> Tuple[str, str]:
+def stop_process(proc: subprocess.Popen[str]) -> tuple[str, str]:
    if proc.poll() is None:
        proc.terminate()
        try:
@@ -197,11 +201,11 @@ def run_python_cwd(
    script: str,
    cwd: Path,
    timeout: int = 60,
-) -> Tuple[str, str, int]:
+) -> tuple[str, str, int]:
    base_env = os.environ.copy()
-    base_env.pop('DATA_DIR', None)
+    base_env.pop("DATA_DIR", None)
    result = subprocess.run(
-        [sys.executable, '-'],
+        [sys.executable, "-"],
        input=script,
        capture_output=True,
        text=True,
@@ -253,7 +257,7 @@ def wait_for_archive_outputs(
                rel_path = candidate.relative_to(snapshot_dir)
                if rel_path.parts and rel_path.parts[0] == 'responses':
                    continue
-                if rel_path.name in {'stdout.log', 'stderr.log', 'cmd.sh'}:
+                if rel_path.name in {"stdout.log", "stderr.log", "cmd.sh"}:
                    continue
                output_rel = str(rel_path)
                break
@@ -267,64 +271,68 @@ def wait_for_archive_outputs(
            raise SystemExit(1)

        print('READY')
-        """
+        """,
    )

    deadline = time.time() + timeout
    while time.time() < deadline:
        stdout, _stderr, returncode = run_python_cwd(script, cwd=cwd, timeout=30)
-        if returncode == 0 and 'READY' in stdout:
+        if returncode == 0 and "READY" in stdout:
            return True
        time.sleep(interval)
    return False

+
 def _get_machine_type() -> str:
    import platform

    os_name = platform.system().lower()
    arch = platform.machine().lower()
-    in_docker = os.environ.get('IN_DOCKER', '').lower() in ('1', 'true', 'yes')
-    suffix = '-docker' if in_docker else ''
-    return f'{arch}-{os_name}{suffix}'
+    in_docker = os.environ.get("IN_DOCKER", "").lower() in ("1", "true", "yes")
+    suffix = "-docker" if in_docker else ""
+    return f"{arch}-{os_name}{suffix}"

-def _find_cached_chromium(lib_dir: Path) -> Optional[Path]:
+
+def _find_cached_chromium(lib_dir: Path) -> Path | None:
    candidates = [
-        lib_dir / 'puppeteer',
-        lib_dir / 'npm' / 'node_modules' / 'puppeteer' / '.local-chromium',
+        lib_dir / "puppeteer",
+        lib_dir / "npm" / "node_modules" / "puppeteer" / ".local-chromium",
    ]
    for base in candidates:
        if not base.exists():
            continue
-        for path in base.rglob('Chromium.app/Contents/MacOS/Chromium'):
+        for path in base.rglob("Chromium.app/Contents/MacOS/Chromium"):
            return path
-        for path in base.rglob('chrome-linux/chrome'):
+        for path in base.rglob("chrome-linux/chrome"):
            return path
-        for path in base.rglob('chrome-linux64/chrome'):
+        for path in base.rglob("chrome-linux64/chrome"):
            return path
    return None

-def _find_system_browser() -> Optional[Path]:
+
+def _find_system_browser() -> Path | None:
    candidates = [
-        Path('/Applications/Chromium.app/Contents/MacOS/Chromium'),
-        Path('/usr/bin/chromium'),
-        Path('/usr/bin/chromium-browser'),
+        Path("/Applications/Chromium.app/Contents/MacOS/Chromium"),
+        Path("/usr/bin/chromium"),
+        Path("/usr/bin/chromium-browser"),
    ]
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None

+
 def _ensure_puppeteer(shared_lib: Path) -> None:
-    npm_prefix = shared_lib / 'npm'
-    node_modules = npm_prefix / 'node_modules'
-    puppeteer_dir = node_modules / 'puppeteer'
+    npm_prefix = shared_lib / "npm"
+    node_modules = npm_prefix / "node_modules"
+    puppeteer_dir = node_modules / "puppeteer"
    if puppeteer_dir.exists():
        return
    npm_prefix.mkdir(parents=True, exist_ok=True)
    env = os.environ.copy()
-    env['PUPPETEER_SKIP_DOWNLOAD'] = '1'
+    env["PUPPETEER_SKIP_DOWNLOAD"] = "1"
    subprocess.run(
-        ['npm', 'install', 'puppeteer'],
+        ["npm", "install", "puppeteer"],
        cwd=str(npm_prefix),
        env=env,
        check=True,
@@ -345,7 +353,7 @@ def real_archive_with_example(tmp_path_factory, request):
        request.cls.data_dir = tmp_path

    stdout, stderr, returncode = run_archivebox_cmd_cwd(
-        ['init', '--quick'],
+        ["init", "--quick"],
        cwd=tmp_path,
        timeout=120,
    )
@@ -353,28 +361,28 @@ def real_archive_with_example(tmp_path_factory, request):

    stdout, stderr, returncode = run_archivebox_cmd_cwd(
        [
-            'config',
-            '--set',
-            'LISTEN_HOST=archivebox.localhost:8000',
-            'PUBLIC_INDEX=True',
-            'PUBLIC_SNAPSHOTS=True',
-            'PUBLIC_ADD_VIEW=True',
+            "config",
+            "--set",
+            "LISTEN_HOST=archivebox.localhost:8000",
+            "PUBLIC_INDEX=True",
+            "PUBLIC_SNAPSHOTS=True",
+            "PUBLIC_ADD_VIEW=True",
        ],
        cwd=tmp_path,
    )
    assert returncode == 0, f"archivebox config failed: {stderr}"

    add_env = {
-        'RESPONSES_ENABLED': 'True',
-        'SHOW_PROGRESS': 'False',
-        'USE_COLOR': 'False',
-        'RESPONSES_TIMEOUT': '30',
+        "RESPONSES_ENABLED": "True",
+        "SHOW_PROGRESS": "False",
+        "USE_COLOR": "False",
+        "RESPONSES_TIMEOUT": "30",
    }
-    cmd = [sys.executable, '-m', 'archivebox', 'add', '--depth=0', '--plugins=responses', 'https://example.com']
+    cmd = [sys.executable, "-m", "archivebox", "add", "--depth=0", "--plugins=responses", "https://example.com"]
    base_env = os.environ.copy()
-    base_env.pop('DATA_DIR', None)
-    base_env['USE_COLOR'] = 'False'
-    base_env['SHOW_PROGRESS'] = 'False'
+    base_env.pop("DATA_DIR", None)
+    base_env["USE_COLOR"] = "False"
+    base_env["SHOW_PROGRESS"] = "False"
    base_env.update(add_env)

    proc = subprocess.Popen(
@@ -386,7 +394,7 @@ def real_archive_with_example(tmp_path_factory, request):
        env=base_env,
    )

-    ready = wait_for_archive_outputs(tmp_path, 'https://example.com', timeout=600)
+    ready = wait_for_archive_outputs(tmp_path, "https://example.com", timeout=600)
    stdout, stderr = stop_process(proc)
    assert ready, f"archivebox add did not produce required outputs within timeout:\nSTDOUT:\n{stdout}\nSTDERR:\n{stderr}"

@@ -397,34 +405,34 @@ def real_archive_with_example(tmp_path_factory, request):
 # Output Assertions
 # =============================================================================

-def parse_jsonl_output(stdout: str) -> List[Dict[str, Any]]:
+
+def parse_jsonl_output(stdout: str) -> list[dict[str, Any]]:
    """Parse JSONL output into list of dicts via Process parser."""
    from archivebox.machine.models import Process
-    return Process.parse_records_from_text(stdout or '')
+
+    return Process.parse_records_from_text(stdout or "")


 def assert_jsonl_contains_type(stdout: str, record_type: str, min_count: int = 1):
    """Assert output contains at least min_count records of type."""
    records = parse_jsonl_output(stdout)
-    matching = [r for r in records if r.get('type') == record_type]
-    assert len(matching) >= min_count, \
-        f"Expected >= {min_count} {record_type}, got {len(matching)}"
+    matching = [r for r in records if r.get("type") == record_type]
+    assert len(matching) >= min_count, f"Expected >= {min_count} {record_type}, got {len(matching)}"
    return matching


-def assert_jsonl_pass_through(stdout: str, input_records: List[Dict[str, Any]]):
+def assert_jsonl_pass_through(stdout: str, input_records: list[dict[str, Any]]):
    """Assert that input records appear in output (pass-through behavior)."""
    output_records = parse_jsonl_output(stdout)
-    output_ids = {r.get('id') for r in output_records if r.get('id')}
+    output_ids = {r.get("id") for r in output_records if r.get("id")}

    for input_rec in input_records:
-        input_id = input_rec.get('id')
+        input_id = input_rec.get("id")
        if input_id:
-            assert input_id in output_ids, \
-                f"Input record {input_id} not found in output (pass-through failed)"
+            assert input_id in output_ids, f"Input record {input_id} not found in output (pass-through failed)"


-def assert_record_has_fields(record: Dict[str, Any], required_fields: List[str]):
+def assert_record_has_fields(record: dict[str, Any], required_fields: list[str]):
    """Assert record has all required fields with non-None values."""
    for field in required_fields:
        assert field in record, f"Record missing field: {field}"
@@ -435,31 +443,32 @@ def assert_record_has_fields(record: Dict[str, Any], required_fields: List[str])
 # Test Data Factories
 # =============================================================================

-def create_test_url(domain: str = 'example.com', path: str | None = None) -> str:
+
+def create_test_url(domain: str = "example.com", path: str | None = None) -> str:
    """Generate unique test URL."""
    path = path or uuid7().hex[:8]
-    return f'https://{domain}/{path}'
+    return f"https://{domain}/{path}"


-def create_test_crawl_json(urls: List[str] | None = None, **kwargs) -> Dict[str, Any]:
+def create_test_crawl_json(urls: list[str] | None = None, **kwargs) -> dict[str, Any]:
    """Create Crawl JSONL record for testing."""
    urls = urls or [create_test_url()]
    return {
-        'type': 'Crawl',
-        'urls': '\n'.join(urls),
-        'max_depth': kwargs.get('max_depth', 0),
-        'tags_str': kwargs.get('tags_str', ''),
-        'status': kwargs.get('status', 'queued'),
-        **{k: v for k, v in kwargs.items() if k not in ('max_depth', 'tags_str', 'status')},
+        "type": "Crawl",
+        "urls": "\n".join(urls),
+        "max_depth": kwargs.get("max_depth", 0),
+        "tags_str": kwargs.get("tags_str", ""),
+        "status": kwargs.get("status", "queued"),
+        **{k: v for k, v in kwargs.items() if k not in ("max_depth", "tags_str", "status")},
    }


-def create_test_snapshot_json(url: str | None = None, **kwargs) -> Dict[str, Any]:
+def create_test_snapshot_json(url: str | None = None, **kwargs) -> dict[str, Any]:
    """Create Snapshot JSONL record for testing."""
    return {
-        'type': 'Snapshot',
-        'url': url or create_test_url(),
-        'tags_str': kwargs.get('tags_str', ''),
-        'status': kwargs.get('status', 'queued'),
-        **{k: v for k, v in kwargs.items() if k not in ('tags_str', 'status')},
+        "type": "Snapshot",
+        "url": url or create_test_url(),
+        "tags_str": kwargs.get("tags_str", ""),
+        "status": kwargs.get("status", "queued"),
+        **{k: v for k, v in kwargs.items() if k not in ("tags_str", "status")},
    }
--- a/archivebox/tests/fixtures.py
+++ b/archivebox/tests/fixtures.py
@@ -5,34 +5,38 @@ from threading import Thread

 import pytest

+
@pytest.fixture
 def process(tmp_path):
    process = subprocess.run(
-        ['archivebox', 'init'],
+        ["archivebox", "init"],
        capture_output=True,
        cwd=tmp_path,
    )
    return process

+
@pytest.fixture
 def disable_extractors_dict():
    env = os.environ.copy()
-    env.update({
-        "SAVE_WGET": "false",
-        "SAVE_SINGLEFILE": "false",
-        "SAVE_READABILITY": "false",
-        "SAVE_MERCURY": "false",
-        "SAVE_HTMLTOTEXT": "false",
-        "SAVE_PDF": "false",
-        "SAVE_SCREENSHOT": "false",
-        "SAVE_DOM": "false",
-        "SAVE_HEADERS": "false",
-        "SAVE_GIT": "false",
-        "SAVE_YTDLP": "false",
-        "SAVE_ARCHIVEDOTORG": "false",
-        "SAVE_TITLE": "false",
-        "SAVE_FAVICON": "false",
-    })
+    env.update(
+        {
+            "SAVE_WGET": "false",
+            "SAVE_SINGLEFILE": "false",
+            "SAVE_READABILITY": "false",
+            "SAVE_MERCURY": "false",
+            "SAVE_HTMLTOTEXT": "false",
+            "SAVE_PDF": "false",
+            "SAVE_SCREENSHOT": "false",
+            "SAVE_DOM": "false",
+            "SAVE_HEADERS": "false",
+            "SAVE_GIT": "false",
+            "SAVE_YTDLP": "false",
+            "SAVE_ARCHIVEDOTORG": "false",
+            "SAVE_TITLE": "false",
+            "SAVE_FAVICON": "false",
+        },
+    )
    return env


--- a/archivebox/tests/migrations_helpers.py
+++ b/archivebox/tests/migrations_helpers.py
@@ -15,7 +15,6 @@ import sqlite3
 import subprocess
 from pathlib import Path
 from datetime import datetime, timezone
-from typing import Dict, List, Tuple

 from archivebox.uuid_compat import uuid7

@@ -494,6 +493,7 @@ INSERT INTO django_content_type (app_label, model) VALUES
 # Test Data Generators
 # =============================================================================

+
 def generate_uuid() -> str:
    """Generate a UUID string without dashes for SQLite."""
    return uuid7().hex
@@ -501,45 +501,50 @@ def generate_uuid() -> str:

 def generate_timestamp() -> str:
    """Generate a timestamp string like ArchiveBox uses."""
-    return datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S') + '.000000'
+    return datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S") + ".000000"


-def seed_0_4_data(db_path: Path) -> Dict[str, List[Dict]]:
+def seed_0_4_data(db_path: Path) -> dict[str, list[dict]]:
    """Seed a 0.4.x database with realistic test data."""
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()

    created_data = {
-        'snapshots': [],
-        'tags_str': [],
+        "snapshots": [],
+        "tags_str": [],
    }

    test_urls = [
-        ('https://example.com/page1', 'Example Page 1', 'news,tech'),
-        ('https://example.org/article', 'Article Title', 'blog,reading'),
-        ('https://github.com/user/repo', 'GitHub Repository', 'code,github'),
-        ('https://news.ycombinator.com/item?id=12345', 'HN Discussion', 'news,discussion'),
-        ('https://en.wikipedia.org/wiki/Test', 'Wikipedia Test', 'reference,wiki'),
+        ("https://example.com/page1", "Example Page 1", "news,tech"),
+        ("https://example.org/article", "Article Title", "blog,reading"),
+        ("https://github.com/user/repo", "GitHub Repository", "code,github"),
+        ("https://news.ycombinator.com/item?id=12345", "HN Discussion", "news,discussion"),
+        ("https://en.wikipedia.org/wiki/Test", "Wikipedia Test", "reference,wiki"),
    ]

    for i, (url, title, tags) in enumerate(test_urls):
        snapshot_id = generate_uuid()
-        timestamp = f'2024010{i+1}120000.000000'
-        added = f'2024-01-0{i+1} 12:00:00'
+        timestamp = f"2024010{i + 1}120000.000000"
+        added = f"2024-01-0{i + 1} 12:00:00"

-        cursor.execute("""
+        cursor.execute(
+            """
            INSERT INTO core_snapshot (id, url, timestamp, title, tags, added, updated)
            VALUES (?, ?, ?, ?, ?, ?, ?)
-        """, (snapshot_id, url, timestamp, title, tags, added, added))
+        """,
+            (snapshot_id, url, timestamp, title, tags, added, added),
+        )

-        created_data['snapshots'].append({
-            'id': snapshot_id,
-            'url': url,
-            'timestamp': timestamp,
-            'title': title,
-            'tags': tags,
-        })
-        created_data['tags_str'].append(tags)
+        created_data["snapshots"].append(
+            {
+                "id": snapshot_id,
+                "url": url,
+                "timestamp": timestamp,
+                "title": title,
+                "tags": tags,
+            },
+        )
+        created_data["tags_str"].append(tags)

    cursor.execute("""
        INSERT INTO django_migrations (app, name, applied)
@@ -552,16 +557,16 @@ def seed_0_4_data(db_path: Path) -> Dict[str, List[Dict]]:
    return created_data


-def seed_0_7_data(db_path: Path) -> Dict[str, List[Dict]]:
+def seed_0_7_data(db_path: Path) -> dict[str, list[dict]]:
    """Seed a 0.7.x database with realistic test data."""
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()

    created_data = {
-        'users': [],
-        'snapshots': [],
-        'tags': [],
-        'archiveresults': [],
+        "users": [],
+        "snapshots": [],
+        "tags": [],
+        "archiveresults": [],
    }

    # Create a user
@@ -572,125 +577,145 @@ def seed_0_7_data(db_path: Path) -> Dict[str, List[Dict]]:
                'admin@example.com', 1, 1, datetime('now'))
    """)
    user_id = cursor.lastrowid
-    created_data['users'].append({'id': user_id, 'username': 'admin'})
+    created_data["users"].append({"id": user_id, "username": "admin"})

    # Create 5 tags
-    tag_names = ['news', 'tech', 'blog', 'reference', 'code']
+    tag_names = ["news", "tech", "blog", "reference", "code"]
    for name in tag_names:
-        cursor.execute("""
+        cursor.execute(
+            """
            INSERT INTO core_tag (name, slug) VALUES (?, ?)
-        """, (name, name.lower()))
+        """,
+            (name, name.lower()),
+        )
        tag_id = cursor.lastrowid
-        created_data['tags'].append({'id': tag_id, 'name': name, 'slug': name.lower()})
+        created_data["tags"].append({"id": tag_id, "name": name, "slug": name.lower()})

    # Create 5 snapshots
    test_urls = [
-        ('https://example.com/page1', 'Example Page 1'),
-        ('https://example.org/article', 'Article Title'),
-        ('https://github.com/user/repo', 'GitHub Repository'),
-        ('https://news.ycombinator.com/item?id=12345', 'HN Discussion'),
-        ('https://en.wikipedia.org/wiki/Test', 'Wikipedia Test'),
+        ("https://example.com/page1", "Example Page 1"),
+        ("https://example.org/article", "Article Title"),
+        ("https://github.com/user/repo", "GitHub Repository"),
+        ("https://news.ycombinator.com/item?id=12345", "HN Discussion"),
+        ("https://en.wikipedia.org/wiki/Test", "Wikipedia Test"),
    ]

    for i, (url, title) in enumerate(test_urls):
        snapshot_id = generate_uuid()
-        timestamp = f'2024010{i+1}120000.000000'
-        added = f'2024-01-0{i+1} 12:00:00'
+        timestamp = f"2024010{i + 1}120000.000000"
+        added = f"2024-01-0{i + 1} 12:00:00"

-        cursor.execute("""
+        cursor.execute(
+            """
            INSERT INTO core_snapshot (id, url, timestamp, title, added, updated)
            VALUES (?, ?, ?, ?, ?, ?)
-        """, (snapshot_id, url, timestamp, title, added, added))
+        """,
+            (snapshot_id, url, timestamp, title, added, added),
+        )

-        created_data['snapshots'].append({
-            'id': snapshot_id,
-            'url': url,
-            'timestamp': timestamp,
-            'title': title,
-        })
+        created_data["snapshots"].append(
+            {
+                "id": snapshot_id,
+                "url": url,
+                "timestamp": timestamp,
+                "title": title,
+            },
+        )

        # Assign 2 tags to each snapshot
-        tag_ids = [created_data['tags'][i % 5]['id'], created_data['tags'][(i + 1) % 5]['id']]
+        tag_ids = [created_data["tags"][i % 5]["id"], created_data["tags"][(i + 1) % 5]["id"]]
        for tag_id in tag_ids:
-            cursor.execute("""
+            cursor.execute(
+                """
                INSERT INTO core_snapshot_tags (snapshot_id, tag_id) VALUES (?, ?)
-            """, (snapshot_id, tag_id))
+            """,
+                (snapshot_id, tag_id),
+            )

        # Create 5 archive results for each snapshot
-        extractors = ['title', 'favicon', 'screenshot', 'singlefile', 'wget']
-        statuses = ['succeeded', 'succeeded', 'failed', 'succeeded', 'skipped']
+        extractors = ["title", "favicon", "screenshot", "singlefile", "wget"]
+        statuses = ["succeeded", "succeeded", "failed", "succeeded", "skipped"]

        for j, (extractor, status) in enumerate(zip(extractors, statuses)):
-            cursor.execute("""
+            cursor.execute(
+                """
                INSERT INTO core_archiveresult
                (snapshot_id, extractor, cmd, pwd, cmd_version, output, start_ts, end_ts, status)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
-            """, (
-                snapshot_id, extractor,
-                json.dumps([extractor, '--version']),
-                f'/data/archive/{timestamp}',
-                '1.0.0',
-                f'{extractor}/index.html' if status == 'succeeded' else '',
-                f'2024-01-0{i+1} 12:00:0{j}',
-                f'2024-01-0{i+1} 12:00:1{j}',
-                status
-            ))
+            """,
+                (
+                    snapshot_id,
+                    extractor,
+                    json.dumps([extractor, "--version"]),
+                    f"/data/archive/{timestamp}",
+                    "1.0.0",
+                    f"{extractor}/index.html" if status == "succeeded" else "",
+                    f"2024-01-0{i + 1} 12:00:0{j}",
+                    f"2024-01-0{i + 1} 12:00:1{j}",
+                    status,
+                ),
+            )

-            created_data['archiveresults'].append({
-                'snapshot_id': snapshot_id,
-                'extractor': extractor,
-                'status': status,
-            })
+            created_data["archiveresults"].append(
+                {
+                    "snapshot_id": snapshot_id,
+                    "extractor": extractor,
+                    "status": status,
+                },
+            )

    # Record migrations as applied (0.7.x migrations up to 0022)
    migrations = [
-        ('contenttypes', '0001_initial'),
-        ('contenttypes', '0002_remove_content_type_name'),
-        ('auth', '0001_initial'),
-        ('auth', '0002_alter_permission_name_max_length'),
-        ('auth', '0003_alter_user_email_max_length'),
-        ('auth', '0004_alter_user_username_opts'),
-        ('auth', '0005_alter_user_last_login_null'),
-        ('auth', '0006_require_contenttypes_0002'),
-        ('auth', '0007_alter_validators_add_error_messages'),
-        ('auth', '0008_alter_user_username_max_length'),
-        ('auth', '0009_alter_user_last_name_max_length'),
-        ('auth', '0010_alter_group_name_max_length'),
-        ('auth', '0011_update_proxy_permissions'),
-        ('auth', '0012_alter_user_first_name_max_length'),
-        ('admin', '0001_initial'),
-        ('admin', '0002_logentry_remove_auto_add'),
-        ('admin', '0003_logentry_add_action_flag_choices'),
-        ('sessions', '0001_initial'),
-        ('core', '0001_initial'),
-        ('core', '0002_auto_20200625_1521'),
-        ('core', '0003_auto_20200630_1034'),
-        ('core', '0004_auto_20200713_1552'),
-        ('core', '0005_auto_20200728_0326'),
-        ('core', '0006_auto_20201012_1520'),
-        ('core', '0007_archiveresult'),
-        ('core', '0008_auto_20210105_1421'),
-        ('core', '0009_auto_20210216_1038'),
-        ('core', '0010_auto_20210216_1055'),
-        ('core', '0011_auto_20210216_1331'),
-        ('core', '0012_auto_20210216_1425'),
-        ('core', '0013_auto_20210218_0729'),
-        ('core', '0014_auto_20210218_0729'),
-        ('core', '0015_auto_20210218_0730'),
-        ('core', '0016_auto_20210218_1204'),
-        ('core', '0017_auto_20210219_0211'),
-        ('core', '0018_auto_20210327_0952'),
-        ('core', '0019_auto_20210401_0654'),
-        ('core', '0020_auto_20210410_1031'),
-        ('core', '0021_auto_20220914_0934'),
-        ('core', '0022_auto_20231023_2008'),
+        ("contenttypes", "0001_initial"),
+        ("contenttypes", "0002_remove_content_type_name"),
+        ("auth", "0001_initial"),
+        ("auth", "0002_alter_permission_name_max_length"),
+        ("auth", "0003_alter_user_email_max_length"),
+        ("auth", "0004_alter_user_username_opts"),
+        ("auth", "0005_alter_user_last_login_null"),
+        ("auth", "0006_require_contenttypes_0002"),
+        ("auth", "0007_alter_validators_add_error_messages"),
+        ("auth", "0008_alter_user_username_max_length"),
+        ("auth", "0009_alter_user_last_name_max_length"),
+        ("auth", "0010_alter_group_name_max_length"),
+        ("auth", "0011_update_proxy_permissions"),
+        ("auth", "0012_alter_user_first_name_max_length"),
+        ("admin", "0001_initial"),
+        ("admin", "0002_logentry_remove_auto_add"),
+        ("admin", "0003_logentry_add_action_flag_choices"),
+        ("sessions", "0001_initial"),
+        ("core", "0001_initial"),
+        ("core", "0002_auto_20200625_1521"),
+        ("core", "0003_auto_20200630_1034"),
+        ("core", "0004_auto_20200713_1552"),
+        ("core", "0005_auto_20200728_0326"),
+        ("core", "0006_auto_20201012_1520"),
+        ("core", "0007_archiveresult"),
+        ("core", "0008_auto_20210105_1421"),
+        ("core", "0009_auto_20210216_1038"),
+        ("core", "0010_auto_20210216_1055"),
+        ("core", "0011_auto_20210216_1331"),
+        ("core", "0012_auto_20210216_1425"),
+        ("core", "0013_auto_20210218_0729"),
+        ("core", "0014_auto_20210218_0729"),
+        ("core", "0015_auto_20210218_0730"),
+        ("core", "0016_auto_20210218_1204"),
+        ("core", "0017_auto_20210219_0211"),
+        ("core", "0018_auto_20210327_0952"),
+        ("core", "0019_auto_20210401_0654"),
+        ("core", "0020_auto_20210410_1031"),
+        ("core", "0021_auto_20220914_0934"),
+        ("core", "0022_auto_20231023_2008"),
    ]

    for app, name in migrations:
-        cursor.execute("""
+        cursor.execute(
+            """
            INSERT INTO django_migrations (app, name, applied)
            VALUES (?, ?, datetime('now'))
-        """, (app, name))
+        """,
+            (app, name),
+        )

    conn.commit()
    conn.close()
@@ -698,17 +723,17 @@ def seed_0_7_data(db_path: Path) -> Dict[str, List[Dict]]:
    return created_data


-def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
+def seed_0_8_data(db_path: Path) -> dict[str, list[dict]]:
    """Seed a 0.8.x database with realistic test data including Crawls."""
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()

    created_data = {
-        'users': [],
-        'crawls': [],
-        'snapshots': [],
-        'tags': [],
-        'archiveresults': [],
+        "users": [],
+        "crawls": [],
+        "snapshots": [],
+        "tags": [],
+        "archiveresults": [],
    }

    # Create a user
@@ -719,243 +744,271 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
                'admin@example.com', 1, 1, datetime('now'))
    """)
    user_id = cursor.lastrowid
-    created_data['users'].append({'id': user_id, 'username': 'admin'})
+    created_data["users"].append({"id": user_id, "username": "admin"})

    # Create 5 tags
-    tag_names = ['news', 'tech', 'blog', 'reference', 'code']
+    tag_names = ["news", "tech", "blog", "reference", "code"]
    for name in tag_names:
-        cursor.execute("""
+        cursor.execute(
+            """
            INSERT INTO core_tag (name, slug, created_at, modified_at, created_by_id)
            VALUES (?, ?, datetime('now'), datetime('now'), ?)
-        """, (name, name.lower(), user_id))
+        """,
+            (name, name.lower(), user_id),
+        )
        tag_id = cursor.lastrowid
-        created_data['tags'].append({'id': tag_id, 'name': name, 'slug': name.lower()})
+        created_data["tags"].append({"id": tag_id, "name": name, "slug": name.lower()})

    # Create 2 Crawls (0.9.0 schema - no seeds)
    test_crawls = [
-        ('https://example.com\nhttps://example.org', 0, 'Example Crawl'),
-        ('https://github.com/ArchiveBox', 1, 'GitHub Crawl'),
+        ("https://example.com\nhttps://example.org", 0, "Example Crawl"),
+        ("https://github.com/ArchiveBox", 1, "GitHub Crawl"),
    ]

    for i, (urls, max_depth, label) in enumerate(test_crawls):
        crawl_id = generate_uuid()
-        cursor.execute("""
+        cursor.execute(
+            """
            INSERT INTO crawls_crawl (id, created_at, created_by_id, modified_at, urls,
                                      config, max_depth, tags_str, label, status, retry_at,
                                      num_uses_failed, num_uses_succeeded)
            VALUES (?, datetime('now'), ?, datetime('now'), ?, '{}', ?, '', ?, 'queued', datetime('now'), 0, 0)
-        """, (crawl_id, user_id, urls, max_depth, label))
+        """,
+            (crawl_id, user_id, urls, max_depth, label),
+        )

-        created_data['crawls'].append({
-            'id': crawl_id,
-            'urls': urls,
-            'max_depth': max_depth,
-            'label': label,
-        })
+        created_data["crawls"].append(
+            {
+                "id": crawl_id,
+                "urls": urls,
+                "max_depth": max_depth,
+                "label": label,
+            },
+        )

    # Create 5 snapshots linked to crawls
    test_urls = [
-        ('https://example.com/page1', 'Example Page 1', created_data['crawls'][0]['id']),
-        ('https://example.org/article', 'Article Title', created_data['crawls'][0]['id']),
-        ('https://github.com/user/repo', 'GitHub Repository', created_data['crawls'][1]['id']),
-        ('https://news.ycombinator.com/item?id=12345', 'HN Discussion', None),
-        ('https://en.wikipedia.org/wiki/Test', 'Wikipedia Test', None),
+        ("https://example.com/page1", "Example Page 1", created_data["crawls"][0]["id"]),
+        ("https://example.org/article", "Article Title", created_data["crawls"][0]["id"]),
+        ("https://github.com/user/repo", "GitHub Repository", created_data["crawls"][1]["id"]),
+        ("https://news.ycombinator.com/item?id=12345", "HN Discussion", None),
+        ("https://en.wikipedia.org/wiki/Test", "Wikipedia Test", None),
    ]

    for i, (url, title, crawl_id) in enumerate(test_urls):
        snapshot_id = generate_uuid()
-        timestamp = f'2024010{i+1}120000.000000'
-        created_at = f'2024-01-0{i+1} 12:00:00'
+        timestamp = f"2024010{i + 1}120000.000000"
+        created_at = f"2024-01-0{i + 1} 12:00:00"

-        cursor.execute("""
+        cursor.execute(
+            """
            INSERT INTO core_snapshot (id, created_by_id, created_at, modified_at, url, timestamp,
                                       bookmarked_at, crawl_id, title, depth, status, config, notes)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 0, 'queued', '{}', '')
-        """, (snapshot_id, user_id, created_at, created_at, url, timestamp, created_at, crawl_id, title))
+        """,
+            (snapshot_id, user_id, created_at, created_at, url, timestamp, created_at, crawl_id, title),
+        )

-        created_data['snapshots'].append({
-            'id': snapshot_id,
-            'url': url,
-            'timestamp': timestamp,
-            'title': title,
-            'crawl_id': crawl_id,
-        })
+        created_data["snapshots"].append(
+            {
+                "id": snapshot_id,
+                "url": url,
+                "timestamp": timestamp,
+                "title": title,
+                "crawl_id": crawl_id,
+            },
+        )

        # Assign 2 tags to each snapshot
-        tag_ids = [created_data['tags'][i % 5]['id'], created_data['tags'][(i + 1) % 5]['id']]
+        tag_ids = [created_data["tags"][i % 5]["id"], created_data["tags"][(i + 1) % 5]["id"]]
        for tag_id in tag_ids:
-            cursor.execute("""
+            cursor.execute(
+                """
                INSERT INTO core_snapshot_tags (snapshot_id, tag_id) VALUES (?, ?)
-            """, (snapshot_id, tag_id))
+            """,
+                (snapshot_id, tag_id),
+            )

        # Create 5 archive results for each snapshot
-        extractors = ['title', 'favicon', 'screenshot', 'singlefile', 'wget']
-        statuses = ['succeeded', 'succeeded', 'failed', 'succeeded', 'skipped']
+        extractors = ["title", "favicon", "screenshot", "singlefile", "wget"]
+        statuses = ["succeeded", "succeeded", "failed", "succeeded", "skipped"]

        for j, (extractor, status) in enumerate(zip(extractors, statuses)):
            result_uuid = generate_uuid()
-            cursor.execute("""
+            cursor.execute(
+                """
                INSERT INTO core_archiveresult
                (uuid, created_by_id, created_at, modified_at, snapshot_id, extractor, pwd,
                 cmd, cmd_version, output, start_ts, end_ts, status, retry_at, notes, output_dir)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), '', ?)
-            """, (
-                result_uuid, user_id, f'2024-01-0{i+1} 12:00:0{j}', f'2024-01-0{i+1} 12:00:1{j}',
-                snapshot_id, extractor,
-                f'/data/archive/{timestamp}',
-                json.dumps([extractor, '--version']),
-                '1.0.0',
-                f'{extractor}/index.html' if status == 'succeeded' else '',
-                f'2024-01-0{i+1} 12:00:0{j}',
-                f'2024-01-0{i+1} 12:00:1{j}',
-                status,
-                f'{extractor}',
-            ))
+            """,
+                (
+                    result_uuid,
+                    user_id,
+                    f"2024-01-0{i + 1} 12:00:0{j}",
+                    f"2024-01-0{i + 1} 12:00:1{j}",
+                    snapshot_id,
+                    extractor,
+                    f"/data/archive/{timestamp}",
+                    json.dumps([extractor, "--version"]),
+                    "1.0.0",
+                    f"{extractor}/index.html" if status == "succeeded" else "",
+                    f"2024-01-0{i + 1} 12:00:0{j}",
+                    f"2024-01-0{i + 1} 12:00:1{j}",
+                    status,
+                    f"{extractor}",
+                ),
+            )

-            created_data['archiveresults'].append({
-                'uuid': result_uuid,
-                'snapshot_id': snapshot_id,
-                'extractor': extractor,
-                'status': status,
-            })
+            created_data["archiveresults"].append(
+                {
+                    "uuid": result_uuid,
+                    "snapshot_id": snapshot_id,
+                    "extractor": extractor,
+                    "status": status,
+                },
+            )

    # Record migrations as applied (0.8.x migrations)
    migrations = [
-        ('contenttypes', '0001_initial'),
-        ('contenttypes', '0002_remove_content_type_name'),
-        ('auth', '0001_initial'),
-        ('auth', '0002_alter_permission_name_max_length'),
-        ('auth', '0003_alter_user_email_max_length'),
-        ('auth', '0004_alter_user_username_opts'),
-        ('auth', '0005_alter_user_last_login_null'),
-        ('auth', '0006_require_contenttypes_0002'),
-        ('auth', '0007_alter_validators_add_error_messages'),
-        ('auth', '0008_alter_user_username_max_length'),
-        ('auth', '0009_alter_user_last_name_max_length'),
-        ('auth', '0010_alter_group_name_max_length'),
-        ('auth', '0011_update_proxy_permissions'),
-        ('auth', '0012_alter_user_first_name_max_length'),
-        ('admin', '0001_initial'),
-        ('admin', '0002_logentry_remove_auto_add'),
-        ('admin', '0003_logentry_add_action_flag_choices'),
-        ('sessions', '0001_initial'),
-        ('core', '0001_initial'),
-        ('core', '0002_auto_20200625_1521'),
-        ('core', '0003_auto_20200630_1034'),
-        ('core', '0004_auto_20200713_1552'),
-        ('core', '0005_auto_20200728_0326'),
-        ('core', '0006_auto_20201012_1520'),
-        ('core', '0007_archiveresult'),
-        ('core', '0008_auto_20210105_1421'),
-        ('core', '0009_auto_20210216_1038'),
-        ('core', '0010_auto_20210216_1055'),
-        ('core', '0011_auto_20210216_1331'),
-        ('core', '0012_auto_20210216_1425'),
-        ('core', '0013_auto_20210218_0729'),
-        ('core', '0014_auto_20210218_0729'),
-        ('core', '0015_auto_20210218_0730'),
-        ('core', '0016_auto_20210218_1204'),
-        ('core', '0017_auto_20210219_0211'),
-        ('core', '0018_auto_20210327_0952'),
-        ('core', '0019_auto_20210401_0654'),
-        ('core', '0020_auto_20210410_1031'),
-        ('core', '0021_auto_20220914_0934'),
-        ('core', '0022_auto_20231023_2008'),
+        ("contenttypes", "0001_initial"),
+        ("contenttypes", "0002_remove_content_type_name"),
+        ("auth", "0001_initial"),
+        ("auth", "0002_alter_permission_name_max_length"),
+        ("auth", "0003_alter_user_email_max_length"),
+        ("auth", "0004_alter_user_username_opts"),
+        ("auth", "0005_alter_user_last_login_null"),
+        ("auth", "0006_require_contenttypes_0002"),
+        ("auth", "0007_alter_validators_add_error_messages"),
+        ("auth", "0008_alter_user_username_max_length"),
+        ("auth", "0009_alter_user_last_name_max_length"),
+        ("auth", "0010_alter_group_name_max_length"),
+        ("auth", "0011_update_proxy_permissions"),
+        ("auth", "0012_alter_user_first_name_max_length"),
+        ("admin", "0001_initial"),
+        ("admin", "0002_logentry_remove_auto_add"),
+        ("admin", "0003_logentry_add_action_flag_choices"),
+        ("sessions", "0001_initial"),
+        ("core", "0001_initial"),
+        ("core", "0002_auto_20200625_1521"),
+        ("core", "0003_auto_20200630_1034"),
+        ("core", "0004_auto_20200713_1552"),
+        ("core", "0005_auto_20200728_0326"),
+        ("core", "0006_auto_20201012_1520"),
+        ("core", "0007_archiveresult"),
+        ("core", "0008_auto_20210105_1421"),
+        ("core", "0009_auto_20210216_1038"),
+        ("core", "0010_auto_20210216_1055"),
+        ("core", "0011_auto_20210216_1331"),
+        ("core", "0012_auto_20210216_1425"),
+        ("core", "0013_auto_20210218_0729"),
+        ("core", "0014_auto_20210218_0729"),
+        ("core", "0015_auto_20210218_0730"),
+        ("core", "0016_auto_20210218_1204"),
+        ("core", "0017_auto_20210219_0211"),
+        ("core", "0018_auto_20210327_0952"),
+        ("core", "0019_auto_20210401_0654"),
+        ("core", "0020_auto_20210410_1031"),
+        ("core", "0021_auto_20220914_0934"),
+        ("core", "0022_auto_20231023_2008"),
        # For 0.8.x (dev branch), record the migrations that 0023_new_schema replaces
-        ('core', '0023_alter_archiveresult_options_archiveresult_abid_and_more'),
-        ('core', '0024_auto_20240513_1143'),
-        ('core', '0025_alter_archiveresult_uuid'),
-        ('core', '0026_archiveresult_created_archiveresult_created_by_and_more'),
-        ('core', '0027_update_snapshot_ids'),
-        ('core', '0028_alter_archiveresult_uuid'),
-        ('core', '0029_alter_archiveresult_id'),
-        ('core', '0030_alter_archiveresult_uuid'),
-        ('core', '0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more'),
-        ('core', '0032_alter_archiveresult_id'),
-        ('core', '0033_rename_id_archiveresult_old_id'),
-        ('core', '0034_alter_archiveresult_old_id_alter_archiveresult_uuid'),
-        ('core', '0035_remove_archiveresult_uuid_archiveresult_id'),
-        ('core', '0036_alter_archiveresult_id_alter_archiveresult_old_id'),
-        ('core', '0037_rename_id_snapshot_old_id'),
-        ('core', '0038_rename_uuid_snapshot_id'),
-        ('core', '0039_rename_snapshot_archiveresult_snapshot_old'),
-        ('core', '0040_archiveresult_snapshot'),
-        ('core', '0041_alter_archiveresult_snapshot_and_more'),
-        ('core', '0042_remove_archiveresult_snapshot_old'),
-        ('core', '0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
-        ('core', '0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more'),
-        ('core', '0045_alter_snapshot_old_id'),
-        ('core', '0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
-        ('core', '0047_alter_snapshottag_unique_together_and_more'),
-        ('core', '0048_alter_archiveresult_snapshot_and_more'),
-        ('core', '0049_rename_snapshot_snapshottag_snapshot_old_and_more'),
-        ('core', '0050_alter_snapshottag_snapshot_old'),
-        ('core', '0051_snapshottag_snapshot_alter_snapshottag_snapshot_old'),
-        ('core', '0052_alter_snapshottag_unique_together_and_more'),
-        ('core', '0053_remove_snapshottag_snapshot_old'),
-        ('core', '0054_alter_snapshot_timestamp'),
-        ('core', '0055_alter_tag_slug'),
-        ('core', '0056_remove_tag_uuid'),
-        ('core', '0057_rename_id_tag_old_id'),
-        ('core', '0058_alter_tag_old_id'),
-        ('core', '0059_tag_id'),
-        ('core', '0060_alter_tag_id'),
-        ('core', '0061_rename_tag_snapshottag_old_tag_and_more'),
-        ('core', '0062_alter_snapshottag_old_tag'),
-        ('core', '0063_snapshottag_tag_alter_snapshottag_old_tag'),
-        ('core', '0064_alter_snapshottag_unique_together_and_more'),
-        ('core', '0065_remove_snapshottag_old_tag'),
-        ('core', '0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id'),
-        ('core', '0067_alter_snapshottag_tag'),
-        ('core', '0068_alter_archiveresult_options'),
-        ('core', '0069_alter_archiveresult_created_alter_snapshot_added_and_more'),
-        ('core', '0070_alter_archiveresult_created_by_alter_snapshot_added_and_more'),
-        ('core', '0071_remove_archiveresult_old_id_remove_snapshot_old_id_and_more'),
-        ('core', '0072_rename_added_snapshot_bookmarked_at_and_more'),
-        ('core', '0073_rename_created_archiveresult_created_at_and_more'),
-        ('core', '0074_alter_snapshot_downloaded_at'),
+        ("core", "0023_alter_archiveresult_options_archiveresult_abid_and_more"),
+        ("core", "0024_auto_20240513_1143"),
+        ("core", "0025_alter_archiveresult_uuid"),
+        ("core", "0026_archiveresult_created_archiveresult_created_by_and_more"),
+        ("core", "0027_update_snapshot_ids"),
+        ("core", "0028_alter_archiveresult_uuid"),
+        ("core", "0029_alter_archiveresult_id"),
+        ("core", "0030_alter_archiveresult_uuid"),
+        ("core", "0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more"),
+        ("core", "0032_alter_archiveresult_id"),
+        ("core", "0033_rename_id_archiveresult_old_id"),
+        ("core", "0034_alter_archiveresult_old_id_alter_archiveresult_uuid"),
+        ("core", "0035_remove_archiveresult_uuid_archiveresult_id"),
+        ("core", "0036_alter_archiveresult_id_alter_archiveresult_old_id"),
+        ("core", "0037_rename_id_snapshot_old_id"),
+        ("core", "0038_rename_uuid_snapshot_id"),
+        ("core", "0039_rename_snapshot_archiveresult_snapshot_old"),
+        ("core", "0040_archiveresult_snapshot"),
+        ("core", "0041_alter_archiveresult_snapshot_and_more"),
+        ("core", "0042_remove_archiveresult_snapshot_old"),
+        ("core", "0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more"),
+        ("core", "0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more"),
+        ("core", "0045_alter_snapshot_old_id"),
+        ("core", "0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more"),
+        ("core", "0047_alter_snapshottag_unique_together_and_more"),
+        ("core", "0048_alter_archiveresult_snapshot_and_more"),
+        ("core", "0049_rename_snapshot_snapshottag_snapshot_old_and_more"),
+        ("core", "0050_alter_snapshottag_snapshot_old"),
+        ("core", "0051_snapshottag_snapshot_alter_snapshottag_snapshot_old"),
+        ("core", "0052_alter_snapshottag_unique_together_and_more"),
+        ("core", "0053_remove_snapshottag_snapshot_old"),
+        ("core", "0054_alter_snapshot_timestamp"),
+        ("core", "0055_alter_tag_slug"),
+        ("core", "0056_remove_tag_uuid"),
+        ("core", "0057_rename_id_tag_old_id"),
+        ("core", "0058_alter_tag_old_id"),
+        ("core", "0059_tag_id"),
+        ("core", "0060_alter_tag_id"),
+        ("core", "0061_rename_tag_snapshottag_old_tag_and_more"),
+        ("core", "0062_alter_snapshottag_old_tag"),
+        ("core", "0063_snapshottag_tag_alter_snapshottag_old_tag"),
+        ("core", "0064_alter_snapshottag_unique_together_and_more"),
+        ("core", "0065_remove_snapshottag_old_tag"),
+        ("core", "0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id"),
+        ("core", "0067_alter_snapshottag_tag"),
+        ("core", "0068_alter_archiveresult_options"),
+        ("core", "0069_alter_archiveresult_created_alter_snapshot_added_and_more"),
+        ("core", "0070_alter_archiveresult_created_by_alter_snapshot_added_and_more"),
+        ("core", "0071_remove_archiveresult_old_id_remove_snapshot_old_id_and_more"),
+        ("core", "0072_rename_added_snapshot_bookmarked_at_and_more"),
+        ("core", "0073_rename_created_archiveresult_created_at_and_more"),
+        ("core", "0074_alter_snapshot_downloaded_at"),
        # For 0.8.x: DO NOT record 0023_new_schema - it replaces 0023-0074 for fresh installs
        # We already recorded 0023-0074 above, so Django will know the state
        # For 0.8.x: Record original machine migrations (before squashing)
        # DO NOT record 0001_squashed here - it replaces 0001-0004 for fresh installs
-        ('machine', '0001_initial'),
-        ('machine', '0002_alter_machine_stats_installedbinary'),
-        ('machine', '0003_alter_installedbinary_options_and_more'),
-        ('machine', '0004_alter_installedbinary_abspath_and_more'),
+        ("machine", "0001_initial"),
+        ("machine", "0002_alter_machine_stats_installedbinary"),
+        ("machine", "0003_alter_installedbinary_options_and_more"),
+        ("machine", "0004_alter_installedbinary_abspath_and_more"),
        # Then the new migrations after squashing
-        ('machine', '0002_rename_custom_cmds_to_overrides'),
-        ('machine', '0003_alter_dependency_id_alter_installedbinary_dependency_and_more'),
-        ('machine', '0004_drop_dependency_table'),
+        ("machine", "0002_rename_custom_cmds_to_overrides"),
+        ("machine", "0003_alter_dependency_id_alter_installedbinary_dependency_and_more"),
+        ("machine", "0004_drop_dependency_table"),
        # Crawls must come before core.0024 because 0024_b depends on it
-        ('crawls', '0001_initial'),
+        ("crawls", "0001_initial"),
        # Core 0024 migrations chain (in dependency order)
-        ('core', '0024_b_clear_config_fields'),
-        ('core', '0024_c_disable_fk_checks'),
-        ('core', '0024_d_fix_crawls_config'),
-        ('core', '0024_snapshot_crawl'),
-        ('core', '0024_f_add_snapshot_config'),
-        ('core', '0025_allow_duplicate_urls_per_crawl'),
+        ("core", "0024_b_clear_config_fields"),
+        ("core", "0024_c_disable_fk_checks"),
+        ("core", "0024_d_fix_crawls_config"),
+        ("core", "0024_snapshot_crawl"),
+        ("core", "0024_f_add_snapshot_config"),
+        ("core", "0025_allow_duplicate_urls_per_crawl"),
        # For 0.8.x: Record original api migration (before squashing)
        # DO NOT record 0001_squashed here - it replaces 0001 for fresh installs
-        ('api', '0001_initial'),
-        ('api', '0002_alter_apitoken_options'),
-        ('api', '0003_rename_user_apitoken_created_by_apitoken_abid_and_more'),
-        ('api', '0004_alter_apitoken_id_alter_apitoken_uuid'),
-        ('api', '0005_remove_apitoken_uuid_remove_outboundwebhook_uuid_and_more'),
-        ('api', '0006_remove_outboundwebhook_uuid_apitoken_id_and_more'),
-        ('api', '0007_alter_apitoken_created_by'),
-        ('api', '0008_alter_apitoken_created_alter_apitoken_created_by_and_more'),
-        ('api', '0009_rename_created_apitoken_created_at_and_more'),
+        ("api", "0001_initial"),
+        ("api", "0002_alter_apitoken_options"),
+        ("api", "0003_rename_user_apitoken_created_by_apitoken_abid_and_more"),
+        ("api", "0004_alter_apitoken_id_alter_apitoken_uuid"),
+        ("api", "0005_remove_apitoken_uuid_remove_outboundwebhook_uuid_and_more"),
+        ("api", "0006_remove_outboundwebhook_uuid_apitoken_id_and_more"),
+        ("api", "0007_alter_apitoken_created_by"),
+        ("api", "0008_alter_apitoken_created_alter_apitoken_created_by_and_more"),
+        ("api", "0009_rename_created_apitoken_created_at_and_more"),
        # Note: crawls.0001_initial moved earlier (before core.0024) due to dependencies
        # Stop here - 0.8.x ends at core.0025, crawls.0001, and we want to TEST the later migrations
        # Do NOT record 0026+ as they need to be tested during migration
    ]

    for app, name in migrations:
-        cursor.execute("""
+        cursor.execute(
+            """
            INSERT INTO django_migrations (app, name, applied)
            VALUES (?, ?, datetime('now'))
-        """, (app, name))
+        """,
+            (app, name),
+        )

    conn.commit()
    conn.close()
@@ -967,33 +1020,34 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
 # Helper Functions
 # =============================================================================

+
 def run_archivebox(data_dir: Path, args: list, timeout: int = 60, env: dict | None = None) -> subprocess.CompletedProcess:
    """Run archivebox command in subprocess with given data directory."""
    base_env = os.environ.copy()
-    base_env['DATA_DIR'] = str(data_dir)
-    base_env['USE_COLOR'] = 'False'
-    base_env['SHOW_PROGRESS'] = 'False'
+    base_env["DATA_DIR"] = str(data_dir)
+    base_env["USE_COLOR"] = "False"
+    base_env["SHOW_PROGRESS"] = "False"
    # Disable ALL extractors for faster tests (can be overridden by env parameter)
-    base_env['SAVE_ARCHIVEDOTORG'] = 'False'
-    base_env['SAVE_TITLE'] = 'False'
-    base_env['SAVE_FAVICON'] = 'False'
-    base_env['SAVE_WGET'] = 'False'
-    base_env['SAVE_SINGLEFILE'] = 'False'
-    base_env['SAVE_SCREENSHOT'] = 'False'
-    base_env['SAVE_PDF'] = 'False'
-    base_env['SAVE_DOM'] = 'False'
-    base_env['SAVE_READABILITY'] = 'False'
-    base_env['SAVE_MERCURY'] = 'False'
-    base_env['SAVE_GIT'] = 'False'
-    base_env['SAVE_YTDLP'] = 'False'
-    base_env['SAVE_HEADERS'] = 'False'
-    base_env['SAVE_HTMLTOTEXT'] = 'False'
+    base_env["SAVE_ARCHIVEDOTORG"] = "False"
+    base_env["SAVE_TITLE"] = "False"
+    base_env["SAVE_FAVICON"] = "False"
+    base_env["SAVE_WGET"] = "False"
+    base_env["SAVE_SINGLEFILE"] = "False"
+    base_env["SAVE_SCREENSHOT"] = "False"
+    base_env["SAVE_PDF"] = "False"
+    base_env["SAVE_DOM"] = "False"
+    base_env["SAVE_READABILITY"] = "False"
+    base_env["SAVE_MERCURY"] = "False"
+    base_env["SAVE_GIT"] = "False"
+    base_env["SAVE_YTDLP"] = "False"
+    base_env["SAVE_HEADERS"] = "False"
+    base_env["SAVE_HTMLTOTEXT"] = "False"

    # Override with any custom env vars
    if env:
        base_env.update(env)

-    cmd = [sys.executable, '-m', 'archivebox'] + args
+    cmd = [sys.executable, "-m", "archivebox"] + args

    return subprocess.run(
        cmd,
@@ -1007,12 +1061,12 @@ def run_archivebox(data_dir: Path, args: list, timeout: int = 60, env: dict | No

 def create_data_dir_structure(data_dir: Path):
    """Create the basic ArchiveBox data directory structure."""
-    (data_dir / 'archive').mkdir(parents=True, exist_ok=True)
-    (data_dir / 'sources').mkdir(parents=True, exist_ok=True)
-    (data_dir / 'logs').mkdir(parents=True, exist_ok=True)
+    (data_dir / "archive").mkdir(parents=True, exist_ok=True)
+    (data_dir / "sources").mkdir(parents=True, exist_ok=True)
+    (data_dir / "logs").mkdir(parents=True, exist_ok=True)


-def verify_snapshot_count(db_path: Path, expected: int) -> Tuple[bool, str]:
+def verify_snapshot_count(db_path: Path, expected: int) -> tuple[bool, str]:
    """Verify the number of snapshots in the database."""
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()
@@ -1025,7 +1079,7 @@ def verify_snapshot_count(db_path: Path, expected: int) -> Tuple[bool, str]:
    return False, f"Snapshot count mismatch: expected {expected}, got {count}"


-def verify_tag_count(db_path: Path, expected: int) -> Tuple[bool, str]:
+def verify_tag_count(db_path: Path, expected: int) -> tuple[bool, str]:
    """Verify the number of tags in the database (exact match)."""
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()
@@ -1038,7 +1092,7 @@ def verify_tag_count(db_path: Path, expected: int) -> Tuple[bool, str]:
    return False, f"Tag count mismatch: expected {expected}, got {count}"


-def verify_archiveresult_count(db_path: Path, expected: int) -> Tuple[bool, str]:
+def verify_archiveresult_count(db_path: Path, expected: int) -> tuple[bool, str]:
    """Verify the number of archive results in the database."""
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()
@@ -1051,7 +1105,7 @@ def verify_archiveresult_count(db_path: Path, expected: int) -> Tuple[bool, str]
    return False, f"ArchiveResult count mismatch: expected {expected}, got {count}"


-def verify_snapshot_urls(db_path: Path, expected_urls: List[str]) -> Tuple[bool, str]:
+def verify_snapshot_urls(db_path: Path, expected_urls: list[str]) -> tuple[bool, str]:
    """Verify ALL expected URLs exist in snapshots."""
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()
@@ -1065,7 +1119,7 @@ def verify_snapshot_urls(db_path: Path, expected_urls: List[str]) -> Tuple[bool,
    return False, f"Missing URLs: {missing}"


-def verify_snapshot_titles(db_path: Path, expected_titles: Dict[str, str]) -> Tuple[bool, str]:
+def verify_snapshot_titles(db_path: Path, expected_titles: dict[str, str]) -> tuple[bool, str]:
    """Verify ALL snapshot titles are preserved."""
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()
@@ -1085,7 +1139,7 @@ def verify_snapshot_titles(db_path: Path, expected_titles: Dict[str, str]) -> Tu
    return False, f"Title mismatches: {mismatches}"


-def verify_foreign_keys(db_path: Path) -> Tuple[bool, str]:
+def verify_foreign_keys(db_path: Path) -> tuple[bool, str]:
    """Verify foreign key relationships are intact."""
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()
@@ -1104,21 +1158,21 @@ def verify_foreign_keys(db_path: Path) -> Tuple[bool, str]:
    return False, f"Found {orphaned_results} orphaned ArchiveResults"


-def verify_all_snapshots_in_output(output: str, snapshots: List[Dict]) -> Tuple[bool, str]:
+def verify_all_snapshots_in_output(output: str, snapshots: list[dict]) -> tuple[bool, str]:
    """Verify ALL snapshots appear in command output (not just one)."""
    missing = []
    for snapshot in snapshots:
-        url_fragment = snapshot['url'][:30]
-        title = snapshot.get('title', '')
+        url_fragment = snapshot["url"][:30]
+        title = snapshot.get("title", "")
        if url_fragment not in output and (not title or title not in output):
-            missing.append(snapshot['url'])
+            missing.append(snapshot["url"])

    if not missing:
        return True, "All snapshots found in output"
    return False, f"Missing snapshots in output: {missing}"


-def verify_crawl_count(db_path: Path, expected: int) -> Tuple[bool, str]:
+def verify_crawl_count(db_path: Path, expected: int) -> tuple[bool, str]:
    """Verify the number of crawls in the database."""
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()
@@ -1131,7 +1185,7 @@ def verify_crawl_count(db_path: Path, expected: int) -> Tuple[bool, str]:
    return False, f"Crawl count mismatch: expected {expected}, got {count}"


-def verify_process_migration(db_path: Path, expected_archiveresult_count: int) -> Tuple[bool, str]:
+def verify_process_migration(db_path: Path, expected_archiveresult_count: int) -> tuple[bool, str]:
    """
    Verify that ArchiveResults were properly migrated to Process records.

@@ -1170,13 +1224,13 @@ def verify_process_migration(db_path: Path, expected_archiveresult_count: int) -
    status_errors = []
    for ar_status, p_status, p_exit_code in cursor.fetchall():
        expected_p_status, expected_exit_code = {
-            'queued': ('queued', None),
-            'started': ('running', None),
-            'backoff': ('queued', None),
-            'succeeded': ('exited', 0),
-            'failed': ('exited', 1),
-            'skipped': ('exited', None),
-        }.get(ar_status, ('queued', None))
+            "queued": ("queued", None),
+            "started": ("running", None),
+            "backoff": ("queued", None),
+            "succeeded": ("exited", 0),
+            "failed": ("exited", 1),
+            "skipped": ("exited", None),
+        }.get(ar_status, ("queued", None))

        if p_status != expected_p_status:
            status_errors.append(f"AR status {ar_status} → Process {p_status}, expected {expected_p_status}")
--- a/archivebox/tests/test_add_view.py
+++ b/archivebox/tests/test_add_view.py
@@ -12,48 +12,50 @@ from archivebox.crawls.models import Crawl
 pytestmark = pytest.mark.django_db

 User = get_user_model()
-WEB_HOST = 'web.archivebox.localhost:8000'
-ADMIN_HOST = 'admin.archivebox.localhost:8000'
+WEB_HOST = "web.archivebox.localhost:8000"
+ADMIN_HOST = "admin.archivebox.localhost:8000"


@pytest.fixture
 def admin_user(db):
    return User.objects.create_superuser(
-        username='addviewadmin',
-        email='addviewadmin@test.com',
-        password='testpassword',
+        username="addviewadmin",
+        email="addviewadmin@test.com",
+        password="testpassword",
    )


 def test_add_view_renders_tag_editor_and_url_filter_fields(client, admin_user, monkeypatch):
-    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
+    monkeypatch.setattr(SERVER_CONFIG, "PUBLIC_ADD_VIEW", True)

-    response = client.get(reverse('add'), HTTP_HOST=WEB_HOST)
+    response = client.get(reverse("add"), HTTP_HOST=WEB_HOST)
    body = response.content.decode()

    assert response.status_code == 200
-    assert 'tag-editor-container' in body
+    assert "tag-editor-container" in body
    assert 'name="url_filters_allowlist"' in body
    assert 'name="url_filters_denylist"' in body
-    assert 'Same domain only' in body
+    assert "Same domain only" in body
    assert 'name="persona"' in body
-    assert 'Overwrite existing snapshots' not in body
-    assert 'Update/retry previously failed URLs' not in body
-    assert 'Index only dry run (add crawl but don&#x27;t archive yet)' in body
+    assert "Overwrite existing snapshots" not in body
+    assert "Update/retry previously failed URLs" not in body
+    assert "Index only dry run (add crawl but don&#x27;t archive yet)" in body
    assert 'name="notes"' in body
+    assert 'name="max_urls"' in body
+    assert 'name="max_size"' in body
    assert '<input type="text" name="notes"' in body
-    assert body.index('name="persona"') < body.index('<h3>Crawl Plugins</h3>')
-    assert 'data-url-regex=' in body
+    assert body.index('name="persona"') < body.index("<h3>Crawl Plugins</h3>")
+    assert "data-url-regex=" in body
    assert 'id="url-highlight-layer"' in body
    assert 'id="detected-urls-list"' in body
-    assert 'detected-url-toggle-btn' in body
+    assert "detected-url-toggle-btn" in body


 def test_add_view_checks_configured_search_backend_by_default(client, monkeypatch):
-    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
-    monkeypatch.setattr(SEARCH_BACKEND_CONFIG, 'SEARCH_BACKEND_ENGINE', 'sqlite')
+    monkeypatch.setattr(SERVER_CONFIG, "PUBLIC_ADD_VIEW", True)
+    monkeypatch.setattr(SEARCH_BACKEND_CONFIG, "SEARCH_BACKEND_ENGINE", "sqlite")

-    response = client.get(reverse('add'), HTTP_HOST=WEB_HOST)
+    response = client.get(reverse("add"), HTTP_HOST=WEB_HOST)
    body = response.content.decode()

    assert response.status_code == 200
@@ -65,99 +67,181 @@ def test_add_view_checks_configured_search_backend_by_default(client, monkeypatc


 def test_add_view_creates_crawl_with_tag_and_url_filter_overrides(client, admin_user, monkeypatch):
-    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
+    monkeypatch.setattr(SERVER_CONFIG, "PUBLIC_ADD_VIEW", True)
    client.force_login(admin_user)

    response = client.post(
-        reverse('add'),
+        reverse("add"),
        data={
-            'url': 'https://example.com\nhttps://cdn.example.com/asset.js',
-            'tag': 'alpha,beta',
-            'depth': '1',
-            'url_filters_allowlist': 'example.com\n*.example.com',
-            'url_filters_denylist': 'cdn.example.com',
-            'notes': 'Created from /add/',
-            'schedule': '',
-            'persona': 'Default',
-            'index_only': '',
-            'config': '{}',
+            "url": "https://example.com\nhttps://cdn.example.com/asset.js",
+            "tag": "alpha,beta",
+            "depth": "1",
+            "max_urls": "3",
+            "max_size": "45mb",
+            "url_filters_allowlist": "example.com\n*.example.com",
+            "url_filters_denylist": "cdn.example.com",
+            "notes": "Created from /add/",
+            "schedule": "",
+            "persona": "Default",
+            "index_only": "",
+            "config": "{}",
        },
        HTTP_HOST=WEB_HOST,
    )

    assert response.status_code == 302

-    crawl = Crawl.objects.order_by('-created_at').first()
+    crawl = Crawl.objects.order_by("-created_at").first()
    assert crawl is not None
-    assert crawl.tags_str == 'alpha,beta'
-    assert crawl.notes == 'Created from /add/'
-    assert crawl.config.get('DEFAULT_PERSONA') == 'Default'
-    assert crawl.config['URL_ALLOWLIST'] == 'example.com\n*.example.com'
-    assert crawl.config['URL_DENYLIST'] == 'cdn.example.com'
-    assert 'OVERWRITE' not in crawl.config
-    assert 'ONLY_NEW' not in crawl.config
+    assert crawl.tags_str == "alpha,beta"
+    assert crawl.notes == "Created from /add/"
+    assert crawl.max_urls == 3
+    assert crawl.max_size == 45 * 1024 * 1024
+    assert crawl.config.get("DEFAULT_PERSONA") == "Default"
+    assert crawl.config["MAX_URLS"] == 3
+    assert crawl.config["MAX_SIZE"] == 45 * 1024 * 1024
+    assert crawl.config["URL_ALLOWLIST"] == "example.com\n*.example.com"
+    assert crawl.config["URL_DENYLIST"] == "cdn.example.com"
+    assert "OVERWRITE" not in crawl.config
+    assert "ONLY_NEW" not in crawl.config
+
+
+def test_add_view_starts_background_runner_after_creating_crawl(client, admin_user, monkeypatch):
+    """Submitting the add form as a logged-in admin must invoke ensure_background_runner() exactly once."""
+    monkeypatch.setattr(SERVER_CONFIG, "PUBLIC_ADD_VIEW", True)
+    client.force_login(admin_user)
+
+    # Swap the runner hook for a recorder so the test can observe how often it is called during the request.
+    invocations = []
+    monkeypatch.setattr("archivebox.services.runner.ensure_background_runner", lambda: invocations.append(True) or True)
+
+    form_data = {
+        "url": "https://example.com",
+        "tag": "",
+        "depth": "0",
+        "max_urls": "0",
+        "max_size": "0",
+        "url_filters_allowlist": "",
+        "url_filters_denylist": "",
+        "notes": "",
+        "schedule": "",
+        "persona": "Default",
+        "index_only": "",
+        "config": "{}",
+    }
+    response = client.post(reverse("add"), data=form_data, HTTP_HOST=WEB_HOST)
+
+    # The request ends in a redirect and the recorder saw a single call.
+    assert response.status_code == 302
+    assert invocations == [True]


 def test_add_view_extracts_urls_from_mixed_text_input(client, admin_user, monkeypatch):
-    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
+    monkeypatch.setattr(SERVER_CONFIG, "PUBLIC_ADD_VIEW", True)
    client.force_login(admin_user)

    response = client.post(
-        reverse('add'),
+        reverse("add"),
        data={
-            'url': '\n'.join([
-                'https://sweeting.me,https://google.com',
-                'Notes: [ArchiveBox](https://github.com/ArchiveBox/ArchiveBox), https://news.ycombinator.com',
-                '[Wiki](https://en.wikipedia.org/wiki/Classification_(machine_learning))',
-                '{"items":["https://example.com/three"]}',
-                'csv,https://example.com/four',
-            ]),
-            'tag': '',
-            'depth': '0',
-            'url_filters_allowlist': '',
-            'url_filters_denylist': '',
-            'notes': '',
-            'schedule': '',
-            'persona': 'Default',
-            'index_only': '',
-            'config': '{}',
+            "url": "\n".join(
+                [
+                    "https://sweeting.me,https://google.com",
+                    "Notes: [ArchiveBox](https://github.com/ArchiveBox/ArchiveBox), https://news.ycombinator.com",
+                    "[Wiki](https://en.wikipedia.org/wiki/Classification_(machine_learning))",
+                    '{"items":["https://example.com/three"]}',
+                    "csv,https://example.com/four",
+                ],
+            ),
+            "tag": "",
+            "depth": "0",
+            "max_urls": "0",
+            "max_size": "0",
+            "url_filters_allowlist": "",
+            "url_filters_denylist": "",
+            "notes": "",
+            "schedule": "",
+            "persona": "Default",
+            "index_only": "",
+            "config": "{}",
        },
        HTTP_HOST=WEB_HOST,
    )

    assert response.status_code == 302

-    crawl = Crawl.objects.order_by('-created_at').first()
+    crawl = Crawl.objects.order_by("-created_at").first()
    assert crawl is not None
-    assert crawl.urls == '\n'.join([
-        'https://sweeting.me',
-        'https://google.com',
-        'https://github.com/ArchiveBox/ArchiveBox',
-        'https://news.ycombinator.com',
-        'https://en.wikipedia.org/wiki/Classification_(machine_learning)',
-        'https://example.com/three',
-        'https://example.com/four',
-    ])
+    assert crawl.urls == "\n".join(
+        [
+            "https://sweeting.me",
+            "https://google.com",
+            "https://github.com/ArchiveBox/ArchiveBox",
+            "https://news.ycombinator.com",
+            "https://en.wikipedia.org/wiki/Classification_(machine_learning)",
+            "https://example.com/three",
+            "https://example.com/four",
+        ],
+    )
+
+
+def test_add_view_trims_trailing_punctuation_from_markdown_urls(client, admin_user, monkeypatch):
+    """URLs followed by sentence punctuation ("." or "?.") are stored on the crawl without that suffix."""
+    monkeypatch.setattr(SERVER_CONFIG, "PUBLIC_ADD_VIEW", True)
+    client.force_login(admin_user)
+
+    submitted_text = "\n".join(
+        [
+            "Docs: https://github.com/ArchiveBox/ArchiveBox.",
+            "Issue: https://github.com/abc?abc#234234?.",
+        ],
+    )
+
+    response = client.post(
+        reverse("add"),
+        data={
+            "url": submitted_text,
+            "tag": "",
+            "depth": "0",
+            "max_urls": "0",
+            "max_size": "0",
+            "url_filters_allowlist": "",
+            "url_filters_denylist": "",
+            "notes": "",
+            "schedule": "",
+            "persona": "Default",
+            "index_only": "",
+            "config": "{}",
+        },
+        HTTP_HOST=WEB_HOST,
+    )
+
+    assert response.status_code == 302
+
+    # Inspect the most recently created crawl; its URL list must be free of the trailing punctuation.
+    crawl = Crawl.objects.order_by("-created_at").first()
+    assert crawl is not None
+    expected_urls = ["https://github.com/ArchiveBox/ArchiveBox", "https://github.com/abc?abc#234234"]
+    assert crawl.urls == "\n".join(expected_urls)


 def test_add_view_exposes_api_token_for_tag_widget_autocomplete(client, admin_user, monkeypatch):
-    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
+    monkeypatch.setattr(SERVER_CONFIG, "PUBLIC_ADD_VIEW", True)
    client.force_login(admin_user)

-    response = client.get(reverse('add'), HTTP_HOST=WEB_HOST)
+    response = client.get(reverse("add"), HTTP_HOST=WEB_HOST)

    assert response.status_code == 200
-    assert b'window.ARCHIVEBOX_API_KEY' in response.content
+    assert b"window.ARCHIVEBOX_API_KEY" in response.content


 def test_tags_autocomplete_requires_auth_when_public_snapshots_list_disabled(client, settings):
    settings.PUBLIC_SNAPSHOTS_LIST = False
    settings.PUBLIC_INDEX = False
-    Tag.objects.create(name='archive')
+    Tag.objects.create(name="archive")

    response = client.get(
-        reverse('api-1:tags_autocomplete'),
-        {'q': 'a'},
+        reverse("api-1:tags_autocomplete"),
+        {"q": "a"},
        HTTP_HOST=ADMIN_HOST,
    )

@@ -167,29 +251,29 @@ def test_tags_autocomplete_requires_auth_when_public_snapshots_list_disabled(cli
 def test_tags_autocomplete_allows_public_access_when_public_snapshots_list_enabled(client, settings):
    settings.PUBLIC_SNAPSHOTS_LIST = True
    settings.PUBLIC_INDEX = False
-    Tag.objects.create(name='archive')
+    Tag.objects.create(name="archive")

    response = client.get(
-        reverse('api-1:tags_autocomplete'),
-        {'q': 'a'},
+        reverse("api-1:tags_autocomplete"),
+        {"q": "a"},
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200
-    assert response.json()['tags'][0]['name'] == 'archive'
+    assert response.json()["tags"][0]["name"] == "archive"


 def test_tags_autocomplete_allows_authenticated_user_when_public_snapshots_list_disabled(client, admin_user, settings):
    settings.PUBLIC_SNAPSHOTS_LIST = False
    settings.PUBLIC_INDEX = False
-    Tag.objects.create(name='archive')
+    Tag.objects.create(name="archive")
    client.force_login(admin_user)

    response = client.get(
-        reverse('api-1:tags_autocomplete'),
-        {'q': 'a'},
+        reverse("api-1:tags_autocomplete"),
+        {"q": "a"},
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200
-    assert response.json()['tags'][0]['name'] == 'archive'
+    assert response.json()["tags"][0]["name"] == "archive"
--- a/archivebox/tests/test_admin_config_widget.py
+++ b/archivebox/tests/test_admin_config_widget.py
@@ -4,83 +4,83 @@ from archivebox.base_models.admin import KeyValueWidget
 def test_key_value_widget_renders_enum_autocomplete_metadata(monkeypatch):
    monkeypatch.setattr(
        KeyValueWidget,
-        '_get_config_options',
+        "_get_config_options",
        lambda self: {
-            'CHROME_WAIT_FOR': {
-                'plugin': 'chrome',
-                'type': 'string',
-                'default': 'networkidle2',
-                'description': 'Page load completion condition',
-                'enum': ['domcontentloaded', 'load', 'networkidle0', 'networkidle2'],
+            "CHROME_WAIT_FOR": {
+                "plugin": "chrome",
+                "type": "string",
+                "default": "networkidle2",
+                "description": "Page load completion condition",
+                "enum": ["domcontentloaded", "load", "networkidle0", "networkidle2"],
            },
        },
    )

    html = str(
        KeyValueWidget().render(
-            'config',
-            {'CHROME_WAIT_FOR': 'load'},
-            attrs={'id': 'id_config'},
-        )
+            "config",
+            {"CHROME_WAIT_FOR": "load"},
+            attrs={"id": "id_config"},
+        ),
    )

    assert '"enum": ["domcontentloaded", "load", "networkidle0", "networkidle2"]' in html
    assert 'class="kv-value-options"' in html
    assert 'class="kv-help"' in html
-    assert 'configureValueInput_id_config' in html
-    assert 'describeMeta_id_config' in html
-    assert 'validateValueAgainstMeta_id_config' in html
+    assert "configureValueInput_id_config" in html
+    assert "describeMeta_id_config" in html
+    assert "validateValueAgainstMeta_id_config" in html


 def test_key_value_widget_renders_numeric_and_pattern_constraints(monkeypatch):
    monkeypatch.setattr(
        KeyValueWidget,
-        '_get_config_options',
+        "_get_config_options",
        lambda self: {
-            'TIMEOUT': {
-                'plugin': 'base',
-                'type': 'integer',
-                'default': 60,
-                'description': 'Timeout in seconds',
-                'minimum': 5,
-                'maximum': 120,
+            "TIMEOUT": {
+                "plugin": "base",
+                "type": "integer",
+                "default": 60,
+                "description": "Timeout in seconds",
+                "minimum": 5,
+                "maximum": 120,
            },
-            'CHROME_RESOLUTION': {
-                'plugin': 'chrome',
-                'type': 'string',
-                'default': '1440,2000',
-                'description': 'Viewport resolution',
-                'pattern': '^\\d+,\\d+$',
+            "CHROME_RESOLUTION": {
+                "plugin": "chrome",
+                "type": "string",
+                "default": "1440,2000",
+                "description": "Viewport resolution",
+                "pattern": "^\\d+,\\d+$",
            },
        },
    )

-    html = str(KeyValueWidget().render('config', {}, attrs={'id': 'id_config'}))
+    html = str(KeyValueWidget().render("config", {}, attrs={"id": "id_config"}))

    assert '"minimum": 5' in html
    assert '"maximum": 120' in html
    assert '"pattern": "^\\\\d+,\\\\d+$"' in html
-    assert 'Expected: ' in html
-    assert 'Example: ' in html
-    assert 'setValueValidationState_id_config' in html
-    assert 'coerceValueForStorage_id_config' in html
+    assert "Expected: " in html
+    assert "Example: " in html
+    assert "setValueValidationState_id_config" in html
+    assert "coerceValueForStorage_id_config" in html


 def test_key_value_widget_accepts_common_boolean_spellings(monkeypatch):
    monkeypatch.setattr(
        KeyValueWidget,
-        '_get_config_options',
+        "_get_config_options",
        lambda self: {
-            'DEBUG': {
-                'plugin': 'base',
-                'type': 'boolean',
-                'default': False,
-                'description': 'Enable debug mode',
+            "DEBUG": {
+                "plugin": "base",
+                "type": "boolean",
+                "default": False,
+                "description": "Enable debug mode",
            },
        },
    )

-    html = str(KeyValueWidget().render('config', {'DEBUG': 'True'}, attrs={'id': 'id_config'}))
+    html = str(KeyValueWidget().render("config", {"DEBUG": "True"}, attrs={"id": "id_config"}))

    assert "enumValues = ['True', 'False']" in html
    assert "raw.toLowerCase()" in html
@@ -91,35 +91,35 @@ def test_key_value_widget_accepts_common_boolean_spellings(monkeypatch):
 def test_key_value_widget_shows_array_and_object_examples_and_binary_rules(monkeypatch):
    monkeypatch.setattr(
        KeyValueWidget,
-        '_get_config_options',
+        "_get_config_options",
        lambda self: {
-            'WGET_ARGS_EXTRA': {
-                'plugin': 'wget',
-                'type': 'array',
-                'default': [],
-                'description': 'Extra arguments to append to wget command',
+            "WGET_ARGS_EXTRA": {
+                "plugin": "wget",
+                "type": "array",
+                "default": [],
+                "description": "Extra arguments to append to wget command",
            },
-            'SAVE_ALLOWLIST': {
-                'plugin': 'base',
-                'type': 'object',
-                'default': {},
-                'description': 'Regex allowlist mapped to enabled methods',
+            "SAVE_ALLOWLIST": {
+                "plugin": "base",
+                "type": "object",
+                "default": {},
+                "description": "Regex allowlist mapped to enabled methods",
            },
-            'WGET_BINARY': {
-                'plugin': 'wget',
-                'type': 'string',
-                'default': 'wget',
-                'description': 'Path to wget binary',
+            "WGET_BINARY": {
+                "plugin": "wget",
+                "type": "string",
+                "default": "wget",
+                "description": "Path to wget binary",
            },
        },
    )

-    html = str(KeyValueWidget().render('config', {}, attrs={'id': 'id_config'}))
+    html = str(KeyValueWidget().render("config", {}, attrs={"id": "id_config"}))

    assert 'Example: ["--extra-arg"]' in html
    assert 'Example: {"^https://example\\\\.com": ["wget"]}' in html
-    assert 'Example: wget or /usr/bin/wget' in html
-    assert 'validateBinaryValue_id_config' in html
+    assert "Example: wget or /usr/bin/wget" in html
+    assert "validateBinaryValue_id_config" in html
    assert "meta.key.endsWith('_BINARY')" in html
    assert "Binary paths cannot contain quotes" in html

@@ -127,25 +127,25 @@ def test_key_value_widget_shows_array_and_object_examples_and_binary_rules(monke
 def test_key_value_widget_falls_back_to_binary_validation_for_unknown_binary_keys(monkeypatch):
    monkeypatch.setattr(
        KeyValueWidget,
-        '_get_config_options',
+        "_get_config_options",
        lambda self: {
-            'CHROME_BINARY': {
-                'plugin': 'base',
-                'type': 'string',
-                'default': '',
-                'description': 'Resolved Chromium/Chrome binary path shared across plugins',
+            "CHROME_BINARY": {
+                "plugin": "base",
+                "type": "string",
+                "default": "",
+                "description": "Resolved Chromium/Chrome binary path shared across plugins",
            },
        },
    )

    html = str(
        KeyValueWidget().render(
-            'config',
-            {'NODE_BINARY': '/opt/homebrew/bin/node'},
-            attrs={'id': 'id_config'},
-        )
+            "config",
+            {"NODE_BINARY": "/opt/homebrew/bin/node"},
+            attrs={"id": "id_config"},
+        ),
    )

-    assert 'function getMetaForKey_id_config' in html
+    assert "function getMetaForKey_id_config" in html
    assert "if (key.endsWith('_BINARY'))" in html
-    assert 'Path to binary executable' in html
+    assert "Path to binary executable" in html
--- a/archivebox/tests/test_admin_links.py
+++ b/archivebox/tests/test_admin_links.py
@@ -1,5 +1,8 @@
 import pytest
 from django.contrib.admin.sites import AdminSite
+from django.test import RequestFactory
+from django.urls import reverse
+import html
 from uuid import uuid4


@@ -26,18 +29,18 @@ def _create_machine():
    from archivebox.machine.models import Machine

    return Machine.objects.create(
-        guid=f'test-guid-{uuid4()}',
-        hostname='test-host',
+        guid=f"test-guid-{uuid4()}",
+        hostname="test-host",
        hw_in_docker=False,
        hw_in_vm=False,
-        hw_manufacturer='Test',
-        hw_product='Test Product',
-        hw_uuid=f'test-hw-{uuid4()}',
-        os_arch='arm64',
-        os_family='darwin',
-        os_platform='macOS',
-        os_release='14.0',
-        os_kernel='Darwin',
+        hw_manufacturer="Test",
+        hw_product="Test Product",
+        hw_uuid=f"test-hw-{uuid4()}",
+        os_arch="arm64",
+        os_family="darwin",
+        os_platform="macOS",
+        os_release="14.0",
+        os_kernel="Darwin",
        stats={},
        config={},
    )
@@ -48,16 +51,16 @@ def _create_iface(machine):

    return NetworkInterface.objects.create(
        machine=machine,
-        mac_address='00:11:22:33:44:66',
-        ip_public='203.0.113.11',
-        ip_local='10.0.0.11',
-        dns_server='1.1.1.1',
-        hostname='test-host',
-        iface='en0',
-        isp='Test ISP',
-        city='Test City',
-        region='Test Region',
-        country='Test Country',
+        mac_address="00:11:22:33:44:66",
+        ip_public="203.0.113.11",
+        ip_local="10.0.0.11",
+        dns_server="1.1.1.1",
+        hostname="test-host",
+        iface="en0",
+        isp="Test ISP",
+        city="Test City",
+        region="Test Region",
+        country="Test Country",
    )


@@ -72,14 +75,14 @@ def test_archiveresult_admin_links_plugin_and_process():
        machine=iface.machine,
        iface=iface,
        process_type=Process.TypeChoices.HOOK,
-        pwd=str(snapshot.output_dir / 'wget'),
-        cmd=['/tmp/on_Snapshot__06_wget.finite.bg.py', '--url=https://example.com'],
+        pwd=str(snapshot.output_dir / "wget"),
+        cmd=["/tmp/on_Snapshot__06_wget.finite.bg.py", "--url=https://example.com"],
        status=Process.StatusChoices.EXITED,
    )
    result = ArchiveResult.objects.create(
        snapshot=snapshot,
-        plugin='wget',
-        hook_name='on_Snapshot__06_wget.finite.bg.py',
+        plugin="wget",
+        hook_name="on_Snapshot__06_wget.finite.bg.py",
        process=process,
        status=ArchiveResult.StatusChoices.SUCCEEDED,
    )
@@ -89,8 +92,85 @@ def test_archiveresult_admin_links_plugin_and_process():
    plugin_html = str(admin.plugin_with_icon(result))
    process_html = str(admin.process_link(result))

-    assert '/admin/environment/plugins/builtin.wget/' in plugin_html
-    assert f'/admin/machine/process/{process.id}/change' in process_html
+    assert "/admin/environment/plugins/builtin.wget/" in plugin_html
+    assert f"/admin/machine/process/{process.id}/change" in process_html
+
+
+def test_snapshot_admin_zip_links():
+    """The escaped snapshot zip URL is rendered by size_with_stats() and admin_actions(), but not by files()."""
+    from archivebox.core.admin_snapshots import SnapshotAdmin
+    from archivebox.core.models import Snapshot
+
+    snapshot = _create_snapshot()
+    admin = SnapshotAdmin(Snapshot, AdminSite())
+    escaped_zip_url = html.escape(admin.get_snapshot_zip_url(snapshot), quote=True)
+
+    assert escaped_zip_url not in str(admin.files(snapshot))
+    assert escaped_zip_url in str(admin.size_with_stats(snapshot))
+    assert escaped_zip_url in str(admin.admin_actions(snapshot))
+
+
+def test_archiveresult_admin_zip_links():
+    """The escaped output zip URL is present in both zip_link() and admin_actions() for an ArchiveResult."""
+    from archivebox.core.admin_archiveresults import ArchiveResultAdmin
+    from archivebox.core.models import ArchiveResult
+
+    result = ArchiveResult.objects.create(
+        snapshot=_create_snapshot(),
+        plugin="wget",
+        hook_name="on_Snapshot__06_wget.finite.bg.py",
+        status=ArchiveResult.StatusChoices.SUCCEEDED,
+        output_str="Saved output",
+    )
+    admin = ArchiveResultAdmin(ArchiveResult, AdminSite())
+    escaped_zip_url = html.escape(admin.get_output_zip_url(result), quote=True)
+
+    # Both renderers are checked against the HTML-escaped form of the URL.
+    for render in (admin.zip_link, admin.admin_actions):
+        assert escaped_zip_url in str(render(result))
+
+
+def test_archiveresult_admin_copy_command_redacts_sensitive_env_keys():
+    """cmd_str() keeps SOURCE_URL and SAFE_FLAG but omits API_KEY, ACCESS_TOKEN and SHARED_SECRET (names and values)."""
+    from archivebox.core.admin_archiveresults import ArchiveResultAdmin
+    from archivebox.core.models import ArchiveResult
+    from archivebox.machine.models import Process
+
+    visible_env = {"SOURCE_URL": "https://example.com", "SAFE_FLAG": "1"}
+    secret_env = {
+        "API_KEY": "super-secret-key",
+        "ACCESS_TOKEN": "super-secret-token",
+        "SHARED_SECRET": "super-secret-secret",
+    }
+    snapshot = _create_snapshot()
+    iface = _create_iface(_create_machine())
+    process = Process.objects.create(
+        machine=iface.machine,
+        iface=iface,
+        process_type=Process.TypeChoices.HOOK,
+        pwd=str(snapshot.output_dir / "wget"),
+        cmd=["/tmp/on_Snapshot__06_wget.finite.bg.py", "--url=https://example.com"],
+        env={**visible_env, **secret_env},
+        status=Process.StatusChoices.EXITED,
+    )
+    result = ArchiveResult.objects.create(
+        snapshot=snapshot,
+        plugin="wget",
+        hook_name="on_Snapshot__06_wget.finite.bg.py",
+        process=process,
+        status=ArchiveResult.StatusChoices.SUCCEEDED,
+    )
+
+    cmd_html = str(ArchiveResultAdmin(ArchiveResult, AdminSite()).cmd_str(result))
+
+    # Non-sensitive variables show up verbatim as KEY=value pairs.
+    for key, value in visible_env.items():
+        assert f"{key}={value}" in cmd_html
+
+    # Neither the name nor the value of a sensitive variable may appear in the output.
+    for key, value in secret_env.items():
+        assert key not in cmd_html
+        assert value not in cmd_html


 def test_process_admin_links_binary_and_iface():
@@ -101,11 +181,11 @@ def test_process_admin_links_binary_and_iface():
    iface = _create_iface(machine)
    binary = Binary.objects.create(
        machine=machine,
-        name='wget',
-        abspath='/usr/local/bin/wget',
-        version='1.21.2',
-        binprovider='env',
-        binproviders='env',
+        name="wget",
+        abspath="/usr/local/bin/wget",
+        version="1.21.2",
+        binprovider="env",
+        binproviders="env",
        status=Binary.StatusChoices.INSTALLED,
    )
    process = Process.objects.create(
@@ -113,8 +193,8 @@ def test_process_admin_links_binary_and_iface():
        iface=iface,
        binary=binary,
        process_type=Process.TypeChoices.HOOK,
-        pwd='/tmp/wget',
-        cmd=['/tmp/on_Snapshot__06_wget.finite.bg.py', '--url=https://example.com'],
+        pwd="/tmp/wget",
+        cmd=["/tmp/on_Snapshot__06_wget.finite.bg.py", "--url=https://example.com"],
        status=Process.StatusChoices.EXITED,
    )

@@ -123,5 +203,107 @@ def test_process_admin_links_binary_and_iface():
    binary_html = str(admin.binary_link(process))
    iface_html = str(admin.iface_link(process))

-    assert f'/admin/machine/binary/{binary.id}/change' in binary_html
-    assert f'/admin/machine/networkinterface/{iface.id}/change' in iface_html
+    assert f"/admin/machine/binary/{binary.id}/change" in binary_html
+    assert f"/admin/machine/networkinterface/{iface.id}/change" in iface_html
+
+
+def test_process_admin_kill_actions_only_terminate_running_processes(monkeypatch):
+    """The bulk kill action terminates the running process only and reports the exited one as skipped."""
+    from archivebox.machine.admin import ProcessAdmin
+    from archivebox.machine.models import Process
+
+    machine = _create_machine()
+
+    def make_process(pwd, status):
+        return Process.objects.create(
+            machine=machine,
+            process_type=Process.TypeChoices.HOOK,
+            pwd=pwd,
+            cmd=["/tmp/on_Snapshot__06_wget.finite.bg.py", "--url=https://example.com"],
+            status=status,
+        )
+
+    running = make_process("/tmp/running", Process.StatusChoices.RUNNING)
+    exited = make_process("/tmp/exited", Process.StatusChoices.EXITED)
+    admin = ProcessAdmin(Process, AdminSite())
+    request = RequestFactory().post("/admin/machine/process/")
+
+    # Stub liveness, termination and flash messages so no real process is touched and every call is recorded.
+    terminated, flashed = [], []
+    monkeypatch.setattr(Process, "is_running", property(lambda self: self.pk == running.pk), raising=False)
+    monkeypatch.setattr(Process, "terminate", lambda self, graceful_timeout=5.0: terminated.append(self.pk) or True)
+    monkeypatch.setattr(admin, "message_user", lambda req, msg, level=None: flashed.append((msg, level)))
+
+    # Feed both rows to the action, oldest first.
+    selected = Process.objects.filter(pk__in=[running.pk, exited.pk]).order_by("created_at")
+    admin.kill_processes(request, selected)
+
+    messages = [text for text, _level in flashed]
+    assert terminated == [running.pk]
+    assert any("Killed 1 running process" in text for text in messages)
+    assert any("Skipped 1 process" in text for text in messages)
+
+
+def test_process_admin_object_kill_action_redirects_and_skips_exited(monkeypatch):
+    """The per-object kill action leaves an exited process alone and redirects to its change page."""
+    from archivebox.machine.admin import ProcessAdmin
+    from archivebox.machine.models import Process
+
+    process = Process.objects.create(
+        machine=_create_machine(),
+        process_type=Process.TypeChoices.HOOK,
+        pwd="/tmp/exited",
+        cmd=["/tmp/on_Snapshot__06_wget.finite.bg.py", "--url=https://example.com"],
+        status=Process.StatusChoices.EXITED,
+    )
+    admin = ProcessAdmin(Process, AdminSite())
+    request = RequestFactory().post(f"/admin/machine/process/{process.pk}/change/")
+
+    # Every process reports as not running; terminate() and message_user() only record their calls.
+    terminated = []
+    flashed = []
+    monkeypatch.setattr(Process, "is_running", property(lambda self: False), raising=False)
+    monkeypatch.setattr(Process, "terminate", lambda self, graceful_timeout=5.0: terminated.append(self.pk) or True)
+    monkeypatch.setattr(admin, "message_user", lambda req, msg, level=None: flashed.append((msg, level)))
+
+    response = admin.kill_process(request, process)
+    change_url = reverse("admin:machine_process_change", args=[process.pk])
+
+    assert response.status_code == 302
+    assert response.url == change_url
+    assert terminated == []
+    assert any("Skipped 1 process" in text for text, _level in flashed)
+
+
+def test_process_admin_output_summary_uses_archiveresult_output_files():
+    """output_summary() reports the file count and total size from the linked ArchiveResult's output_files."""
+    from archivebox.core.models import ArchiveResult
+    from archivebox.machine.admin import ProcessAdmin
+    from archivebox.machine.models import Process
+
+    # Two files totalling 1024 + 512 bytes; note the second size is supplied as a string.
+    output_files = {
+        "index.html": {"extension": "html", "mimetype": "text/html", "size": 1024},
+        "title.txt": {"extension": "txt", "mimetype": "text/plain", "size": "512"},
+    }
+    snapshot = _create_snapshot()
+    process = Process.objects.create(
+        machine=_create_machine(),
+        process_type=Process.TypeChoices.HOOK,
+        pwd=str(snapshot.output_dir / "wget"),
+        cmd=["/tmp/on_Snapshot__06_wget.finite.bg.py", "--url=https://example.com"],
+        status=Process.StatusChoices.EXITED,
+    )
+    ArchiveResult.objects.create(
+        snapshot=snapshot,
+        plugin="wget",
+        hook_name="on_Snapshot__06_wget.finite.bg.py",
+        process=process,
+        status=ArchiveResult.StatusChoices.SUCCEEDED,
+        output_files=output_files,
+    )
+
+    output_html = str(ProcessAdmin(Process, AdminSite()).output_summary(process))
+
+    assert "2 files" in output_html
+    assert "1.5 KB" in output_html
--- a/archivebox/tests/test_admin_views.py
+++ b/archivebox/tests/test_admin_views.py
--- a/archivebox/tests/test_api_cli_schedule.py
+++ b/archivebox/tests/test_api_cli_schedule.py
@@ -12,25 +12,25 @@ User = get_user_model()
 class CLIScheduleAPITests(TestCase):
    def setUp(self):
        self.user = User.objects.create_user(
-            username='api-user',
-            password='testpass123',
-            email='api@example.com',
+            username="api-user",
+            password="testpass123",
+            email="api@example.com",
        )

    def test_schedule_api_creates_schedule(self):
-        request = RequestFactory().post('/api/v1/cli/schedule')
+        request = RequestFactory().post("/api/v1/cli/schedule")
        request.user = self.user
-        setattr(request, 'stdout', StringIO())
-        setattr(request, 'stderr', StringIO())
+        setattr(request, "stdout", StringIO())
+        setattr(request, "stderr", StringIO())
        args = ScheduleCommandSchema(
-            every='daily',
-            import_path='https://example.com/feed.xml',
+            every="daily",
+            import_path="https://example.com/feed.xml",
            quiet=True,
        )

        response = cli_schedule(request, args)

-        self.assertTrue(response['success'])
-        self.assertEqual(response['result_format'], 'json')
+        self.assertTrue(response["success"])
+        self.assertEqual(response["result_format"], "json")
        self.assertEqual(CrawlSchedule.objects.count(), 1)
-        self.assertEqual(len(response['result']['created_schedule_ids']), 1)
+        self.assertEqual(len(response["result"]["created_schedule_ids"]), 1)
--- a/archivebox/tests/test_archive_result_service.py
+++ b/archivebox/tests/test_archive_result_service.py
@@ -4,8 +4,10 @@ from uuid import uuid4
 import pytest
 from django.db import connection

-from abx_dl.events import ProcessCompletedEvent, ProcessStartedEvent
+
+from abx_dl.events import BinaryEvent, ProcessCompletedEvent, ProcessStartedEvent
 from abx_dl.orchestrator import create_bus
+from abx_dl.output_files import OutputFile


 pytestmark = pytest.mark.django_db
@@ -36,18 +38,18 @@ def _create_machine():
    from archivebox.machine.models import Machine

    return Machine.objects.create(
-        guid=f'test-guid-{uuid4()}',
-        hostname='test-host',
+        guid=f"test-guid-{uuid4()}",
+        hostname="test-host",
        hw_in_docker=False,
        hw_in_vm=False,
-        hw_manufacturer='Test',
-        hw_product='Test Product',
-        hw_uuid=f'test-hw-{uuid4()}',
-        os_arch='arm64',
-        os_family='darwin',
-        os_platform='macOS',
-        os_release='14.0',
-        os_kernel='Darwin',
+        hw_manufacturer="Test",
+        hw_product="Test Product",
+        hw_uuid=f"test-hw-{uuid4()}",
+        os_arch="arm64",
+        os_family="darwin",
+        os_platform="macOS",
+        os_release="14.0",
+        os_kernel="Darwin",
        stats={},
        config={},
    )
@@ -58,16 +60,16 @@ def _create_iface(machine):

    return NetworkInterface.objects.create(
        machine=machine,
-        mac_address='00:11:22:33:44:55',
-        ip_public='203.0.113.10',
-        ip_local='10.0.0.10',
-        dns_server='1.1.1.1',
-        hostname='test-host',
-        iface='en0',
-        isp='Test ISP',
-        city='Test City',
-        region='Test Region',
-        country='Test Country',
+        mac_address="00:11:22:33:44:55",
+        ip_public="203.0.113.10",
+        ip_local="10.0.0.10",
+        dns_server="1.1.1.1",
+        hostname="test-host",
+        iface="en0",
+        isp="Test ISP",
+        city="Test City",
+        region="Test Region",
+        country="Test Country",
    )


@@ -92,7 +94,7 @@ def test_process_completed_projects_inline_archiveresult():
        stderr="",
        exit_code=0,
        output_dir=str(plugin_dir),
-        output_files=["index.html"],
+        output_files=[OutputFile(path="index.html", extension="html", mimetype="text/html", size=15)],
        process_id="proc-inline",
        snapshot_id=str(snapshot.id),
        start_ts="2026-03-22T12:00:00+00:00",
@@ -118,6 +120,8 @@ def test_process_completed_projects_inline_archiveresult():
    assert result.status == ArchiveResult.StatusChoices.SUCCEEDED
    assert result.output_str == "wget/index.html"
    assert "index.html" in result.output_files
+    assert result.output_files["index.html"] == {"extension": "html", "mimetype": "text/html", "size": 15}
+    assert result.output_size == 15
    _cleanup_machine_process_rows()


@@ -215,24 +219,212 @@ def test_process_completed_projects_noresults_archiveresult():
    result = ArchiveResult.objects.get(snapshot=snapshot, plugin="title", hook_name="on_Snapshot__54_title.js")
    assert result.status == ArchiveResult.StatusChoices.NORESULTS
    assert result.output_str == "No title found"
+
+
+def test_retry_failed_archiveresults_requeues_snapshot_in_queued_state():
+    """Retrying failed results should reset them to queued, clear their outputs, and requeue the snapshot."""
+    from archivebox.core.models import ArchiveResult, Snapshot
+
+    snapshot = _create_snapshot()
+    ArchiveResult.objects.create(
+        snapshot=snapshot,
+        plugin="chrome",
+        hook_name="on_Snapshot__11_chrome_wait",
+        status=ArchiveResult.StatusChoices.FAILED,
+        output_str="timed out",
+        output_files={"stderr.log": {}},
+        output_size=123,
+        output_mimetypes="text/plain",
+    )
+
+    reset_count = snapshot.retry_failed_archiveresults()
+
+    snapshot.refresh_from_db()  # single reload: nothing below modifies the snapshot again
+    result = ArchiveResult.objects.get(snapshot=snapshot, plugin="chrome", hook_name="on_Snapshot__11_chrome_wait")
+    assert reset_count == 1  # only the one FAILED result created above
+    assert snapshot.status == Snapshot.StatusChoices.QUEUED
+    assert snapshot.retry_at is not None
+    assert snapshot.current_step == 0
+    assert result.status == ArchiveResult.StatusChoices.QUEUED
+    assert result.output_str == ""
+    assert result.output_json is None
+    assert result.output_files == {}
+    assert result.output_size == 0
+    assert result.output_mimetypes == ""
+    assert result.start_ts is None
+    assert result.end_ts is None
+    assert snapshot.title in (None, "")  # the retry must leave the snapshot title unset
     _cleanup_machine_process_rows()


+def test_process_completed_projects_snapshot_title_from_output_str():
+    from archivebox.services.archive_result_service import ArchiveResultService, _collect_output_metadata
+    from archivebox.services.process_service import ProcessService
+    # Purpose: a succeeded title hook's output_str is expected to be stored as the snapshot title.
+    snapshot = _create_snapshot()
+    plugin_dir = Path(snapshot.output_dir) / "title"
+    plugin_dir.mkdir(parents=True, exist_ok=True)  # this test writes no files into the plugin dir
+
+    bus = create_bus(name="test_snapshot_title_output_str")
+    process_service = ProcessService(bus)
+    service = ArchiveResultService(bus, process_service=process_service)
+
+    event = ProcessCompletedEvent(
+        plugin_name="title",
+        hook_name="on_Snapshot__54_title.js",
+        stdout='{"snapshot_id":"%s","type":"ArchiveResult","status":"succeeded","output_str":"Example Domain"}\n' % snapshot.id,
+        stderr="",
+        exit_code=0,
+        output_dir=str(plugin_dir),
+        output_files=[],
+        process_id="proc-title-output-str",
+        snapshot_id=str(snapshot.id),
+        start_ts="2026-03-22T12:00:00+00:00",
+        end_ts="2026-03-22T12:00:01+00:00",
+    )
+
+    output_files, output_size, output_mimetypes = _collect_output_metadata(plugin_dir)
+    service._project_from_process_completed(  # called directly; the event is not emitted on the bus here
+        event,
+        {  # same status/output_str as the JSON line placed in event.stdout above
+            "snapshot_id": str(snapshot.id),
+            "plugin": "title",
+            "hook_name": "on_Snapshot__54_title.js",
+            "status": "succeeded",
+            "output_str": "Example Domain",
+        },
+        output_files,
+        output_size,
+        output_mimetypes,
+    )
+
+    snapshot.refresh_from_db()  # reload so the assertion sees what was persisted
+    assert snapshot.title == "Example Domain"
+    _cleanup_machine_process_rows()
+
+
+def test_process_completed_projects_snapshot_title_from_title_file():
+    from archivebox.services.archive_result_service import ArchiveResultService, _collect_output_metadata
+    from archivebox.services.process_service import ProcessService
+    # Purpose: the hook reports "noresults", yet the title is expected to be taken from title.txt.
+    snapshot = _create_snapshot()
+    plugin_dir = Path(snapshot.output_dir) / "title"
+    plugin_dir.mkdir(parents=True, exist_ok=True)
+    (plugin_dir / "title.txt").write_text("Example Domain")  # 14 characters, matching size=14 below
+
+    bus = create_bus(name="test_snapshot_title_file")
+    process_service = ProcessService(bus)
+    service = ArchiveResultService(bus, process_service=process_service)
+
+    event = ProcessCompletedEvent(
+        plugin_name="title",
+        hook_name="on_Snapshot__54_title.js",
+        stdout='{"snapshot_id":"%s","type":"ArchiveResult","status":"noresults","output_str":"No title found"}\n' % snapshot.id,
+        stderr="",
+        exit_code=0,
+        output_dir=str(plugin_dir),
+        output_files=[OutputFile(path="title.txt", extension="txt", mimetype="text/plain", size=14)],
+        process_id="proc-title-file",
+        snapshot_id=str(snapshot.id),
+        start_ts="2026-03-22T12:00:00+00:00",
+        end_ts="2026-03-22T12:00:01+00:00",
+    )
+
+    output_files, output_size, output_mimetypes = _collect_output_metadata(plugin_dir)
+    service._project_from_process_completed(  # called directly; the event is not emitted on the bus here
+        event,
+        {  # same status/output_str as the JSON line placed in event.stdout above
+            "snapshot_id": str(snapshot.id),
+            "plugin": "title",
+            "hook_name": "on_Snapshot__54_title.js",
+            "status": "noresults",
+            "output_str": "No title found",
+        },
+        output_files,
+        output_size,
+        output_mimetypes,
+    )
+
+    snapshot.refresh_from_db()  # reload so the assertion sees what was persisted
+    assert snapshot.title == "Example Domain"  # the file's text, not the hook's "No title found"
+    _cleanup_machine_process_rows()
+
+
+def test_snapshot_resolved_title_falls_back_to_title_file_without_db_title():
+    from archivebox.core.models import ArchiveResult
+    # Purpose: with no title stored on the row, resolved_title should still yield the text in title.txt.
+    snapshot = _create_snapshot()
+    plugin_dir = Path(snapshot.output_dir) / "title"
+    plugin_dir.mkdir(parents=True, exist_ok=True)
+    (plugin_dir / "title.txt").write_text("Example Domain")
+    ArchiveResult.objects.create(  # row is created directly through the ORM, not via the service
+        snapshot=snapshot,
+        plugin="title",
+        hook_name="on_Snapshot__54_title.js",
+        status="noresults",
+        output_str="No title found",
+        output_files={"title.txt": {}},
+    )
+
+    snapshot.refresh_from_db()
+    assert snapshot.title in (None, "")  # the DB title stays unset in this scenario
+    assert snapshot.resolved_title == "Example Domain"  # matches what was written to title.txt
+    _cleanup_machine_process_rows()
+
+
+def test_collect_output_metadata_preserves_file_metadata():
+    from archivebox.services.archive_result_service import _resolve_output_metadata
+    # NOTE(review): despite "collect" in the test name, the function under test is _resolve_output_metadata.
+    output_files, output_size, output_mimetypes = _resolve_output_metadata(
+        [OutputFile(path="index.html", extension="html", mimetype="text/html", size=42)],
+        Path("/tmp/does-not-need-to-exist"),  # presumably never read, as the path's name suggests — confirm
+    )
+
+    assert output_files == {  # keyed by the OutputFile path; values repeat its extension/mimetype/size
+        "index.html": {
+            "extension": "html",
+            "mimetype": "text/html",
+            "size": 42,
+        },
+    }
+    assert output_size == 42
+    assert output_mimetypes == "text/html"
+
+
+def test_collect_output_metadata_detects_warc_gz_mimetype(tmp_path):
+    from archivebox.services.archive_result_service import _collect_output_metadata
+    # Purpose: a nested *.warc.gz file should be reported with mimetype "application/warc".
+    plugin_dir = tmp_path / "wget"
+    warc_file = plugin_dir / "warc" / "capture.warc.gz"
+    warc_file.parent.mkdir(parents=True, exist_ok=True)
+    warc_file.write_bytes(b"warc-bytes")  # 10 bytes, matching the expected size below
+
+    output_files, output_size, output_mimetypes = _collect_output_metadata(plugin_dir)
+
+    assert output_files["warc/capture.warc.gz"] == {  # key is the path relative to plugin_dir
+        "extension": "gz",
+        "mimetype": "application/warc",
+        "size": 10,
+    }
+    assert output_size == 10
+    assert output_mimetypes == "application/warc"
+
+
 def test_process_started_hydrates_binary_and_iface_from_existing_binary_records(monkeypatch):
    from archivebox.machine.models import Binary, NetworkInterface
    from archivebox.services.process_service import ProcessService

    machine = _create_machine()
    iface = _create_iface(machine)
-    monkeypatch.setattr(NetworkInterface, 'current', classmethod(lambda cls, refresh=False: iface))
+    monkeypatch.setattr(NetworkInterface, "current", classmethod(lambda cls, refresh=False: iface))

    binary = Binary.objects.create(
        machine=machine,
-        name='postlight-parser',
-        abspath='/tmp/postlight-parser',
-        version='2.2.3',
-        binprovider='npm',
-        binproviders='npm',
+        name="postlight-parser",
+        abspath="/tmp/postlight-parser",
+        version="2.2.3",
+        binprovider="npm",
+        binproviders="npm",
        status=Binary.StatusChoices.INSTALLED,
    )

@@ -268,15 +460,15 @@ def test_process_started_uses_node_binary_for_js_hooks_without_plugin_binary(mon

    machine = _create_machine()
    iface = _create_iface(machine)
-    monkeypatch.setattr(NetworkInterface, 'current', classmethod(lambda cls, refresh=False: iface))
+    monkeypatch.setattr(NetworkInterface, "current", classmethod(lambda cls, refresh=False: iface))

    node = Binary.objects.create(
        machine=machine,
-        name='node',
-        abspath='/tmp/node',
-        version='22.0.0',
-        binprovider='env',
-        binproviders='env',
+        name="node",
+        abspath="/tmp/node",
+        version="22.0.0",
+        binprovider="env",
+        binproviders="env",
        status=Binary.StatusChoices.INSTALLED,
    )

@@ -303,3 +495,40 @@ def test_process_started_uses_node_binary_for_js_hooks_without_plugin_binary(mon
    process = service._get_or_create_process(event)
    assert process.binary_id == node.id
    assert process.iface_id == iface.id
+
+
+def test_binary_event_reuses_existing_installed_binary_row(monkeypatch):
+    from archivebox.machine.models import Binary, Machine
+    from archivebox.services.binary_service import BinaryService as ArchiveBoxBinaryService
+    # Purpose: a BinaryEvent for an already-installed binary must update that row, not add a second one.
+    machine = _create_machine()
+    monkeypatch.setattr(Machine, "current", classmethod(lambda cls: machine))  # Machine.current() -> test machine
+
+    binary = Binary.objects.create(
+        machine=machine,
+        name="wget",
+        abspath="/bin/sh",
+        version="9.9.9",
+        binprovider="env",
+        binproviders="env,apt,brew",
+        status=Binary.StatusChoices.INSTALLED,
+    )
+
+    service = ArchiveBoxBinaryService(create_bus(name="test_binary_event_reuses_existing_installed_binary_row"))
+    event = BinaryEvent(
+        name="wget",
+        plugin_name="wget",
+        hook_name="on_Crawl__10_wget_install.finite.bg",
+        output_dir="/tmp/wget",
+        binproviders="provider",
+    )
+
+    service._project_binary(event)  # called directly; the event is not emitted on the bus here
+
+    binary.refresh_from_db()
+    assert Binary.objects.filter(machine=machine, name="wget").count() == 1  # no duplicate row was created
+    assert binary.status == Binary.StatusChoices.INSTALLED
+    assert binary.abspath == "/bin/sh"  # abspath, version and binprovider keep their pre-existing values
+    assert binary.version == "9.9.9"
+    assert binary.binprovider == "env"
+    assert binary.binproviders == "provider"  # only binproviders is expected to take the event's value
--- a/archivebox/tests/test_auth_ldap.py
+++ b/archivebox/tests/test_auth_ldap.py
@@ -78,8 +78,8 @@ class TestLDAPConfig(unittest.TestCase):
        from archivebox.config import get_CONFIG

        all_config = get_CONFIG()
-        self.assertIn('LDAP_CONFIG', all_config)
-        self.assertEqual(all_config['LDAP_CONFIG'].__class__.__name__, 'LDAPConfig')
+        self.assertIn("LDAP_CONFIG", all_config)
+        self.assertEqual(all_config["LDAP_CONFIG"].__class__.__name__, "LDAPConfig")


 class TestLDAPIntegration(unittest.TestCase):
@@ -95,7 +95,7 @@ class TestLDAPIntegration(unittest.TestCase):
        self.assertIn("django.contrib.auth.backends.ModelBackend", settings.AUTHENTICATION_BACKENDS)

        # LDAP backend should not be present when disabled
-        ldap_backends = [b for b in settings.AUTHENTICATION_BACKENDS if 'ldap' in b.lower()]
+        ldap_backends = [b for b in settings.AUTHENTICATION_BACKENDS if "ldap" in b.lower()]
        self.assertEqual(len(ldap_backends), 0, "LDAP backend should not be present when LDAP_ENABLED=False")

    def test_django_settings_with_ldap_library_check(self):
@@ -106,7 +106,8 @@ class TestLDAPIntegration(unittest.TestCase):
        if not ldap_available:
            # Settings should have loaded without LDAP backend
            from django.conf import settings
-            ldap_backends = [b for b in settings.AUTHENTICATION_BACKENDS if 'ldap' in b.lower()]
+
+            ldap_backends = [b for b in settings.AUTHENTICATION_BACKENDS if "ldap" in b.lower()]
            self.assertEqual(len(ldap_backends), 0, "LDAP backend should not be present when libraries unavailable")


@@ -117,14 +118,14 @@ class TestLDAPAuthBackend(unittest.TestCase):
        """Test that ArchiveBoxLDAPBackend class is defined."""
        from archivebox.ldap.auth import ArchiveBoxLDAPBackend

-        self.assertTrue(hasattr(ArchiveBoxLDAPBackend, 'authenticate_ldap_user'))
+        self.assertTrue(hasattr(ArchiveBoxLDAPBackend, "authenticate_ldap_user"))

    def test_ldap_backend_inherits_correctly(self):
        """Test that ArchiveBoxLDAPBackend has correct inheritance."""
        from archivebox.ldap.auth import ArchiveBoxLDAPBackend

        # Should have authenticate_ldap_user method (from base or overridden)
-        self.assertTrue(callable(getattr(ArchiveBoxLDAPBackend, 'authenticate_ldap_user', None)))
+        self.assertTrue(callable(getattr(ArchiveBoxLDAPBackend, "authenticate_ldap_user", None)))


 class TestArchiveBoxWithLDAP(unittest.TestCase):
@@ -132,7 +133,7 @@ class TestArchiveBoxWithLDAP(unittest.TestCase):

    def setUp(self):
        """Set up test environment."""
-        self.work_dir = tempfile.mkdtemp(prefix='archivebox-ldap-test-')
+        self.work_dir = tempfile.mkdtemp(prefix="archivebox-ldap-test-")

    def test_archivebox_init_without_ldap(self):
        """Test that archivebox init works without LDAP enabled."""
@@ -140,15 +141,15 @@ class TestArchiveBoxWithLDAP(unittest.TestCase):

        # Run archivebox init
        result = subprocess.run(
-            [sys.executable, '-m', 'archivebox', 'init'],
+            [sys.executable, "-m", "archivebox", "init"],
            cwd=self.work_dir,
            capture_output=True,
            timeout=45,
            env={
                **os.environ,
-                'DATA_DIR': self.work_dir,
-                'LDAP_ENABLED': 'False',
-            }
+                "DATA_DIR": self.work_dir,
+                "LDAP_ENABLED": "False",
+            },
        )

        # Should succeed
@@ -160,16 +161,16 @@ class TestArchiveBoxWithLDAP(unittest.TestCase):

        # Run archivebox version with LDAP config env vars
        result = subprocess.run(
-            [sys.executable, '-m', 'archivebox', 'version'],
+            [sys.executable, "-m", "archivebox", "version"],
            cwd=self.work_dir,
            capture_output=True,
            timeout=10,
            env={
                **os.environ,
-                'DATA_DIR': self.work_dir,
-                'LDAP_ENABLED': 'False',
-                'LDAP_SERVER_URI': 'ldap://ldap-test.localhost:389',
-            }
+                "DATA_DIR": self.work_dir,
+                "LDAP_ENABLED": "False",
+                "LDAP_SERVER_URI": "ldap://ldap-test.localhost:389",
+            },
        )

        # Should succeed
@@ -181,7 +182,7 @@ class TestLDAPConfigValidationInArchiveBox(unittest.TestCase):

    def setUp(self):
        """Set up test environment."""
-        self.work_dir = tempfile.mkdtemp(prefix='archivebox-ldap-validation-')
+        self.work_dir = tempfile.mkdtemp(prefix="archivebox-ldap-validation-")

    def test_archivebox_init_with_incomplete_ldap_config(self):
        """Test that archivebox init fails with helpful error when LDAP config is incomplete."""
@@ -189,16 +190,16 @@ class TestLDAPConfigValidationInArchiveBox(unittest.TestCase):

        # Run archivebox init with LDAP enabled but missing required fields
        result = subprocess.run(
-            [sys.executable, '-m', 'archivebox', 'init'],
+            [sys.executable, "-m", "archivebox", "init"],
            cwd=self.work_dir,
            capture_output=True,
            timeout=45,
            env={
                **os.environ,
-                'DATA_DIR': self.work_dir,
-                'LDAP_ENABLED': 'True',
+                "DATA_DIR": self.work_dir,
+                "LDAP_ENABLED": "True",
                # Missing: LDAP_SERVER_URI, LDAP_BIND_DN, etc.
-            }
+            },
        )

        # Should fail with validation error
@@ -206,9 +207,12 @@ class TestLDAPConfigValidationInArchiveBox(unittest.TestCase):

        # Check error message
        stderr = result.stderr.decode()
-        self.assertIn("LDAP_* config options must all be set", stderr,
-                     f"Expected validation error message in: {stderr}")
+        self.assertIn(
+            "LDAP_* config options must all be set",
+            stderr,
+            f"Expected validation error message in: {stderr}",
+        )


-if __name__ == '__main__':
+if __name__ == "__main__":
    unittest.main()
--- a/archivebox/tests/test_cli_add.py
+++ b/archivebox/tests/test_cli_add.py
@@ -14,8 +14,8 @@ def _find_snapshot_dir(data_dir: Path, snapshot_id: str) -> Path | None:
    candidates = {snapshot_id}
    if len(snapshot_id) == 32:
        candidates.add(f"{snapshot_id[:8]}-{snapshot_id[8:12]}-{snapshot_id[12:16]}-{snapshot_id[16:20]}-{snapshot_id[20:]}")
-    elif len(snapshot_id) == 36 and '-' in snapshot_id:
-        candidates.add(snapshot_id.replace('-', ''))
+    elif len(snapshot_id) == 36 and "-" in snapshot_id:
+        candidates.add(snapshot_id.replace("-", ""))

    for needle in candidates:
        for path in data_dir.rglob(needle):
@@ -28,7 +28,7 @@ def test_add_single_url_creates_snapshot_in_db(tmp_path, process, disable_extrac
    """Test that adding a single URL creates a snapshot in the database."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -41,14 +41,14 @@ def test_add_single_url_creates_snapshot_in_db(tmp_path, process, disable_extrac
    conn.close()

    assert len(snapshots) == 1
-    assert snapshots[0][0] == 'https://example.com'
+    assert snapshots[0][0] == "https://example.com"


 def test_add_bg_creates_root_snapshot_rows_immediately(tmp_path, process, disable_extractors_dict):
    """Background add should create root snapshots immediately so the queue is visible in the DB."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'add', '--bg', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--bg", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -61,15 +61,15 @@ def test_add_bg_creates_root_snapshot_rows_immediately(tmp_path, process, disabl
    conn.close()

    assert len(snapshots) == 1
-    assert snapshots[0][0] == 'https://example.com'
-    assert snapshots[0][1] == 'queued'
+    assert snapshots[0][0] == "https://example.com"
+    assert snapshots[0][1] == "queued"


 def test_add_creates_crawl_record(tmp_path, process, disable_extractors_dict):
    """Test that add command creates a Crawl record in the database."""
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -86,7 +86,7 @@ def test_add_creates_source_file(tmp_path, process, disable_extractors_dict):
    """Test that add creates a source file with the URL."""
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -105,7 +105,7 @@ def test_add_multiple_urls_single_command(tmp_path, process, disable_extractors_
    """Test adding multiple URLs in a single command."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com', 'https://example.org'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com", "https://example.org"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -119,8 +119,8 @@ def test_add_multiple_urls_single_command(tmp_path, process, disable_extractors_
    conn.close()

    assert snapshot_count == 2
-    assert urls[0][0] == 'https://example.com'
-    assert urls[1][0] == 'https://example.org'
+    assert urls[0][0] == "https://example.com"
+    assert urls[1][0] == "https://example.org"


 def test_add_from_file(tmp_path, process, disable_extractors_dict):
@@ -136,7 +136,7 @@ def test_add_from_file(tmp_path, process, disable_extractors_dict):
    urls_file.write_text("https://example.com\nhttps://example.org\n")

    result = subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', str(urls_file)],
+        ["archivebox", "add", "--index-only", "--depth=0", str(urls_file)],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -158,41 +158,41 @@ def test_add_with_depth_0_flag(tmp_path, process, disable_extractors_dict):
    """Test that --depth=0 flag is accepted and works."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    assert result.returncode == 0
-    assert 'unrecognized arguments: --depth' not in result.stderr.decode('utf-8')
+    assert "unrecognized arguments: --depth" not in result.stderr.decode("utf-8")


 def test_add_with_depth_1_flag(tmp_path, process, disable_extractors_dict):
    """Test that --depth=1 flag is accepted."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=1', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=1", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    assert result.returncode == 0
-    assert 'unrecognized arguments: --depth' not in result.stderr.decode('utf-8')
+    assert "unrecognized arguments: --depth" not in result.stderr.decode("utf-8")


 def test_add_rejects_invalid_depth_values(tmp_path, process, disable_extractors_dict):
    """Test that add rejects depth values outside the supported range."""
    os.chdir(tmp_path)

-    for depth in ('5', '-1'):
+    for depth in ("5", "-1"):
        result = subprocess.run(
-            ['archivebox', 'add', '--index-only', f'--depth={depth}', 'https://example.com'],
+            ["archivebox", "add", "--index-only", f"--depth={depth}", "https://example.com"],
            capture_output=True,
            env=disable_extractors_dict,
        )
-        stderr = result.stderr.decode('utf-8').lower()
+        stderr = result.stderr.decode("utf-8").lower()
        assert result.returncode != 0
-        assert 'invalid' in stderr or 'not one of' in stderr
+        assert "invalid" in stderr or "not one of" in stderr


 def test_add_with_tags(tmp_path, process, disable_extractors_dict):
@@ -203,7 +203,7 @@ def test_add_with_tags(tmp_path, process, disable_extractors_dict):
    """
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', '--tag=test,example', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "--tag=test,example", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -214,14 +214,14 @@ def test_add_with_tags(tmp_path, process, disable_extractors_dict):
    conn.close()

    # Tags are stored as a comma-separated string in crawl
-    assert 'test' in tags_str or 'example' in tags_str
+    assert "test" in tags_str or "example" in tags_str


 def test_add_records_selected_persona_on_crawl(tmp_path, process, disable_extractors_dict):
    """Test add persists the selected persona so browser config derives from it later."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', '--persona=Default', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "--persona=Default", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -231,12 +231,12 @@ def test_add_records_selected_persona_on_crawl(tmp_path, process, disable_extrac
    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    persona_id, default_persona = c.execute(
-        "SELECT persona_id, json_extract(config, '$.DEFAULT_PERSONA') FROM crawls_crawl LIMIT 1"
+        "SELECT persona_id, json_extract(config, '$.DEFAULT_PERSONA') FROM crawls_crawl LIMIT 1",
    ).fetchone()
    conn.close()

    assert persona_id
-    assert default_persona == 'Default'
+    assert default_persona == "Default"
    assert (tmp_path / "personas" / "Default" / "chrome_user_data").is_dir()


@@ -244,10 +244,13 @@ def test_add_records_url_filter_overrides_on_crawl(tmp_path, process, disable_ex
    os.chdir(tmp_path)
    result = subprocess.run(
        [
-            'archivebox', 'add', '--index-only', '--depth=0',
-            '--domain-allowlist=example.com,*.example.com',
-            '--domain-denylist=static.example.com',
-            'https://example.com',
+            "archivebox",
+            "add",
+            "--index-only",
+            "--depth=0",
+            "--domain-allowlist=example.com,*.example.com",
+            "--domain-denylist=static.example.com",
+            "https://example.com",
        ],
        capture_output=True,
        env=disable_extractors_dict,
@@ -258,12 +261,12 @@ def test_add_records_url_filter_overrides_on_crawl(tmp_path, process, disable_ex
    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    allowlist, denylist = c.execute(
-        "SELECT json_extract(config, '$.URL_ALLOWLIST'), json_extract(config, '$.URL_DENYLIST') FROM crawls_crawl LIMIT 1"
+        "SELECT json_extract(config, '$.URL_ALLOWLIST'), json_extract(config, '$.URL_DENYLIST') FROM crawls_crawl LIMIT 1",
    ).fetchone()
    conn.close()

-    assert allowlist == 'example.com,*.example.com'
-    assert denylist == 'static.example.com'
+    assert allowlist == "example.com,*.example.com"
+    assert denylist == "static.example.com"
    assert (tmp_path / "personas" / "Default" / "chrome_extensions").is_dir()


@@ -277,14 +280,14 @@ def test_add_duplicate_url_creates_separate_crawls(tmp_path, process, disable_ex

    # Add URL first time
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Add same URL second time
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -306,27 +309,27 @@ def test_add_with_overwrite_flag(tmp_path, process, disable_extractors_dict):

    # Add URL first time
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Add with overwrite
    result = subprocess.run(
-        ['archivebox', 'add', '--index-only', '--overwrite', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--overwrite", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    assert result.returncode == 0
-    assert 'unrecognized arguments: --overwrite' not in result.stderr.decode('utf-8')
+    assert "unrecognized arguments: --overwrite" not in result.stderr.decode("utf-8")


 def test_add_creates_snapshot_output_directory(tmp_path, process, disable_extractors_dict):
    """Test that add creates the current snapshot output directory on disk."""
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -346,14 +349,39 @@ def test_add_help_shows_depth_and_tag_options(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'add', '--help'],
+        ["archivebox", "add", "--help"],
        capture_output=True,
        text=True,
    )

    assert result.returncode == 0
-    assert '--depth' in result.stdout
-    assert '--tag' in result.stdout
+    assert "--depth" in result.stdout
+    assert "--max-urls" in result.stdout
+    assert "--max-size" in result.stdout
+    assert "--tag" in result.stdout
+
+
+def test_add_records_max_url_and_size_limits_on_crawl(tmp_path, process, disable_extractors_dict):
+    os.chdir(tmp_path)  # the relative "index.sqlite3" path opened below resolves against this directory
+    result = subprocess.run(
+        ["archivebox", "add", "--index-only", "--depth=1", "--max-urls=3", "--max-size=45mb", "https://example.com"],
+        capture_output=True,
+        env=disable_extractors_dict,
+    )
+
+    assert result.returncode == 0
+    # The limits are expected both in the crawl's own columns and inside its JSON config.
+    conn = sqlite3.connect("index.sqlite3")
+    c = conn.cursor()
+    max_urls, max_size, config_max_urls, config_max_size = c.execute(
+        "SELECT max_urls, max_size, json_extract(config, '$.MAX_URLS'), json_extract(config, '$.MAX_SIZE') FROM crawls_crawl LIMIT 1",
+    ).fetchone()
+    conn.close()
+
+    assert max_urls == 3
+    assert max_size == 45 * 1024 * 1024  # "--max-size=45mb" is expected to be stored as a byte count
+    assert config_max_urls == 3
+    assert config_max_size == 45 * 1024 * 1024


 def test_add_without_args_shows_usage(tmp_path, process):
@@ -361,21 +389,21 @@ def test_add_without_args_shows_usage(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'add'],
+        ["archivebox", "add"],
        capture_output=True,
        text=True,
    )

    combined = result.stdout + result.stderr
    assert result.returncode != 0
-    assert 'usage' in combined.lower() or 'url' in combined.lower()
+    assert "usage" in combined.lower() or "url" in combined.lower()


 def test_add_index_only_skips_extraction(tmp_path, process, disable_extractors_dict):
    """Test that --index-only flag skips extraction (fast)."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,  # Should be fast
@@ -396,7 +424,7 @@ def test_add_links_snapshot_to_crawl(tmp_path, process, disable_extractors_dict)
    """Test that add links the snapshot to the crawl via crawl_id."""
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -419,7 +447,7 @@ def test_add_sets_snapshot_timestamp(tmp_path, process, disable_extractors_dict)
    """Test that add sets a timestamp on the snapshot."""
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
--- a/archivebox/tests/test_cli_archiveresult.py
+++ b/archivebox/tests/test_cli_archiveresult.py
@@ -17,10 +17,10 @@ from archivebox.tests.conftest import (
 )

 PROJECTOR_TEST_ENV = {
-    'PLUGINS': 'favicon',
-    'SAVE_FAVICON': 'True',
-    'USE_COLOR': 'False',
-    'SHOW_PROGRESS': 'False',
+    "PLUGINS": "favicon",  # this dict is passed as env= to the `archivebox run` calls in this module
+    "SAVE_FAVICON": "True",  # presumably enables the favicon plugin the tests request via --plugin=favicon; confirm
+    "USE_COLOR": "False",  # all values are strings because they travel as environment variables
+    "SHOW_PROGRESS": "False",
 }


@@ -32,12 +32,12 @@ class TestArchiveResultCreate:
        url = create_test_url()

        # Create a snapshot first
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)
        snapshot = parse_jsonl_output(stdout1)[0]

        # Pipe snapshot to archiveresult create
        stdout2, stderr, code = run_archivebox_cmd(
-            ['archiveresult', 'create', '--plugin=title'],
+            ["archiveresult", "create", "--plugin=title"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )
@@ -46,49 +46,49 @@ class TestArchiveResultCreate:

        records = parse_jsonl_output(stdout2)
        # Should have the Snapshot passed through and an ArchiveResult request emitted
-        types = [r.get('type') for r in records]
-        assert 'Snapshot' in types
-        assert 'ArchiveResult' in types
+        types = [r.get("type") for r in records]
+        assert "Snapshot" in types
+        assert "ArchiveResult" in types

-        ar = next(r for r in records if r['type'] == 'ArchiveResult')
-        assert ar['plugin'] == 'title'
-        assert 'id' not in ar
+        ar = next(r for r in records if r["type"] == "ArchiveResult")
+        assert ar["plugin"] == "title"
+        assert "id" not in ar

    def test_create_with_specific_plugin(self, initialized_archive):
        """Create archive result for specific plugin."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)  # stdout is parsed as JSONL below
        snapshot = parse_jsonl_output(stdout1)[0]

        stdout2, stderr, code = run_archivebox_cmd(
-            ['archiveresult', 'create', '--plugin=screenshot'],
+            ["archiveresult", "create", "--plugin=screenshot"],  # the snapshot record is piped in on stdin
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout2)
-        ar_records = [r for r in records if r.get('type') == 'ArchiveResult']
+        ar_records = [r for r in records if r.get("type") == "ArchiveResult"]  # drop records of any other type
        assert len(ar_records) >= 1
-        assert ar_records[0]['plugin'] == 'screenshot'
+        assert ar_records[0]["plugin"] == "screenshot"  # only the first ArchiveResult record is inspected

    def test_create_pass_through_crawl(self, initialized_archive):
        """Pass-through Crawl records unchanged."""
        url = create_test_url()

        # Create crawl and snapshot
-        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["crawl", "create", url], data_dir=initialized_archive)
        crawl = parse_jsonl_output(stdout1)[0]

        stdout2, _, _ = run_archivebox_cmd(
-            ['snapshot', 'create'],
+            ["snapshot", "create"],
            stdin=json.dumps(crawl),
            data_dir=initialized_archive,
        )

        # Now pipe all to archiveresult create
        stdout3, stderr, code = run_archivebox_cmd(
-            ['archiveresult', 'create', '--plugin=title'],
+            ["archiveresult", "create", "--plugin=title"],
            stdin=stdout2,
            data_dir=initialized_archive,
        )
@@ -96,23 +96,23 @@ class TestArchiveResultCreate:
        assert code == 0
        records = parse_jsonl_output(stdout3)

-        types = [r.get('type') for r in records]
-        assert 'Crawl' in types
-        assert 'Snapshot' in types
-        assert 'ArchiveResult' in types
+        types = [r.get("type") for r in records]
+        assert "Crawl" in types
+        assert "Snapshot" in types
+        assert "ArchiveResult" in types

    def test_create_pass_through_only_when_no_snapshots(self, initialized_archive):
        """Only pass-through records but no new snapshots returns success."""
-        crawl_record = {'type': 'Crawl', 'id': 'fake-id', 'urls': 'https://example.com'}
+        crawl_record = {"type": "Crawl", "id": "fake-id", "urls": "https://example.com"}  # hand-built record; this test creates no crawl

        stdout, stderr, code = run_archivebox_cmd(
-            ['archiveresult', 'create'],
+            ["archiveresult", "create"],  # no --plugin argument is passed here
            stdin=json.dumps(crawl_record),
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Passed through' in stderr
+        assert "Passed through" in stderr  # the notice is checked on stderr; stdout is not inspected


 class TestArchiveResultList:
@@ -121,26 +121,26 @@ class TestArchiveResultList:
    def test_list_empty(self, initialized_archive):
        """List with no archive results returns empty."""
        stdout, stderr, code = run_archivebox_cmd(
-            ['archiveresult', 'list'],
+            ["archiveresult", "list"],  # no filters and nothing created beforehand in this test
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Listed 0 archive results' in stderr
+        assert "Listed 0 archive results" in stderr  # count summary is expected on stderr; stdout is not checked

    def test_list_filter_by_status(self, initialized_archive):
        """Filter archive results by status."""
        # Create snapshot and materialize an archive result via the runner
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)
        snapshot = parse_jsonl_output(stdout1)[0]
        stdout2, _, _ = run_archivebox_cmd(
-            ['archiveresult', 'create', '--plugin=favicon'],
+            ["archiveresult", "create", "--plugin=favicon"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )
        run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=stdout2,
            data_dir=initialized_archive,
            timeout=120,
@@ -148,38 +148,38 @@ class TestArchiveResultList:
        )
        created = parse_jsonl_output(
            run_archivebox_cmd(
-                ['archiveresult', 'list', '--plugin=favicon'],
+                ["archiveresult", "list", "--plugin=favicon"],
                data_dir=initialized_archive,
-            )[0]
+            )[0],
        )[0]
        run_archivebox_cmd(
-            ['archiveresult', 'update', '--status=queued'],
+            ["archiveresult", "update", "--status=queued"],
            stdin=json.dumps(created),
            data_dir=initialized_archive,
        )

        stdout, stderr, code = run_archivebox_cmd(
-            ['archiveresult', 'list', '--status=queued'],
+            ["archiveresult", "list", "--status=queued"],
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout)
        for r in records:
-            assert r['status'] == 'queued'
+            assert r["status"] == "queued"

    def test_list_filter_by_plugin(self, initialized_archive):
        """Filter archive results by plugin."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)
        snapshot = parse_jsonl_output(stdout1)[0]
        stdout2, _, _ = run_archivebox_cmd(
-            ['archiveresult', 'create', '--plugin=favicon'],
+            ["archiveresult", "create", "--plugin=favicon"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )
        run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=stdout2,
            data_dir=initialized_archive,
            timeout=120,
@@ -187,29 +187,29 @@ class TestArchiveResultList:
        )

        stdout, stderr, code = run_archivebox_cmd(
-            ['archiveresult', 'list', '--plugin=favicon'],
+            ["archiveresult", "list", "--plugin=favicon"],
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout)
        for r in records:
-            assert r['plugin'] == 'favicon'
+            assert r["plugin"] == "favicon"

    def test_list_with_limit(self, initialized_archive):
        """Limit number of results."""
        # Create multiple archive results
        for _ in range(3):
            url = create_test_url()
-            stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+            stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)
            snapshot = parse_jsonl_output(stdout1)[0]
            stdout2, _, _ = run_archivebox_cmd(
-                ['archiveresult', 'create', '--plugin=favicon'],
+                ["archiveresult", "create", "--plugin=favicon"],
                stdin=json.dumps(snapshot),
                data_dir=initialized_archive,
            )
            run_archivebox_cmd(
-                ['run'],
+                ["run"],
                stdin=stdout2,
                data_dir=initialized_archive,
                timeout=120,
@@ -217,7 +217,7 @@ class TestArchiveResultList:
            )

        stdout, stderr, code = run_archivebox_cmd(
-            ['archiveresult', 'list', '--limit=2'],
+            ["archiveresult", "list", "--limit=2"],
            data_dir=initialized_archive,
        )

@@ -232,38 +232,38 @@ class TestArchiveResultUpdate:
    def test_update_status(self, initialized_archive):
        """Update archive result status."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)  # step 1: make a snapshot
        snapshot = parse_jsonl_output(stdout1)[0]

        stdout2, _, _ = run_archivebox_cmd(
-            ['archiveresult', 'create', '--plugin=favicon'],
+            ["archiveresult", "create", "--plugin=favicon"],  # step 2: request a favicon result for that snapshot
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )
        stdout_run, _, _ = run_archivebox_cmd(
-            ['run'],
+            ["run"],  # step 3: feed the create output to `run` (120s timeout, PROJECTOR_TEST_ENV)
            stdin=stdout2,
            data_dir=initialized_archive,
            timeout=120,
            env=PROJECTOR_TEST_ENV,
        )
        stdout_list, _, _ = run_archivebox_cmd(
-            ['archiveresult', 'list', '--plugin=favicon'],
+            ["archiveresult", "list", "--plugin=favicon"],  # step 4: read a favicon result back to use as update input
            data_dir=initialized_archive,
        )
        ar = parse_jsonl_output(stdout_list)[0]

        stdout3, stderr, code = run_archivebox_cmd(
-            ['archiveresult', 'update', '--status=failed'],
+            ["archiveresult", "update", "--status=failed"],  # step 5: the record to update arrives on stdin
            stdin=json.dumps(ar),
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Updated 1 archive results' in stderr
+        assert "Updated 1 archive results" in stderr  # summary is expected on stderr

        records = parse_jsonl_output(stdout3)
-        assert records[0]['status'] == 'failed'
+        assert records[0]["status"] == "failed"  # the updated record is read back from stdout as JSONL


 class TestArchiveResultDelete:
@@ -272,65 +272,65 @@ class TestArchiveResultDelete:
    def test_delete_requires_yes(self, initialized_archive):
        """Delete requires --yes flag."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)  # step 1: make a snapshot
        snapshot = parse_jsonl_output(stdout1)[0]

        stdout2, _, _ = run_archivebox_cmd(
-            ['archiveresult', 'create', '--plugin=favicon'],
+            ["archiveresult", "create", "--plugin=favicon"],  # step 2: request a favicon result for that snapshot
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )
        stdout_run, _, _ = run_archivebox_cmd(
-            ['run'],
+            ["run"],  # step 3: feed the create output to `run` (120s timeout, PROJECTOR_TEST_ENV)
            stdin=stdout2,
            data_dir=initialized_archive,
            timeout=120,
            env=PROJECTOR_TEST_ENV,
        )
        stdout_list, _, _ = run_archivebox_cmd(
-            ['archiveresult', 'list', '--plugin=favicon'],
+            ["archiveresult", "list", "--plugin=favicon"],  # step 4: read a favicon result back to use as delete input
            data_dir=initialized_archive,
        )
        ar = parse_jsonl_output(stdout_list)[0]

        stdout, stderr, code = run_archivebox_cmd(
-            ['archiveresult', 'delete'],
+            ["archiveresult", "delete"],  # deliberately without --yes
            stdin=json.dumps(ar),
            data_dir=initialized_archive,
        )

        assert code == 1
-        assert '--yes' in stderr
+        assert "--yes" in stderr  # the refusal output is expected to mention the missing flag

    def test_delete_with_yes(self, initialized_archive):
        """Delete with --yes flag works."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)  # step 1: make a snapshot
        snapshot = parse_jsonl_output(stdout1)[0]

        stdout2, _, _ = run_archivebox_cmd(
-            ['archiveresult', 'create', '--plugin=favicon'],
+            ["archiveresult", "create", "--plugin=favicon"],  # step 2: request a favicon result for that snapshot
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )
        stdout_run, _, _ = run_archivebox_cmd(
-            ['run'],
+            ["run"],  # step 3: feed the create output to `run` (120s timeout, PROJECTOR_TEST_ENV)
            stdin=stdout2,
            data_dir=initialized_archive,
            timeout=120,
            env=PROJECTOR_TEST_ENV,
        )
        stdout_list, _, _ = run_archivebox_cmd(
-            ['archiveresult', 'list', '--plugin=favicon'],
+            ["archiveresult", "list", "--plugin=favicon"],  # step 4: read a favicon result back to use as delete input
            data_dir=initialized_archive,
        )
        ar = parse_jsonl_output(stdout_list)[0]

        stdout, stderr, code = run_archivebox_cmd(
-            ['archiveresult', 'delete', '--yes'],
+            ["archiveresult", "delete", "--yes"],  # --yes confirms the deletion of the record piped on stdin
            stdin=json.dumps(ar),
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Deleted 1 archive results' in stderr
+        assert "Deleted 1 archive results" in stderr  # summary is expected on stderr; stdout is not checked
--- a/archivebox/tests/test_cli_config.py
+++ b/archivebox/tests/test_cli_config.py
@@ -11,27 +11,27 @@ import subprocess
 def test_config_displays_all_config(tmp_path, process):
    """Test that config without args displays all configuration."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'config'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "config"], capture_output=True, text=True)  # text=True: stdout is str, not bytes

    assert result.returncode == 0
    output = result.stdout
    # Should show config sections
    assert len(output) > 100
    # Should show at least some standard config keys
-    assert 'TIMEOUT' in output or 'OUTPUT_PERMISSIONS' in output
+    assert "TIMEOUT" in output or "OUTPUT_PERMISSIONS" in output  # either key on its own satisfies the check


 def test_config_get_specific_key(tmp_path, process):
    """Test that config --get KEY retrieves specific value."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'config', '--get', 'TIMEOUT'],
+        ["archivebox", "config", "--get", "TIMEOUT"],  # run from tmp_path (see os.chdir above)
        capture_output=True,
        text=True,
    )

    assert result.returncode == 0
-    assert 'TIMEOUT' in result.stdout
+    assert "TIMEOUT" in result.stdout  # only the key name is asserted; the value itself is not checked


 def test_config_set_writes_to_file(tmp_path, process):
@@ -39,7 +39,7 @@ def test_config_set_writes_to_file(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'config', '--set', 'TIMEOUT=120'],
+        ["archivebox", "config", "--set", "TIMEOUT=120"],
        capture_output=True,
        text=True,
    )
@@ -47,11 +47,11 @@ def test_config_set_writes_to_file(tmp_path, process):
    assert result.returncode == 0

    # Verify config file was updated
-    config_file = tmp_path / 'ArchiveBox.conf'
+    config_file = tmp_path / "ArchiveBox.conf"
    assert config_file.exists()

    content = config_file.read_text()
-    assert 'TIMEOUT' in content or '120' in content
+    assert "TIMEOUT" in content or "120" in content


 def test_config_set_and_get_roundtrip(tmp_path, process):
@@ -60,19 +60,19 @@ def test_config_set_and_get_roundtrip(tmp_path, process):

    # Set a unique value
    subprocess.run(
-        ['archivebox', 'config', '--set', 'TIMEOUT=987'],
+        ["archivebox", "config", "--set", "TIMEOUT=987"],
        capture_output=True,
        text=True,
    )

    # Get the value back
    result = subprocess.run(
-        ['archivebox', 'config', '--get', 'TIMEOUT'],
+        ["archivebox", "config", "--get", "TIMEOUT"],
        capture_output=True,
        text=True,
    )

-    assert '987' in result.stdout
+    assert "987" in result.stdout


 def test_config_set_multiple_values(tmp_path, process):
@@ -80,7 +80,7 @@ def test_config_set_multiple_values(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'config', '--set', 'TIMEOUT=111', 'YTDLP_TIMEOUT=222'],
+        ["archivebox", "config", "--set", "TIMEOUT=111", "YTDLP_TIMEOUT=222"],
        capture_output=True,
        text=True,
    )
@@ -88,10 +88,10 @@ def test_config_set_multiple_values(tmp_path, process):
    assert result.returncode == 0

    # Verify both were written
-    config_file = tmp_path / 'ArchiveBox.conf'
+    config_file = tmp_path / "ArchiveBox.conf"
    content = config_file.read_text()
-    assert '111' in content
-    assert '222' in content
+    assert "111" in content
+    assert "222" in content


 def test_config_set_invalid_key_fails(tmp_path, process):
@@ -99,7 +99,7 @@ def test_config_set_invalid_key_fails(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'config', '--set', 'TOTALLY_INVALID_KEY_XYZ=value'],
+        ["archivebox", "config", "--set", "TOTALLY_INVALID_KEY_XYZ=value"],
        capture_output=True,
        text=True,
    )
@@ -112,7 +112,7 @@ def test_config_set_requires_equals_sign(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'config', '--set', 'TIMEOUT'],
+        ["archivebox", "config", "--set", "TIMEOUT"],
        capture_output=True,
        text=True,
    )
@@ -125,13 +125,13 @@ def test_config_search_finds_keys(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'config', '--search', 'TIMEOUT'],
+        ["archivebox", "config", "--search", "TIMEOUT"],
        capture_output=True,
        text=True,
    )

    # Should find timeout-related config
-    assert 'TIMEOUT' in result.stdout
+    assert "TIMEOUT" in result.stdout


 def test_config_preserves_existing_values(tmp_path, process):
@@ -140,21 +140,21 @@ def test_config_preserves_existing_values(tmp_path, process):

    # Set first value
    subprocess.run(
-        ['archivebox', 'config', '--set', 'TIMEOUT=100'],
+        ["archivebox", "config", "--set", "TIMEOUT=100"],
        capture_output=True,
    )

    # Set second value
    subprocess.run(
-        ['archivebox', 'config', '--set', 'YTDLP_TIMEOUT=200'],
+        ["archivebox", "config", "--set", "YTDLP_TIMEOUT=200"],
        capture_output=True,
    )

    # Verify both are in config file
-    config_file = tmp_path / 'ArchiveBox.conf'
+    config_file = tmp_path / "ArchiveBox.conf"
    content = config_file.read_text()
-    assert 'TIMEOUT' in content
-    assert 'YTDLP_TIMEOUT' in content
+    assert "TIMEOUT" in content
+    assert "YTDLP_TIMEOUT" in content


 def test_config_file_is_valid_toml(tmp_path, process):
@@ -162,15 +162,15 @@ def test_config_file_is_valid_toml(tmp_path, process):
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'config', '--set', 'TIMEOUT=150'],
+        ["archivebox", "config", "--set", "TIMEOUT=150"],
        capture_output=True,
    )

-    config_file = tmp_path / 'ArchiveBox.conf'
+    config_file = tmp_path / "ArchiveBox.conf"
    content = config_file.read_text()

    # Basic TOML validation - should have sections and key=value pairs
-    assert '[' in content or '=' in content
+    assert "[" in content or "=" in content


 def test_config_updates_existing_value(tmp_path, process):
@@ -179,22 +179,22 @@ def test_config_updates_existing_value(tmp_path, process):

    # Set initial value
    subprocess.run(
-        ['archivebox', 'config', '--set', 'TIMEOUT=100'],
+        ["archivebox", "config", "--set", "TIMEOUT=100"],
        capture_output=True,
    )

    # Update to new value
    subprocess.run(
-        ['archivebox', 'config', '--set', 'TIMEOUT=200'],
+        ["archivebox", "config", "--set", "TIMEOUT=200"],
        capture_output=True,
    )

    # Get current value
    result = subprocess.run(
-        ['archivebox', 'config', '--get', 'TIMEOUT'],
+        ["archivebox", "config", "--get", "TIMEOUT"],
        capture_output=True,
        text=True,
    )

    # Should show updated value
-    assert '200' in result.stdout
+    assert "200" in result.stdout
--- a/archivebox/tests/test_cli_crawl.py
+++ b/archivebox/tests/test_cli_crawl.py
@@ -25,26 +25,26 @@ class TestCrawlCreate:
        url = create_test_url()

        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'create', url],
+            ["crawl", "create", url],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
-        assert 'Created crawl' in stderr
+        assert "Created crawl" in stderr

        # Check JSONL output
        records = parse_jsonl_output(stdout)
        assert len(records) == 1
-        assert records[0]['type'] == 'Crawl'
-        assert url in records[0]['urls']
+        assert records[0]["type"] == "Crawl"
+        assert url in records[0]["urls"]

    def test_create_from_stdin_urls(self, initialized_archive):
        """Create crawl from stdin URLs (one per line)."""
        urls = [create_test_url() for _ in range(3)]
-        stdin = '\n'.join(urls)
+        stdin = "\n".join(urls)

        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'create'],
+            ["crawl", "create"],
            stdin=stdin,
            data_dir=initialized_archive,
        )
@@ -54,45 +54,45 @@ class TestCrawlCreate:
        records = parse_jsonl_output(stdout)
        assert len(records) == 1
        crawl = records[0]
-        assert crawl['type'] == 'Crawl'
+        assert crawl["type"] == "Crawl"
        # All URLs should be in the crawl
        for url in urls:
-            assert url in crawl['urls']
+            assert url in crawl["urls"]

    def test_create_with_depth(self, initialized_archive):
        """Create crawl with --depth flag."""
        url = create_test_url()

        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'create', '--depth=2', url],
+            ["crawl", "create", "--depth=2", url],  # URL is given as a CLI argument, not on stdin
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout)
-        assert records[0]['max_depth'] == 2
+        assert records[0]["max_depth"] == 2  # --depth=2 is expected to appear as "max_depth" on the first record

    def test_create_with_tag(self, initialized_archive):
        """Create crawl with --tag flag."""
        url = create_test_url()

        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'create', '--tag=test-tag', url],
+            ["crawl", "create", "--tag=test-tag", url],  # URL is given as a CLI argument, not on stdin
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout)
-        assert 'test-tag' in records[0].get('tags', '')
+        assert "test-tag" in records[0].get("tags_str", "")  # NOTE: key changed from "tags" to "tags_str" here, not just quotes

    def test_create_pass_through_other_types(self, initialized_archive):
        """Pass-through records of other types unchanged."""
-        tag_record = {'type': 'Tag', 'id': 'fake-tag-id', 'name': 'test'}
+        tag_record = {"type": "Tag", "id": "fake-tag-id", "name": "test"}
        url = create_test_url()
-        stdin = json.dumps(tag_record) + '\n' + json.dumps({'url': url})
+        stdin = json.dumps(tag_record) + "\n" + json.dumps({"url": url})

        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'create'],
+            ["crawl", "create"],
            stdin=stdin,
            data_dir=initialized_archive,
        )
@@ -101,20 +101,20 @@ class TestCrawlCreate:
        records = parse_jsonl_output(stdout)

        # Should have both the passed-through Tag and the new Crawl
-        types = [r.get('type') for r in records]
-        assert 'Tag' in types
-        assert 'Crawl' in types
+        types = [r.get("type") for r in records]
+        assert "Tag" in types
+        assert "Crawl" in types

    def test_create_pass_through_existing_crawl(self, initialized_archive):
        """Existing Crawl records (with id) are passed through."""
        # First create a crawl
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["crawl", "create", url], data_dir=initialized_archive)
        crawl = parse_jsonl_output(stdout1)[0]

        # Now pipe it back - should pass through
        stdout2, stderr, code = run_archivebox_cmd(
-            ['crawl', 'create'],
+            ["crawl", "create"],
            stdin=json.dumps(crawl),
            data_dir=initialized_archive,
        )
@@ -122,7 +122,7 @@ class TestCrawlCreate:
        assert code == 0
        records = parse_jsonl_output(stdout2)
        assert len(records) == 1
-        assert records[0]['id'] == crawl['id']
+        assert records[0]["id"] == crawl["id"]


 class TestCrawlList:
@@ -131,51 +131,51 @@ class TestCrawlList:
    def test_list_empty(self, initialized_archive):
        """List with no crawls returns empty."""
        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'list'],
+            ["crawl", "list"],  # no filters and nothing created beforehand in this test
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Listed 0 crawls' in stderr
+        assert "Listed 0 crawls" in stderr  # count summary is expected on stderr; stdout is not checked

    def test_list_returns_created(self, initialized_archive):
        """List returns previously created crawls."""
        url = create_test_url()
-        run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
+        run_archivebox_cmd(["crawl", "create", url], data_dir=initialized_archive)  # result ignored; only the side effect matters

        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'list'],
+            ["crawl", "list"],  # unfiltered listing
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout)
        assert len(records) >= 1
-        assert any(url in r.get('urls', '') for r in records)
+        assert any(url in r.get("urls", "") for r in records)  # membership test on each record's "urls"; missing key counts as ""

    def test_list_filter_by_status(self, initialized_archive):
        """Filter crawls by status."""
        url = create_test_url()
-        run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
+        run_archivebox_cmd(["crawl", "create", url], data_dir=initialized_archive)  # result ignored; only the side effect matters

        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'list', '--status=queued'],
+            ["crawl", "list", "--status=queued"],  # presumably a new crawl starts out queued; not asserted here
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout)
        for r in records:
-            assert r['status'] == 'queued'
+            assert r["status"] == "queued"  # NOTE(review): vacuous if records is empty; consider asserting it is non-empty

    def test_list_with_limit(self, initialized_archive):
        """Limit number of results."""
        # Create multiple crawls
        for _ in range(3):
-            run_archivebox_cmd(['crawl', 'create', create_test_url()], data_dir=initialized_archive)
+            run_archivebox_cmd(["crawl", "create", create_test_url()], data_dir=initialized_archive)

        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'list', '--limit=2'],
+            ["crawl", "list", "--limit=2"],
            data_dir=initialized_archive,
        )

@@ -191,21 +191,21 @@ class TestCrawlUpdate:
        """Update crawl status."""
        # Create a crawl
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["crawl", "create", url], data_dir=initialized_archive)
        crawl = parse_jsonl_output(stdout1)[0]

        # Update it
        stdout2, stderr, code = run_archivebox_cmd(
-            ['crawl', 'update', '--status=started'],
+            ["crawl", "update", "--status=started"],
            stdin=json.dumps(crawl),
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Updated 1 crawls' in stderr
+        assert "Updated 1 crawls" in stderr

        records = parse_jsonl_output(stdout2)
-        assert records[0]['status'] == 'started'
+        assert records[0]["status"] == "started"


 class TestCrawlDelete:
@@ -214,45 +214,45 @@ class TestCrawlDelete:
    def test_delete_requires_yes(self, initialized_archive):
        """Delete requires --yes flag."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["crawl", "create", url], data_dir=initialized_archive)
        crawl = parse_jsonl_output(stdout1)[0]

        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'delete'],
+            ["crawl", "delete"],
            stdin=json.dumps(crawl),
            data_dir=initialized_archive,
        )

        assert code == 1
-        assert '--yes' in stderr
+        assert "--yes" in stderr

    def test_delete_with_yes(self, initialized_archive):
        """Delete with --yes flag works."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["crawl", "create", url], data_dir=initialized_archive)
        crawl = parse_jsonl_output(stdout1)[0]

        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'delete', '--yes'],
+            ["crawl", "delete", "--yes"],
            stdin=json.dumps(crawl),
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Deleted 1 crawls' in stderr
+        assert "Deleted 1 crawls" in stderr

    def test_delete_dry_run(self, initialized_archive):
        """Dry run shows what would be deleted."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["crawl", "create", url], data_dir=initialized_archive)
        crawl = parse_jsonl_output(stdout1)[0]

        stdout, stderr, code = run_archivebox_cmd(
-            ['crawl', 'delete', '--dry-run'],
+            ["crawl", "delete", "--dry-run"],
            stdin=json.dumps(crawl),
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Would delete' in stderr
-        assert 'dry run' in stderr.lower()
+        assert "Would delete" in stderr
+        assert "dry run" in stderr.lower()
--- a/archivebox/tests/test_cli_extract.py
+++ b/archivebox/tests/test_cli_extract.py
@@ -15,14 +15,14 @@ def test_extract_runs_on_existing_snapshots(tmp_path, process, disable_extractor

    # Add a snapshot first
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Run extract
    result = subprocess.run(
-        ['archivebox', 'extract'],
+        ["archivebox", "extract"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
@@ -38,7 +38,7 @@ def test_extract_preserves_snapshot_count(tmp_path, process, disable_extractors_

    # Add snapshot
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -50,7 +50,7 @@ def test_extract_preserves_snapshot_count(tmp_path, process, disable_extractors_

    # Run extract
    subprocess.run(
-        ['archivebox', 'extract', '--overwrite'],
+        ["archivebox", "extract", "--overwrite"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
--- a/archivebox/tests/test_cli_extract_input.py
+++ b/archivebox/tests/test_cli_extract_input.py
@@ -6,34 +6,33 @@ import sqlite3
 import json


-
 def test_extract_runs_on_snapshot_id(tmp_path, process, disable_extractors_dict):
    """Test that extract command accepts a snapshot ID."""
    os.chdir(tmp_path)

    # First create a snapshot
    subprocess.run(
-        ['archivebox', 'add', '--index-only', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get the snapshot ID
-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    snapshot_id = c.execute("SELECT id FROM core_snapshot LIMIT 1").fetchone()[0]
    conn.close()

    # Run extract on the snapshot
    result = subprocess.run(
-        ['archivebox', 'extract', '--no-wait', str(snapshot_id)],
+        ["archivebox", "extract", "--no-wait", str(snapshot_id)],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    # Should not error about invalid snapshot ID
-    assert 'not found' not in result.stderr.lower()
+    assert "not found" not in result.stderr.lower()


 def test_extract_with_enabled_extractor_creates_archiveresult(tmp_path, process, disable_extractors_dict):
@@ -42,33 +41,35 @@ def test_extract_with_enabled_extractor_creates_archiveresult(tmp_path, process,

    # First create a snapshot
    subprocess.run(
-        ['archivebox', 'add', '--index-only', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get the snapshot ID
-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    snapshot_id = c.execute("SELECT id FROM core_snapshot LIMIT 1").fetchone()[0]
    conn.close()

    # Run extract with title extractor enabled
    env = disable_extractors_dict.copy()
-    env['SAVE_TITLE'] = 'true'
+    env["SAVE_TITLE"] = "true"

    subprocess.run(
-        ['archivebox', 'extract', '--no-wait', str(snapshot_id)],
+        ["archivebox", "extract", "--no-wait", str(snapshot_id)],
        capture_output=True,
        text=True,
        env=env,
    )

    # Check for archiveresults (may be queued, not completed with --no-wait)
-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
-    count = c.execute("SELECT COUNT(*) FROM core_archiveresult WHERE snapshot_id = ?",
-                     (snapshot_id,)).fetchone()[0]
+    count = c.execute(
+        "SELECT COUNT(*) FROM core_archiveresult WHERE snapshot_id = ?",
+        (snapshot_id,),
+    ).fetchone()[0]
    conn.close()

    # May or may not have results depending on timing
@@ -81,25 +82,25 @@ def test_extract_plugin_option_accepted(tmp_path, process, disable_extractors_di

    # First create a snapshot
    subprocess.run(
-        ['archivebox', 'add', '--index-only', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get the snapshot ID
-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    snapshot_id = c.execute("SELECT id FROM core_snapshot LIMIT 1").fetchone()[0]
    conn.close()

    result = subprocess.run(
-        ['archivebox', 'extract', '--plugin=title', '--no-wait', str(snapshot_id)],
+        ["archivebox", "extract", "--plugin=title", "--no-wait", str(snapshot_id)],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

-    assert 'unrecognized arguments: --plugin' not in result.stderr
+    assert "unrecognized arguments: --plugin" not in result.stderr


 def test_extract_stdin_snapshot_id(tmp_path, process, disable_extractors_dict):
@@ -108,27 +109,27 @@ def test_extract_stdin_snapshot_id(tmp_path, process, disable_extractors_dict):

    # First create a snapshot
    subprocess.run(
-        ['archivebox', 'add', '--index-only', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get the snapshot ID
-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    snapshot_id = c.execute("SELECT id FROM core_snapshot LIMIT 1").fetchone()[0]
    conn.close()

    result = subprocess.run(
-        ['archivebox', 'extract', '--no-wait'],
-        input=f'{snapshot_id}\n',
+        ["archivebox", "extract", "--no-wait"],
+        input=f"{snapshot_id}\n",
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    # Should not show "not found" error
-    assert 'not found' not in result.stderr.lower() or result.returncode == 0
+    assert "not found" not in result.stderr.lower() or result.returncode == 0


 def test_extract_stdin_jsonl_input(tmp_path, process, disable_extractors_dict):
@@ -137,21 +138,21 @@ def test_extract_stdin_jsonl_input(tmp_path, process, disable_extractors_dict):

    # First create a snapshot
    subprocess.run(
-        ['archivebox', 'add', '--index-only', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get the snapshot ID
-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    snapshot_id = c.execute("SELECT id FROM core_snapshot LIMIT 1").fetchone()[0]
    conn.close()

-    jsonl_input = json.dumps({"type": "Snapshot", "id": str(snapshot_id)}) + '\n'
+    jsonl_input = json.dumps({"type": "Snapshot", "id": str(snapshot_id)}) + "\n"

    result = subprocess.run(
-        ['archivebox', 'extract', '--no-wait'],
+        ["archivebox", "extract", "--no-wait"],
        input=jsonl_input,
        capture_output=True,
        text=True,
@@ -159,7 +160,7 @@ def test_extract_stdin_jsonl_input(tmp_path, process, disable_extractors_dict):
    )

    # Should not show "not found" error
-    assert 'not found' not in result.stderr.lower() or result.returncode == 0
+    assert "not found" not in result.stderr.lower() or result.returncode == 0


 def test_extract_pipeline_from_snapshot(tmp_path, process, disable_extractors_dict):
@@ -168,14 +169,14 @@ def test_extract_pipeline_from_snapshot(tmp_path, process, disable_extractors_di

    # Create snapshot and pipe to extract
    snapshot_proc = subprocess.Popen(
-        ['archivebox', 'snapshot', 'https://example.com'],
+        ["archivebox", "snapshot", "https://example.com"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=disable_extractors_dict,
    )

    subprocess.run(
-        ['archivebox', 'extract', '--no-wait'],
+        ["archivebox", "extract", "--no-wait"],
        stdin=snapshot_proc.stdout,
        capture_output=True,
        text=True,
@@ -185,10 +186,12 @@ def test_extract_pipeline_from_snapshot(tmp_path, process, disable_extractors_di
    snapshot_proc.wait()

    # Check database for snapshot
-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
-    snapshot = c.execute("SELECT id, url FROM core_snapshot WHERE url = ?",
-                        ('https://example.com',)).fetchone()
+    snapshot = c.execute(
+        "SELECT id, url FROM core_snapshot WHERE url = ?",
+        ("https://example.com",),
+    ).fetchone()
    conn.close()

    assert snapshot is not None, "Snapshot should be created by pipeline"
@@ -200,18 +203,18 @@ def test_extract_multiple_snapshots(tmp_path, process, disable_extractors_dict):

    # Create multiple snapshots one at a time to avoid deduplication issues
    subprocess.run(
-        ['archivebox', 'add', '--index-only', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
    subprocess.run(
-        ['archivebox', 'add', '--index-only', 'https://iana.org'],
+        ["archivebox", "add", "--index-only", "https://iana.org"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Get all snapshot IDs
-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    snapshot_ids = c.execute("SELECT id FROM core_snapshot").fetchall()
    conn.close()
@@ -219,9 +222,9 @@ def test_extract_multiple_snapshots(tmp_path, process, disable_extractors_dict):
    assert len(snapshot_ids) >= 2, "Should have at least 2 snapshots"

    # Extract from all snapshots
-    ids_input = '\n'.join(str(s[0]) for s in snapshot_ids) + '\n'
+    ids_input = "\n".join(str(s[0]) for s in snapshot_ids) + "\n"
    result = subprocess.run(
-        ['archivebox', 'extract', '--no-wait'],
+        ["archivebox", "extract", "--no-wait"],
        input=ids_input,
        capture_output=True,
        text=True,
@@ -230,7 +233,7 @@ def test_extract_multiple_snapshots(tmp_path, process, disable_extractors_dict):
    assert result.returncode == 0, result.stderr

    # Should not error
-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    count = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    conn.close()
@@ -246,25 +249,25 @@ class TestExtractCLI:
        os.chdir(tmp_path)

        result = subprocess.run(
-            ['archivebox', 'extract', '--help'],
+            ["archivebox", "extract", "--help"],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
-        assert '--plugin' in result.stdout or '-p' in result.stdout
-        assert '--wait' in result.stdout or '--no-wait' in result.stdout
+        assert "--plugin" in result.stdout or "-p" in result.stdout
+        assert "--wait" in result.stdout or "--no-wait" in result.stdout

    def test_cli_no_snapshots_shows_warning(self, tmp_path, process):
        """Test that running without snapshots shows a warning."""
        os.chdir(tmp_path)

        result = subprocess.run(
-            ['archivebox', 'extract', '--no-wait'],
-            input='',
+            ["archivebox", "extract", "--no-wait"],
+            input="",
            capture_output=True,
            text=True,
        )

        # Should show warning about no snapshots or exit normally (empty input)
-        assert result.returncode == 0 or 'No' in result.stderr
+        assert result.returncode == 0 or "No" in result.stderr
--- a/archivebox/tests/test_cli_help.py
+++ b/archivebox/tests/test_cli_help.py
@@ -11,20 +11,20 @@ import subprocess
 def test_help_runs_successfully(tmp_path):
    """Test that help command runs and produces output."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'help'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "help"], capture_output=True, text=True)

    assert result.returncode == 0
    combined = result.stdout + result.stderr
    assert len(combined) > 100
-    assert 'archivebox' in combined.lower()
+    assert "archivebox" in combined.lower()


 def test_help_in_initialized_dir(tmp_path, process):
    """Test help command in initialized data directory."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'help'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "help"], capture_output=True, text=True)

    assert result.returncode == 0
    combined = result.stdout + result.stderr
-    assert 'init' in combined
-    assert 'add' in combined
+    assert "init" in combined
+    assert "add" in combined
--- a/archivebox/tests/test_cli_init.py
+++ b/archivebox/tests/test_cli_init.py
@@ -11,13 +11,13 @@ import subprocess
 from archivebox.config.common import STORAGE_CONFIG


-DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
+DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace("6", "7").replace("4", "5")


 def test_init_creates_database_file(tmp_path):
    """Test that init creates index.sqlite3 database file."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'init'], capture_output=True)
+    result = subprocess.run(["archivebox", "init"], capture_output=True)

    assert result.returncode == 0
    db_path = tmp_path / "index.sqlite3"
@@ -28,7 +28,7 @@ def test_init_creates_database_file(tmp_path):
 def test_init_creates_archive_directory(tmp_path):
    """Test that init creates archive directory."""
    os.chdir(tmp_path)
-    subprocess.run(['archivebox', 'init'], capture_output=True)
+    subprocess.run(["archivebox", "init"], capture_output=True)

    archive_dir = tmp_path / "archive"
    assert archive_dir.exists()
@@ -38,7 +38,7 @@ def test_init_creates_archive_directory(tmp_path):
 def test_init_creates_sources_directory(tmp_path):
    """Test that init creates sources directory."""
    os.chdir(tmp_path)
-    subprocess.run(['archivebox', 'init'], capture_output=True)
+    subprocess.run(["archivebox", "init"], capture_output=True)

    sources_dir = tmp_path / "sources"
    assert sources_dir.exists()
@@ -48,7 +48,7 @@ def test_init_creates_sources_directory(tmp_path):
 def test_init_creates_logs_directory(tmp_path):
    """Test that init creates logs directory."""
    os.chdir(tmp_path)
-    subprocess.run(['archivebox', 'init'], capture_output=True)
+    subprocess.run(["archivebox", "init"], capture_output=True)

    logs_dir = tmp_path / "logs"
    assert logs_dir.exists()
@@ -58,7 +58,7 @@ def test_init_creates_logs_directory(tmp_path):
 def test_init_creates_config_file(tmp_path):
    """Test that init creates ArchiveBox.conf config file."""
    os.chdir(tmp_path)
-    subprocess.run(['archivebox', 'init'], capture_output=True)
+    subprocess.run(["archivebox", "init"], capture_output=True)

    config_file = tmp_path / "ArchiveBox.conf"
    assert config_file.exists()
@@ -68,7 +68,7 @@ def test_init_creates_config_file(tmp_path):
 def test_init_runs_migrations(tmp_path):
    """Test that init runs Django migrations and creates core tables."""
    os.chdir(tmp_path)
-    subprocess.run(['archivebox', 'init'], capture_output=True)
+    subprocess.run(["archivebox", "init"], capture_output=True)

    # Check that migrations were applied
    conn = sqlite3.connect("index.sqlite3")
@@ -76,7 +76,7 @@ def test_init_runs_migrations(tmp_path):

    # Check django_migrations table exists
    migrations = c.execute(
-        "SELECT name FROM sqlite_master WHERE type='table' AND name='django_migrations'"
+        "SELECT name FROM sqlite_master WHERE type='table' AND name='django_migrations'",
    ).fetchall()
    assert len(migrations) == 1

@@ -90,14 +90,14 @@ def test_init_runs_migrations(tmp_path):
 def test_init_creates_core_snapshot_table(tmp_path):
    """Test that init creates core_snapshot table."""
    os.chdir(tmp_path)
-    subprocess.run(['archivebox', 'init'], capture_output=True)
+    subprocess.run(["archivebox", "init"], capture_output=True)

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    # Check core_snapshot table exists
    tables = c.execute(
-        "SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot'"
+        "SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot'",
    ).fetchall()
    assert len(tables) == 1

@@ -107,14 +107,14 @@ def test_init_creates_core_snapshot_table(tmp_path):
 def test_init_creates_crawls_crawl_table(tmp_path):
    """Test that init creates crawls_crawl table."""
    os.chdir(tmp_path)
-    subprocess.run(['archivebox', 'init'], capture_output=True)
+    subprocess.run(["archivebox", "init"], capture_output=True)

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    # Check crawls_crawl table exists
    tables = c.execute(
-        "SELECT name FROM sqlite_master WHERE type='table' AND name='crawls_crawl'"
+        "SELECT name FROM sqlite_master WHERE type='table' AND name='crawls_crawl'",
    ).fetchall()
    assert len(tables) == 1

@@ -124,14 +124,14 @@ def test_init_creates_crawls_crawl_table(tmp_path):
 def test_init_creates_core_archiveresult_table(tmp_path):
    """Test that init creates core_archiveresult table."""
    os.chdir(tmp_path)
-    subprocess.run(['archivebox', 'init'], capture_output=True)
+    subprocess.run(["archivebox", "init"], capture_output=True)

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    # Check core_archiveresult table exists
    tables = c.execute(
-        "SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'"
+        "SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'",
    ).fetchall()
    assert len(tables) == 1

@@ -141,7 +141,7 @@ def test_init_creates_core_archiveresult_table(tmp_path):
 def test_init_sets_correct_file_permissions(tmp_path):
    """Test that init sets correct permissions on created files."""
    os.chdir(tmp_path)
-    subprocess.run(['archivebox', 'init'], capture_output=True)
+    subprocess.run(["archivebox", "init"], capture_output=True)

    # Check database permissions
    db_path = tmp_path / "index.sqlite3"
@@ -157,12 +157,12 @@ def test_init_is_idempotent(tmp_path):
    os.chdir(tmp_path)

    # First init
-    result1 = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
+    result1 = subprocess.run(["archivebox", "init"], capture_output=True, text=True)
    assert result1.returncode == 0
    assert "Initializing a new ArchiveBox" in result1.stdout

    # Second init should update, not fail
-    result2 = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
+    result2 = subprocess.run(["archivebox", "init"], capture_output=True, text=True)
    assert result2.returncode == 0
    assert "updating existing ArchiveBox" in result2.stdout or "up-to-date" in result2.stdout.lower()

@@ -180,7 +180,7 @@ def test_init_with_existing_data_preserves_snapshots(tmp_path, process, disable_

    # Add a snapshot
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -193,7 +193,7 @@ def test_init_with_existing_data_preserves_snapshots(tmp_path, process, disable_
    conn.close()

    # Run init again
-    result = subprocess.run(['archivebox', 'init'], capture_output=True)
+    result = subprocess.run(["archivebox", "init"], capture_output=True)
    assert result.returncode == 0

    # Snapshot should still exist
@@ -208,7 +208,7 @@ def test_init_quick_flag_skips_checks(tmp_path):
    """Test that init --quick runs faster by skipping some checks."""
    os.chdir(tmp_path)

-    result = subprocess.run(['archivebox', 'init', '--quick'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "init", "--quick"], capture_output=True, text=True)

    assert result.returncode == 0
    # Database should still be created
@@ -219,14 +219,14 @@ def test_init_quick_flag_skips_checks(tmp_path):
 def test_init_creates_machine_table(tmp_path):
    """Test that init creates the machine_machine table."""
    os.chdir(tmp_path)
-    subprocess.run(['archivebox', 'init'], capture_output=True)
+    subprocess.run(["archivebox", "init"], capture_output=True)

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    # Check machine_machine table exists
    tables = c.execute(
-        "SELECT name FROM sqlite_master WHERE type='table' AND name='machine_machine'"
+        "SELECT name FROM sqlite_master WHERE type='table' AND name='machine_machine'",
    ).fetchall()
    conn.close()

@@ -236,18 +236,18 @@ def test_init_creates_machine_table(tmp_path):
 def test_init_output_shows_collection_info(tmp_path):
    """Test that init output shows helpful collection information."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "init"], capture_output=True, text=True)

    output = result.stdout
    # Should show some helpful info about the collection
-    assert 'ArchiveBox' in output or 'collection' in output.lower() or 'Initializing' in output
+    assert "ArchiveBox" in output or "collection" in output.lower() or "Initializing" in output


 def test_init_ignores_unrecognized_archive_directories(tmp_path, process, disable_extractors_dict):
    """Test that init upgrades existing dirs without choking on extra folders."""
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
@@ -255,7 +255,7 @@ def test_init_ignores_unrecognized_archive_directories(tmp_path, process, disabl
    (tmp_path / "archive" / "some_random_folder").mkdir(parents=True, exist_ok=True)

    result = subprocess.run(
-        ['archivebox', 'init'],
+        ["archivebox", "init"],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
--- a/archivebox/tests/test_cli_install.py
+++ b/archivebox/tests/test_cli_install.py
@@ -14,7 +14,7 @@ def test_install_runs_successfully(tmp_path, process):
    """Test that install command runs without error."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'install', '--dry-run'],
+        ["archivebox", "install", "--dry-run"],
        capture_output=True,
        text=True,
        timeout=60,
@@ -29,7 +29,7 @@ def test_install_creates_binary_records_in_db(tmp_path, process):
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'install', '--dry-run'],
+        ["archivebox", "install", "--dry-run"],
        capture_output=True,
        timeout=60,
    )
@@ -40,7 +40,7 @@ def test_install_creates_binary_records_in_db(tmp_path, process):

    # Check machine_binary table exists
    tables = c.execute(
-        "SELECT name FROM sqlite_master WHERE type='table' AND name='machine_binary'"
+        "SELECT name FROM sqlite_master WHERE type='table' AND name='machine_binary'",
    ).fetchall()
    conn.close()

@@ -52,14 +52,14 @@ def test_install_dry_run_does_not_install(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'install', '--dry-run'],
+        ["archivebox", "install", "--dry-run"],
        capture_output=True,
        text=True,
        timeout=60,
    )

    # Should complete without actually installing
-    assert 'dry' in result.stdout.lower() or result.returncode in [0, 1]
+    assert "dry" in result.stdout.lower() or result.returncode in [0, 1]


 def test_install_detects_system_binaries(tmp_path, process):
@@ -67,7 +67,7 @@ def test_install_detects_system_binaries(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'install', '--dry-run'],
+        ["archivebox", "install", "--dry-run"],
        capture_output=True,
        text=True,
        timeout=60,
@@ -82,7 +82,7 @@ def test_install_shows_binary_status(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'install', '--dry-run'],
+        ["archivebox", "install", "--dry-run"],
        capture_output=True,
        text=True,
        timeout=60,
@@ -97,34 +97,34 @@ def test_install_dry_run_prints_dry_run_message(tmp_path, process):
    """Test that install --dry-run clearly reports that no changes will be made."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'install', '--dry-run'],
+        ["archivebox", "install", "--dry-run"],
        capture_output=True,
        text=True,
        timeout=60,
    )

    assert result.returncode == 0
-    assert 'dry run' in result.stdout.lower()
+    assert "dry run" in result.stdout.lower()


 def test_install_help_lists_dry_run_flag(tmp_path):
    """Test that install --help documents the dry-run option."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'install', '--help'],
+        ["archivebox", "install", "--help"],
        capture_output=True,
        text=True,
    )

    assert result.returncode == 0
-    assert '--dry-run' in result.stdout or '-d' in result.stdout
+    assert "--dry-run" in result.stdout or "-d" in result.stdout


 def test_install_invalid_option_fails(tmp_path):
    """Test that invalid install options fail cleanly."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'install', '--invalid-option'],
+        ["archivebox", "install", "--invalid-option"],
        capture_output=True,
        text=True,
    )
@@ -136,29 +136,31 @@ def test_install_from_empty_dir_initializes_collection(tmp_path):
    """Test that install bootstraps an empty dir before performing work."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'install', '--dry-run'],
+        ["archivebox", "install", "--dry-run"],
        capture_output=True,
        text=True,
    )

    output = result.stdout + result.stderr
    assert result.returncode == 0
-    assert 'Initializing' in output or 'Dry run' in output or 'init' in output.lower()
+    assert "Initializing" in output or "Dry run" in output or "init" in output.lower()


 def test_install_updates_binary_table(tmp_path, process):
    """Test that install completes and only mutates dependency state."""
    os.chdir(tmp_path)
    env = os.environ.copy()
-    tmp_short = Path('/tmp') / f'abx-install-{tmp_path.name}'
+    tmp_short = Path("/tmp") / f"abx-install-{tmp_path.name}"
    tmp_short.mkdir(parents=True, exist_ok=True)
-    env.update({
-        'TMP_DIR': str(tmp_short),
-        'ARCHIVEBOX_ALLOW_NO_UNIX_SOCKETS': 'true',
-    })
+    env.update(
+        {
+            "TMP_DIR": str(tmp_short),
+            "ARCHIVEBOX_ALLOW_NO_UNIX_SOCKETS": "true",
+        },
+    )

    result = subprocess.run(
-        ['archivebox', 'install'],
+        ["archivebox", "install"],
        capture_output=True,
        text=True,
        timeout=420,
@@ -171,16 +173,18 @@ def test_install_updates_binary_table(tmp_path, process):
    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

-    binary_counts = dict(c.execute(
-        "SELECT status, COUNT(*) FROM machine_binary GROUP BY status"
-    ).fetchall())
+    binary_counts = dict(
+        c.execute(
+            "SELECT status, COUNT(*) FROM machine_binary GROUP BY status",
+        ).fetchall(),
+    )
    snapshot_count = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    sealed_crawls = c.execute(
-        "SELECT COUNT(*) FROM crawls_crawl WHERE status='sealed'"
+        "SELECT COUNT(*) FROM crawls_crawl WHERE status='sealed'",
    ).fetchone()[0]
    conn.close()

    assert sealed_crawls >= 1
    assert snapshot_count == 0
-    assert binary_counts.get('queued', 0) == 0
-    assert binary_counts.get('installed', 0) > 0
+    assert binary_counts.get("queued", 0) == 0
+    assert binary_counts.get("installed", 0) > 0
--- a/archivebox/tests/test_cli_list.py
+++ b/archivebox/tests/test_cli_list.py
@@ -11,52 +11,48 @@ import subprocess


 def _parse_jsonl(stdout: str) -> list[dict]:
-    return [
-        json.loads(line)
-        for line in stdout.splitlines()
-        if line.strip().startswith('{')
-    ]
+    return [json.loads(line) for line in stdout.splitlines() if line.strip().startswith("{")]


 def test_list_outputs_existing_snapshots_as_jsonl(tmp_path, process, disable_extractors_dict):
    """Test that list prints one JSON object per stored snapshot."""
    os.chdir(tmp_path)
-    for url in ['https://example.com', 'https://iana.org']:
+    for url in ["https://example.com", "https://iana.org"]:
        subprocess.run(
-            ['archivebox', 'add', '--index-only', '--depth=0', url],
+            ["archivebox", "add", "--index-only", "--depth=0", url],
            capture_output=True,
            env=disable_extractors_dict,
            check=True,
        )

    result = subprocess.run(
-        ['archivebox', 'list'],
+        ["archivebox", "list"],
        capture_output=True,
        text=True,
        timeout=30,
    )

    rows = _parse_jsonl(result.stdout)
-    urls = {row['url'] for row in rows}
+    urls = {row["url"] for row in rows}

    assert result.returncode == 0, result.stderr
-    assert 'https://example.com' in urls
-    assert 'https://iana.org' in urls
+    assert "https://example.com" in urls
+    assert "https://iana.org" in urls


 def test_list_filters_by_url_icontains(tmp_path, process, disable_extractors_dict):
    """Test that list --url__icontains returns only matching snapshots."""
    os.chdir(tmp_path)
-    for url in ['https://example.com', 'https://iana.org']:
+    for url in ["https://example.com", "https://iana.org"]:
        subprocess.run(
-            ['archivebox', 'add', '--index-only', '--depth=0', url],
+            ["archivebox", "add", "--index-only", "--depth=0", url],
            capture_output=True,
            env=disable_extractors_dict,
            check=True,
        )

    result = subprocess.run(
-        ['archivebox', 'list', '--url__icontains', 'example.com'],
+        ["archivebox", "list", "--url__icontains", "example.com"],
        capture_output=True,
        text=True,
        timeout=30,
@@ -65,15 +61,15 @@ def test_list_filters_by_url_icontains(tmp_path, process, disable_extractors_dic
    rows = _parse_jsonl(result.stdout)
    assert result.returncode == 0, result.stderr
    assert len(rows) == 1
-    assert rows[0]['url'] == 'https://example.com'
+    assert rows[0]["url"] == "https://example.com"


 def test_list_filters_by_crawl_id_and_limit(tmp_path, process, disable_extractors_dict):
    """Test that crawl-id and limit filters constrain the result set."""
    os.chdir(tmp_path)
-    for url in ['https://example.com', 'https://iana.org']:
+    for url in ["https://example.com", "https://iana.org"]:
        subprocess.run(
-            ['archivebox', 'add', '--index-only', '--depth=0', url],
+            ["archivebox", "add", "--index-only", "--depth=0", url],
            capture_output=True,
            env=disable_extractors_dict,
            check=True,
@@ -81,14 +77,16 @@ def test_list_filters_by_crawl_id_and_limit(tmp_path, process, disable_extractor

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
-    crawl_id = str(c.execute(
-        "SELECT crawl_id FROM core_snapshot WHERE url = ?",
-        ('https://example.com',),
-    ).fetchone()[0])
+    crawl_id = str(
+        c.execute(
+            "SELECT crawl_id FROM core_snapshot WHERE url = ?",
+            ("https://example.com",),
+        ).fetchone()[0],
+    )
    conn.close()

    result = subprocess.run(
-        ['archivebox', 'list', '--crawl-id', crawl_id, '--limit', '1'],
+        ["archivebox", "list", "--crawl-id", crawl_id, "--limit", "1"],
        capture_output=True,
        text=True,
        timeout=30,
@@ -97,15 +95,15 @@ def test_list_filters_by_crawl_id_and_limit(tmp_path, process, disable_extractor
    rows = _parse_jsonl(result.stdout)
    assert result.returncode == 0, result.stderr
    assert len(rows) == 1
-    assert rows[0]['crawl_id'].replace('-', '') == crawl_id.replace('-', '')
-    assert rows[0]['url'] == 'https://example.com'
+    assert rows[0]["crawl_id"].replace("-", "") == crawl_id.replace("-", "")
+    assert rows[0]["url"] == "https://example.com"


 def test_list_filters_by_status(tmp_path, process, disable_extractors_dict):
    """Test that list can filter using the current snapshot status."""
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
@@ -117,7 +115,7 @@ def test_list_filters_by_status(tmp_path, process, disable_extractors_dict):
    conn.close()

    result = subprocess.run(
-        ['archivebox', 'list', '--status', status],
+        ["archivebox", "list", "--status", status],
        capture_output=True,
        text=True,
        timeout=30,
@@ -126,7 +124,7 @@ def test_list_filters_by_status(tmp_path, process, disable_extractors_dict):
    rows = _parse_jsonl(result.stdout)
    assert result.returncode == 0, result.stderr
    assert len(rows) == 1
-    assert rows[0]['status'] == status
+    assert rows[0]["status"] == status


 def test_list_help_lists_filter_options(tmp_path, process):
@@ -134,13 +132,60 @@ def test_list_help_lists_filter_options(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'list', '--help'],
+        ["archivebox", "list", "--help"],
        capture_output=True,
        text=True,
        timeout=30,
    )

    assert result.returncode == 0
-    assert '--url__icontains' in result.stdout
-    assert '--crawl-id' in result.stdout
-    assert '--limit' in result.stdout
+    assert "--url__icontains" in result.stdout
+    assert "--crawl-id" in result.stdout
+    assert "--limit" in result.stdout
+    assert "--search" in result.stdout
+
+
+def test_list_allows_sort_with_limit(tmp_path, process, disable_extractors_dict):
+    """Test that list can sort and then apply limit without queryset slicing errors."""
+    os.chdir(tmp_path)
+    for url in ["https://example.com", "https://iana.org", "https://example.net"]:
+        subprocess.run(
+            ["archivebox", "add", "--index-only", "--depth=0", url],
+            capture_output=True,
+            env=disable_extractors_dict,
+            check=True,
+        )
+
+    result = subprocess.run(
+        ["archivebox", "list", "--limit", "2", "--sort", "-created_at"],
+        capture_output=True,
+        text=True,
+        timeout=30,
+    )
+
+    rows = _parse_jsonl(result.stdout)
+    assert result.returncode == 0, result.stderr
+    assert len(rows) == 2
+
+
+def test_list_search_meta_matches_metadata(tmp_path, process, disable_extractors_dict):
+    """Test that list --search=meta applies metadata search to the queryset."""
+    os.chdir(tmp_path)
+    subprocess.run(
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
+        capture_output=True,
+        env=disable_extractors_dict,
+        check=True,
+    )
+
+    result = subprocess.run(
+        ["archivebox", "list", "--search=meta", "example.com"],
+        capture_output=True,
+        text=True,
+        timeout=30,
+    )
+
+    rows = _parse_jsonl(result.stdout)
+    assert result.returncode == 0, result.stderr
+    assert len(rows) == 1
+    assert rows[0]["url"] == "https://example.com"
--- a/archivebox/tests/test_cli_manage.py
+++ b/archivebox/tests/test_cli_manage.py
@@ -13,7 +13,7 @@ def test_manage_help_works(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'manage', 'help'],
+        ["archivebox", "manage", "help"],
        capture_output=True,
        text=True,
        timeout=30,
@@ -28,7 +28,7 @@ def test_manage_showmigrations_works(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'manage', 'showmigrations'],
+        ["archivebox", "manage", "showmigrations"],
        capture_output=True,
        text=True,
        timeout=30,
@@ -36,7 +36,7 @@ def test_manage_showmigrations_works(tmp_path, process):

    assert result.returncode == 0
    # Should show migration status
-    assert 'core' in result.stdout or '[' in result.stdout
+    assert "core" in result.stdout or "[" in result.stdout


 def test_manage_dbshell_command_exists(tmp_path, process):
@@ -44,7 +44,7 @@ def test_manage_dbshell_command_exists(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'manage', 'help', 'dbshell'],
+        ["archivebox", "manage", "help", "dbshell"],
        capture_output=True,
        text=True,
        timeout=30,
@@ -52,7 +52,7 @@ def test_manage_dbshell_command_exists(tmp_path, process):

    # Should show help for dbshell
    assert result.returncode == 0
-    assert 'dbshell' in result.stdout or 'database' in result.stdout.lower()
+    assert "dbshell" in result.stdout or "database" in result.stdout.lower()


 def test_manage_check_works(tmp_path, process):
@@ -60,7 +60,7 @@ def test_manage_check_works(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'manage', 'check'],
+        ["archivebox", "manage", "check"],
        capture_output=True,
        text=True,
        timeout=30,
--- a/archivebox/tests/test_cli_piping.py
+++ b/archivebox/tests/test_cli_piping.py
@@ -111,14 +111,14 @@ def test_read_args_or_stdin_handles_args_stdin_and_mixed_jsonl():
        read_args_or_stdin(
            (),
            stream=MockTTYStringIO(
-                'https://plain-url.com\n'
+                "https://plain-url.com\n"
                '{"type":"Snapshot","url":"https://jsonl-url.com","tags":"test"}\n'
                '{"type":"Tag","id":"tag-1","name":"example"}\n'
-                '01234567-89ab-cdef-0123-456789abcdef\n'
-                'not valid json\n',
+                "01234567-89ab-cdef-0123-456789abcdef\n"
+                "not valid json\n",
                is_tty=False,
            ),
-        )
+        ),
    )
    assert len(stdin_records) == 4
    assert stdin_records[0]["url"] == "https://plain-url.com"
@@ -135,7 +135,7 @@ def test_read_args_or_stdin_handles_args_stdin_and_mixed_jsonl():
                '{"type":"Crawl","id":"crawl-1","urls":"https://example.com\\nhttps://foo.com"}\n',
                is_tty=False,
            ),
-        )
+        ),
    )
    assert len(crawl_records) == 1
    assert crawl_records[0]["type"] == TYPE_CRAWL
@@ -151,14 +151,12 @@ def test_collect_urls_from_plugins_reads_only_parser_outputs(tmp_path):

    (tmp_path / "wget").mkdir()
    (tmp_path / "wget" / "urls.jsonl").write_text(
-        '{"url":"https://wget-link-1.com"}\n'
-        '{"url":"https://wget-link-2.com"}\n',
+        '{"url":"https://wget-link-1.com"}\n{"url":"https://wget-link-2.com"}\n',
        encoding="utf-8",
    )
    (tmp_path / "parse_html_urls").mkdir()
    (tmp_path / "parse_html_urls" / "urls.jsonl").write_text(
-        '{"url":"https://html-link-1.com"}\n'
-        '{"url":"https://html-link-2.com","title":"HTML Link 2"}\n',
+        '{"url":"https://html-link-1.com"}\n{"url":"https://html-link-2.com","title":"HTML Link 2"}\n',
        encoding="utf-8",
    )
    (tmp_path / "screenshot").mkdir()
@@ -187,6 +185,22 @@ def test_collect_urls_from_plugins_trims_markdown_suffixes(tmp_path):
    assert urls[0]["url"] == "https://docs.sweeting.me/s/youtube-favorites"


+def test_collect_urls_from_plugins_trims_trailing_punctuation(tmp_path):
+    from archivebox.hooks import collect_urls_from_plugins
+
+    (tmp_path / "parse_html_urls").mkdir()
+    (tmp_path / "parse_html_urls" / "urls.jsonl").write_text(
+        ('{"url":"https://github.com/ArchiveBox/ArchiveBox."}\n{"url":"https://github.com/abc?abc#234234?."}\n'),
+        encoding="utf-8",
+    )
+
+    urls = collect_urls_from_plugins(tmp_path)
+    assert [url["url"] for url in urls] == [
+        "https://github.com/ArchiveBox/ArchiveBox",
+        "https://github.com/abc?abc#234234",
+    ]
+
+
 def test_crawl_create_stdout_pipes_into_run(initialized_archive):
    """`archivebox crawl create | archivebox run` should queue and materialize snapshots."""
    url = create_test_url()
@@ -311,10 +325,7 @@ def test_archiveresult_list_stdout_pipes_into_run(initialized_archive):
    _assert_stdout_is_jsonl_only(run_stdout)

    run_records = parse_jsonl_output(run_stdout)
-    assert any(
-        record.get("type") == "ArchiveResult" and record.get("id") == archiveresult["id"]
-        for record in run_records
-    )
+    assert any(record.get("type") == "ArchiveResult" and record.get("id") == archiveresult["id"] for record in run_records)


 def test_binary_create_stdout_pipes_into_run(initialized_archive):
--- a/archivebox/tests/test_cli_remove.py
+++ b/archivebox/tests/test_cli_remove.py
@@ -14,8 +14,8 @@ def _find_snapshot_dir(data_dir: Path, snapshot_id: str) -> Path | None:
    candidates = {snapshot_id}
    if len(snapshot_id) == 32:
        candidates.add(f"{snapshot_id[:8]}-{snapshot_id[8:12]}-{snapshot_id[12:16]}-{snapshot_id[16:20]}-{snapshot_id[20:]}")
-    elif len(snapshot_id) == 36 and '-' in snapshot_id:
-        candidates.add(snapshot_id.replace('-', ''))
+    elif len(snapshot_id) == 36 and "-" in snapshot_id:
+        candidates.add(snapshot_id.replace("-", ""))

    for needle in candidates:
        for path in data_dir.rglob(needle):
@@ -30,7 +30,7 @@ def test_remove_deletes_snapshot_from_db(tmp_path, process, disable_extractors_d

    # Add a snapshot
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -44,7 +44,7 @@ def test_remove_deletes_snapshot_from_db(tmp_path, process, disable_extractors_d

    # Remove it
    subprocess.run(
-        ['archivebox', 'remove', 'https://example.com', '--yes'],
+        ["archivebox", "remove", "https://example.com", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -64,7 +64,7 @@ def test_remove_deletes_archive_directory(tmp_path, process, disable_extractors_

    # Add a snapshot
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -78,7 +78,7 @@ def test_remove_deletes_archive_directory(tmp_path, process, disable_extractors_
    assert snapshot_dir is not None, f"Snapshot output directory not found for {snapshot_id}"

    subprocess.run(
-        ['archivebox', 'remove', 'https://example.com', '--yes', '--delete'],
+        ["archivebox", "remove", "https://example.com", "--yes", "--delete"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -91,14 +91,14 @@ def test_remove_yes_flag_skips_confirmation(tmp_path, process, disable_extractor
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Remove with --yes should complete without interaction
    result = subprocess.run(
-        ['archivebox', 'remove', 'https://example.com', '--yes'],
+        ["archivebox", "remove", "https://example.com", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
@@ -114,9 +114,9 @@ def test_remove_multiple_snapshots(tmp_path, process, disable_extractors_dict):
    os.chdir(tmp_path)

    # Add multiple snapshots
-    for url in ['https://example.com', 'https://example.org']:
+    for url in ["https://example.com", "https://example.org"]:
        subprocess.run(
-            ['archivebox', 'add', '--index-only', '--depth=0', url],
+            ["archivebox", "add", "--index-only", "--depth=0", url],
            capture_output=True,
            env=disable_extractors_dict,
        )
@@ -130,7 +130,7 @@ def test_remove_multiple_snapshots(tmp_path, process, disable_extractors_dict):

    # Remove both
    subprocess.run(
-        ['archivebox', 'remove', 'https://example.com', 'https://example.org', '--yes'],
+        ["archivebox", "remove", "https://example.com", "https://example.org", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -150,14 +150,14 @@ def test_remove_with_filter(tmp_path, process, disable_extractors_dict):

    # Add snapshots
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Remove using filter
    result = subprocess.run(
-        ['archivebox', 'remove', '--filter-type=search', '--filter=example.com', '--yes'],
+        ["archivebox", "remove", "--filter-type=search", "--filter=example.com", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
@@ -171,16 +171,16 @@ def test_remove_with_regex_filter_deletes_all_matches(tmp_path, process, disable
    """Test regex filters remove every matching snapshot."""
    os.chdir(tmp_path)

-    for url in ['https://example.com', 'https://iana.org']:
+    for url in ["https://example.com", "https://iana.org"]:
        subprocess.run(
-            ['archivebox', 'add', '--index-only', '--depth=0', url],
+            ["archivebox", "add", "--index-only", "--depth=0", url],
            capture_output=True,
            env=disable_extractors_dict,
            check=True,
        )

    result = subprocess.run(
-        ['archivebox', 'remove', '--filter-type=regex', '.*', '--yes'],
+        ["archivebox", "remove", "--filter-type=regex", ".*", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
@@ -193,7 +193,7 @@ def test_remove_with_regex_filter_deletes_all_matches(tmp_path, process, disable

    output = result.stdout.decode("utf-8") + result.stderr.decode("utf-8")
    assert count_after == 0
-    assert 'Removed' in output or 'Found' in output
+    assert "Removed" in output or "Found" in output


 def test_remove_nonexistent_url_fails_gracefully(tmp_path, process, disable_extractors_dict):
@@ -201,30 +201,30 @@ def test_remove_nonexistent_url_fails_gracefully(tmp_path, process, disable_extr
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'remove', 'https://nonexistent-url-12345.com', '--yes'],
+        ["archivebox", "remove", "https://nonexistent-url-12345.com", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Should fail or show error
-    stdout_text = result.stdout.decode('utf-8', errors='replace').lower()
-    assert result.returncode != 0 or 'not found' in stdout_text or 'no matches' in stdout_text
+    stdout_text = result.stdout.decode("utf-8", errors="replace").lower()
+    assert result.returncode != 0 or "not found" in stdout_text or "no matches" in stdout_text


 def test_remove_reports_remaining_link_count_correctly(tmp_path, process, disable_extractors_dict):
    """Test remove reports the remaining snapshot count after deletion."""
    os.chdir(tmp_path)

-    for url in ['https://example.com', 'https://example.org']:
+    for url in ["https://example.com", "https://example.org"]:
        subprocess.run(
-            ['archivebox', 'add', '--index-only', '--depth=0', url],
+            ["archivebox", "add", "--index-only", "--depth=0", url],
            capture_output=True,
            env=disable_extractors_dict,
            check=True,
        )

    result = subprocess.run(
-        ['archivebox', 'remove', 'https://example.org', '--yes'],
+        ["archivebox", "remove", "https://example.org", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
@@ -240,14 +240,14 @@ def test_remove_after_flag(tmp_path, process, disable_extractors_dict):
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Try remove with --after flag (should work or show usage)
    result = subprocess.run(
-        ['archivebox', 'remove', '--after=2020-01-01', '--yes'],
+        ["archivebox", "remove", "--after=2020-01-01", "--yes"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
--- a/archivebox/tests/test_cli_run.py
+++ b/archivebox/tests/test_cli_run.py
@@ -21,8 +21,8 @@ from archivebox.tests.conftest import (
 )

 RUN_TEST_ENV = {
-    'PLUGINS': 'favicon',
-    'SAVE_FAVICON': 'True',
+    "PLUGINS": "favicon",
+    "SAVE_FAVICON": "True",
 }


@@ -34,7 +34,7 @@ class TestRunWithCrawl:
        crawl_record = create_test_crawl_json()

        stdout, stderr, code = run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=json.dumps(crawl_record),
            data_dir=initialized_archive,
            timeout=120,
@@ -45,21 +45,21 @@ class TestRunWithCrawl:

        # Should output the created Crawl
        records = parse_jsonl_output(stdout)
-        crawl_records = [r for r in records if r.get('type') == 'Crawl']
+        crawl_records = [r for r in records if r.get("type") == "Crawl"]
        assert len(crawl_records) >= 1
-        assert crawl_records[0].get('id')  # Should have an id now
+        assert crawl_records[0].get("id")  # Should have an id now

    def test_run_with_existing_crawl(self, initialized_archive):
        """Run re-queues an existing Crawl (with id)."""
        url = create_test_url()

        # First create a crawl
-        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive, env=RUN_TEST_ENV)
+        stdout1, _, _ = run_archivebox_cmd(["crawl", "create", url], data_dir=initialized_archive, env=RUN_TEST_ENV)
        crawl = parse_jsonl_output(stdout1)[0]

        # Run with the existing crawl
        stdout2, stderr, code = run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=json.dumps(crawl),
            data_dir=initialized_archive,
            timeout=120,
@@ -79,7 +79,7 @@ class TestRunWithSnapshot:
        snapshot_record = create_test_snapshot_json()

        stdout, stderr, code = run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=json.dumps(snapshot_record),
            data_dir=initialized_archive,
            timeout=120,
@@ -89,21 +89,21 @@ class TestRunWithSnapshot:
        assert code == 0, f"Command failed: {stderr}"

        records = parse_jsonl_output(stdout)
-        snapshot_records = [r for r in records if r.get('type') == 'Snapshot']
+        snapshot_records = [r for r in records if r.get("type") == "Snapshot"]
        assert len(snapshot_records) >= 1
-        assert snapshot_records[0].get('id')
+        assert snapshot_records[0].get("id")

    def test_run_with_existing_snapshot(self, initialized_archive):
        """Run re-queues an existing Snapshot (with id)."""
        url = create_test_url()

        # First create a snapshot
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive, env=RUN_TEST_ENV)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive, env=RUN_TEST_ENV)
        snapshot = parse_jsonl_output(stdout1)[0]

        # Run with the existing snapshot
        stdout2, stderr, code = run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
            timeout=120,
@@ -117,10 +117,10 @@ class TestRunWithSnapshot:
    def test_run_with_plain_url(self, initialized_archive):
        """Run accepts plain URL records (no type field)."""
        url = create_test_url()
-        url_record = {'url': url}
+        url_record = {"url": url}

        stdout, stderr, code = run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=json.dumps(url_record),
            data_dir=initialized_archive,
            timeout=120,
@@ -140,21 +140,21 @@ class TestRunWithArchiveResult:
        url = create_test_url()

        # Create snapshot and archive result
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive, env=RUN_TEST_ENV)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive, env=RUN_TEST_ENV)
        snapshot = parse_jsonl_output(stdout1)[0]

        stdout2, _, _ = run_archivebox_cmd(
-            ['archiveresult', 'create', '--plugin=favicon'],
+            ["archiveresult", "create", "--plugin=favicon"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
            env=RUN_TEST_ENV,
        )
-        ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
+        ar = next(r for r in parse_jsonl_output(stdout2) if r.get("type") == "ArchiveResult")

        # Update to failed
-        ar['status'] = 'failed'
+        ar["status"] = "failed"
        run_archivebox_cmd(
-            ['archiveresult', 'update', '--status=failed'],
+            ["archiveresult", "update", "--status=failed"],
            stdin=json.dumps(ar),
            data_dir=initialized_archive,
            env=RUN_TEST_ENV,
@@ -162,7 +162,7 @@ class TestRunWithArchiveResult:

        # Now run should re-queue it
        stdout3, stderr, code = run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=json.dumps(ar),
            data_dir=initialized_archive,
            timeout=120,
@@ -171,7 +171,7 @@ class TestRunWithArchiveResult:

        assert code == 0
        records = parse_jsonl_output(stdout3)
-        ar_records = [r for r in records if r.get('type') == 'ArchiveResult']
+        ar_records = [r for r in records if r.get("type") == "ArchiveResult"]
        assert len(ar_records) >= 1


@@ -180,19 +180,19 @@ class TestRunPassThrough:

    def test_run_passes_through_unknown_types(self, initialized_archive):
        """Run passes through records with unknown types."""
-        unknown_record = {'type': 'Unknown', 'id': 'fake-id', 'data': 'test'}
+        unknown_record = {"type": "Unknown", "id": "fake-id", "data": "test"}

        stdout, stderr, code = run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=json.dumps(unknown_record),
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout)
-        unknown_records = [r for r in records if r.get('type') == 'Unknown']
+        unknown_records = [r for r in records if r.get("type") == "Unknown"]
        assert len(unknown_records) == 1
-        assert unknown_records[0]['data'] == 'test'
+        assert unknown_records[0]["data"] == "test"

    def test_run_outputs_all_processed_records(self, initialized_archive):
        """Run outputs all processed records for chaining."""
@@ -200,7 +200,7 @@ class TestRunPassThrough:
        crawl_record = create_test_crawl_json(urls=[url])

        stdout, stderr, code = run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=json.dumps(crawl_record),
            data_dir=initialized_archive,
            timeout=120,
@@ -220,16 +220,18 @@ class TestRunMixedInput:
        """Run handles mixed Crawl/Snapshot/ArchiveResult input."""
        crawl = create_test_crawl_json()
        snapshot = create_test_snapshot_json()
-        unknown = {'type': 'Tag', 'id': 'fake', 'name': 'test'}
+        unknown = {"type": "Tag", "id": "fake", "name": "test"}

-        stdin = '\n'.join([
-            json.dumps(crawl),
-            json.dumps(snapshot),
-            json.dumps(unknown),
-        ])
+        stdin = "\n".join(
+            [
+                json.dumps(crawl),
+                json.dumps(snapshot),
+                json.dumps(unknown),
+            ],
+        )

        stdout, stderr, code = run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=stdin,
            data_dir=initialized_archive,
            timeout=120,
@@ -239,9 +241,9 @@ class TestRunMixedInput:
        assert code == 0
        records = parse_jsonl_output(stdout)

-        types = set(r.get('type') for r in records)
+        types = {r.get("type") for r in records}
        # Should have processed Crawl and Snapshot, passed through Tag
-        assert 'Crawl' in types or 'Snapshot' in types or 'Tag' in types
+        assert "Crawl" in types or "Snapshot" in types or "Tag" in types


 class TestRunEmpty:
@@ -250,8 +252,8 @@ class TestRunEmpty:
    def test_run_empty_stdin(self, initialized_archive):
        """Run with empty stdin returns success."""
        stdout, stderr, code = run_archivebox_cmd(
-            ['run'],
-            stdin='',
+            ["run"],
+            stdin="",
            data_dir=initialized_archive,
        )

@@ -259,16 +261,16 @@ class TestRunEmpty:

    def test_run_no_records_to_process(self, initialized_archive):
        """Run with only pass-through records shows message."""
-        unknown = {'type': 'Unknown', 'id': 'fake'}
+        unknown = {"type": "Unknown", "id": "fake"}

        stdout, stderr, code = run_archivebox_cmd(
-            ['run'],
+            ["run"],
            stdin=json.dumps(unknown),
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'No records to process' in stderr
+        assert "No records to process" in stderr


 class TestRunDaemonMode:
@@ -328,13 +330,13 @@ class TestRecoverOrphanedCrawls:
        from archivebox.services.runner import recover_orphaned_crawls

        crawl = Crawl.objects.create(
-            urls='https://example.com',
+            urls="https://example.com",
            created_by_id=get_or_create_system_user_pk(),
            status=Crawl.StatusChoices.STARTED,
            retry_at=None,
        )
        Snapshot.objects.create(
-            url='https://example.com',
+            url="https://example.com",
            crawl=crawl,
            status=Snapshot.StatusChoices.QUEUED,
            retry_at=None,
@@ -358,13 +360,13 @@ class TestRecoverOrphanedCrawls:
        from archivebox.services.runner import recover_orphaned_crawls

        crawl = Crawl.objects.create(
-            urls='https://example.com',
+            urls="https://example.com",
            created_by_id=get_or_create_system_user_pk(),
            status=Crawl.StatusChoices.STARTED,
            retry_at=None,
        )
        snapshot = Snapshot.objects.create(
-            url='https://example.com',
+            url="https://example.com",
            crawl=crawl,
            status=Snapshot.StatusChoices.QUEUED,
            retry_at=None,
@@ -376,10 +378,10 @@ class TestRecoverOrphanedCrawls:
            machine=machine,
            process_type=Process.TypeChoices.HOOK,
            status=Process.StatusChoices.RUNNING,
-            cmd=['/plugins/chrome/on_Crawl__91_chrome_wait.js'],
+            cmd=["/plugins/chrome/on_Crawl__91_chrome_wait.js"],
            env={
-                'CRAWL_ID': str(crawl.id),
-                'SNAPSHOT_ID': str(snapshot.id),
+                "CRAWL_ID": str(crawl.id),
+                "SNAPSHOT_ID": str(snapshot.id),
            },
            started_at=timezone.now(),
        )
@@ -397,13 +399,13 @@ class TestRecoverOrphanedCrawls:
        from archivebox.services.runner import recover_orphaned_crawls

        crawl = Crawl.objects.create(
-            urls='https://example.com',
+            urls="https://example.com",
            created_by_id=get_or_create_system_user_pk(),
            status=Crawl.StatusChoices.STARTED,
            retry_at=None,
        )
        Snapshot.objects.create(
-            url='https://example.com',
+            url="https://example.com",
            crawl=crawl,
            status=Snapshot.StatusChoices.SEALED,
            retry_at=None,
@@ -426,13 +428,13 @@ class TestRecoverOrphanedSnapshots:
        from archivebox.services.runner import recover_orphaned_snapshots

        crawl = Crawl.objects.create(
-            urls='https://example.com',
+            urls="https://example.com",
            created_by_id=get_or_create_system_user_pk(),
            status=Crawl.StatusChoices.SEALED,
            retry_at=None,
        )
        snapshot = Snapshot.objects.create(
-            url='https://example.com',
+            url="https://example.com",
            crawl=crawl,
            status=Snapshot.StatusChoices.STARTED,
            retry_at=None,
--- a/archivebox/tests/test_cli_schedule.py
+++ b/archivebox/tests/test_cli_schedule.py
@@ -6,26 +6,25 @@ import sqlite3
 import subprocess


-
 def test_schedule_run_all_enqueues_scheduled_crawl(tmp_path, process, disable_extractors_dict):
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'schedule', '--every=daily', '--depth=0', 'https://example.com'],
+        ["archivebox", "schedule", "--every=daily", "--depth=0", "https://example.com"],
        capture_output=True,
        text=True,
        check=True,
    )

    result = subprocess.run(
-        ['archivebox', 'schedule', '--run-all'],
+        ["archivebox", "schedule", "--run-all"],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

    assert result.returncode == 0
-    assert 'Enqueued 1 scheduled crawl' in result.stdout
+    assert "Enqueued 1 scheduled crawl" in result.stdout

    conn = sqlite3.connect(tmp_path / "index.sqlite3")
    try:
@@ -42,20 +41,20 @@ def test_schedule_without_import_path_creates_maintenance_schedule(tmp_path, pro
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'schedule', '--every=day'],
+        ["archivebox", "schedule", "--every=day"],
        capture_output=True,
        text=True,
    )

    assert result.returncode == 0
-    assert 'Created scheduled maintenance update' in result.stdout
+    assert "Created scheduled maintenance update" in result.stdout

    conn = sqlite3.connect(tmp_path / "index.sqlite3")
    try:
        row = conn.execute(
-            "SELECT urls, status FROM crawls_crawl ORDER BY created_at DESC LIMIT 1"
+            "SELECT urls, status FROM crawls_crawl ORDER BY created_at DESC LIMIT 1",
        ).fetchone()
    finally:
        conn.close()

-    assert row == ('archivebox://update', 'sealed')
+    assert row == ("archivebox://update", "sealed")
--- a/archivebox/tests/test_cli_search.py
+++ b/archivebox/tests/test_cli_search.py
@@ -15,21 +15,21 @@ def test_search_finds_snapshots(tmp_path, process, disable_extractors_dict):

    # Add snapshots
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Search for it
    result = subprocess.run(
-        ['archivebox', 'search', 'example'],
+        ["archivebox", "search", "example"],
        capture_output=True,
        text=True,
        timeout=30,
    )

    assert result.returncode == 0
-    assert 'example' in result.stdout
+    assert "example" in result.stdout


 def test_search_returns_no_results_for_missing_term(tmp_path, process, disable_extractors_dict):
@@ -37,13 +37,13 @@ def test_search_returns_no_results_for_missing_term(tmp_path, process, disable_e
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    result = subprocess.run(
-        ['archivebox', 'search', 'nonexistentterm12345'],
+        ["archivebox", "search", "nonexistentterm12345"],
        capture_output=True,
        text=True,
        timeout=30,
@@ -58,7 +58,7 @@ def test_search_on_empty_archive(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'search', 'anything'],
+        ["archivebox", "search", "anything"],
        capture_output=True,
        text=True,
        timeout=30,
@@ -72,14 +72,14 @@ def test_search_json_outputs_matching_snapshots(tmp_path, process, disable_extra
    """Test that search --json returns parseable matching snapshot rows."""
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
    )

    result = subprocess.run(
-        ['archivebox', 'search', '--json'],
+        ["archivebox", "search", "--json"],
        capture_output=True,
        text=True,
        timeout=30,
@@ -87,21 +87,21 @@ def test_search_json_outputs_matching_snapshots(tmp_path, process, disable_extra

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
-    assert any('example.com' in row.get('url', '') for row in payload)
+    assert any("example.com" in row.get("url", "") for row in payload)


 def test_search_json_with_headers_wraps_links_payload(tmp_path, process, disable_extractors_dict):
    """Test that search --json --with-headers returns a headers envelope."""
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
    )

    result = subprocess.run(
-        ['archivebox', 'search', '--json', '--with-headers'],
+        ["archivebox", "search", "--json", "--with-headers"],
        capture_output=True,
        text=True,
        timeout=30,
@@ -109,51 +109,51 @@ def test_search_json_with_headers_wraps_links_payload(tmp_path, process, disable

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
-    links = payload.get('links', payload)
-    assert any('example.com' in row.get('url', '') for row in links)
+    links = payload.get("links", payload)
+    assert any("example.com" in row.get("url", "") for row in links)


 def test_search_html_outputs_markup(tmp_path, process, disable_extractors_dict):
    """Test that search --html renders an HTML response."""
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
    )

    result = subprocess.run(
-        ['archivebox', 'search', '--html'],
+        ["archivebox", "search", "--html"],
        capture_output=True,
        text=True,
        timeout=30,
    )

    assert result.returncode == 0, result.stderr
-    assert '<' in result.stdout
+    assert "<" in result.stdout


 def test_search_csv_outputs_requested_column(tmp_path, process, disable_extractors_dict):
    """Test that search --csv emits the requested fields."""
    os.chdir(tmp_path)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        check=True,
    )

    result = subprocess.run(
-        ['archivebox', 'search', '--csv', 'url', '--with-headers'],
+        ["archivebox", "search", "--csv", "url", "--with-headers"],
        capture_output=True,
        text=True,
        timeout=30,
    )

    assert result.returncode == 0, result.stderr
-    assert 'url' in result.stdout
-    assert 'example.com' in result.stdout
+    assert "url" in result.stdout
+    assert "example.com" in result.stdout


 def test_search_with_headers_requires_structured_output_format(tmp_path, process):
@@ -161,36 +161,36 @@ def test_search_with_headers_requires_structured_output_format(tmp_path, process
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'search', '--with-headers'],
+        ["archivebox", "search", "--with-headers"],
        capture_output=True,
        text=True,
        timeout=30,
    )

    assert result.returncode != 0
-    assert 'requires' in result.stderr.lower() or 'json' in result.stderr.lower()
+    assert "requires" in result.stderr.lower() or "json" in result.stderr.lower()


 def test_search_sort_option_runs_successfully(tmp_path, process, disable_extractors_dict):
    """Test that search --sort accepts sortable fields."""
    os.chdir(tmp_path)
-    for url in ['https://iana.org', 'https://example.com']:
+    for url in ["https://iana.org", "https://example.com"]:
        subprocess.run(
-            ['archivebox', 'add', '--index-only', '--depth=0', url],
+            ["archivebox", "add", "--index-only", "--depth=0", url],
            capture_output=True,
            env=disable_extractors_dict,
            check=True,
        )

    result = subprocess.run(
-        ['archivebox', 'search', '--csv', 'url', '--sort=url'],
+        ["archivebox", "search", "--csv", "url", "--sort=url"],
        capture_output=True,
        text=True,
        timeout=30,
    )

    assert result.returncode == 0, result.stderr
-    assert 'example.com' in result.stdout or 'iana.org' in result.stdout
+    assert "example.com" in result.stdout or "iana.org" in result.stdout


 def test_search_help_lists_supported_filters(tmp_path, process):
@@ -198,13 +198,13 @@ def test_search_help_lists_supported_filters(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'search', '--help'],
+        ["archivebox", "search", "--help"],
        capture_output=True,
        text=True,
        timeout=30,
    )

    assert result.returncode == 0
-    assert '--filter-type' in result.stdout or '-f' in result.stdout
-    assert '--status' in result.stdout
-    assert '--sort' in result.stdout
+    assert "--filter-type" in result.stdout or "-f" in result.stdout
+    assert "--status" in result.stdout
+    assert "--sort" in result.stdout
--- a/archivebox/tests/test_cli_server.py
+++ b/archivebox/tests/test_cli_server.py
@@ -24,14 +24,14 @@ def test_server_shows_usage_info(tmp_path, process):
    # Just check that the command is recognized
    # We won't actually start a full server in tests
    result = subprocess.run(
-        ['archivebox', 'server', '--help'],
+        ["archivebox", "server", "--help"],
        capture_output=True,
        text=True,
        timeout=10,
    )

    assert result.returncode == 0
-    assert 'server' in result.stdout.lower() or 'http' in result.stdout.lower()
+    assert "server" in result.stdout.lower() or "http" in result.stdout.lower()


 def test_server_init_flag(tmp_path, process):
@@ -40,14 +40,14 @@ def test_server_init_flag(tmp_path, process):

    # Check init flag is recognized
    result = subprocess.run(
-        ['archivebox', 'server', '--help'],
+        ["archivebox", "server", "--help"],
        capture_output=True,
        text=True,
        timeout=10,
    )

    assert result.returncode == 0
-    assert '--init' in result.stdout or 'init' in result.stdout.lower()
+    assert "--init" in result.stdout or "init" in result.stdout.lower()


 def test_runner_worker_uses_current_interpreter():
@@ -109,3 +109,61 @@ def test_stop_existing_background_runner_cleans_up_and_stops_orchestrators():
    runner_a.kill_tree.assert_called_once_with(graceful_timeout=2.0)
    runner_b.terminate.assert_called_once_with(graceful_timeout=2.0)
    log.assert_called_once()
+
+
+def test_stop_existing_server_workers_takes_over_same_runserver_port(monkeypatch):
+    from archivebox.cli.archivebox_server import stop_existing_server_workers
+
+    supervisor = Mock()
+    supervisor.getProcessInfo.side_effect = lambda name: {
+        "worker_runserver": {"statename": "RUNNING"},
+        "worker_daphne": {"statename": "STOPPED"},
+    }.get(name, None)
+    stop_worker = Mock()
+    log = Mock()
+
+    monkeypatch.setattr(
+        "archivebox.cli.archivebox_server._read_supervisor_worker_command",
+        lambda worker_name: f"{sys.executable} -m archivebox manage runserver 0.0.0.0:8000" if worker_name == "worker_runserver" else "",
+    )
+
+    stopped = stop_existing_server_workers(
+        supervisor=supervisor,
+        stop_worker_fn=stop_worker,
+        host="0.0.0.0",
+        port="8000",
+        log=log,
+    )
+
+    assert stopped == 1
+    stop_worker.assert_called_once_with(supervisor, "worker_runserver")
+    log.assert_called_once()
+
+
+def test_stop_existing_server_workers_leaves_different_port_running(monkeypatch):
+    from archivebox.cli.archivebox_server import stop_existing_server_workers
+
+    supervisor = Mock()
+    supervisor.getProcessInfo.side_effect = lambda name: {
+        "worker_runserver": {"statename": "RUNNING"},
+        "worker_daphne": {"statename": "STOPPED"},
+    }.get(name, None)
+    stop_worker = Mock()
+    log = Mock()
+
+    monkeypatch.setattr(
+        "archivebox.cli.archivebox_server._read_supervisor_worker_command",
+        lambda worker_name: f"{sys.executable} -m archivebox manage runserver 127.0.0.1:9000" if worker_name == "worker_runserver" else "",
+    )
+
+    stopped = stop_existing_server_workers(
+        supervisor=supervisor,
+        stop_worker_fn=stop_worker,
+        host="0.0.0.0",
+        port="8000",
+        log=log,
+    )
+
+    assert stopped == 0
+    stop_worker.assert_not_called()
+    log.assert_not_called()
--- a/archivebox/tests/test_cli_shell.py
+++ b/archivebox/tests/test_cli_shell.py
@@ -14,7 +14,7 @@ def test_shell_command_exists(tmp_path, process):

    # Test that the command exists (will fail without input but should recognize command)
    result = subprocess.run(
-        ['archivebox', 'shell', '--help'],
+        ["archivebox", "shell", "--help"],
        capture_output=True,
        text=True,
        timeout=10,
@@ -29,11 +29,11 @@ def test_shell_c_executes_python(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'shell', '-c', 'print("shell-ok")'],
+        ["archivebox", "shell", "-c", 'print("shell-ok")'],
        capture_output=True,
        text=True,
        timeout=30,
    )

    assert result.returncode == 0, result.stderr
-    assert 'shell-ok' in result.stdout
+    assert "shell-ok" in result.stdout
--- a/archivebox/tests/test_cli_snapshot.py
+++ b/archivebox/tests/test_cli_snapshot.py
@@ -25,29 +25,29 @@ class TestSnapshotCreate:
        url = create_test_url()

        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'create', url],
+            ["snapshot", "create", url],
            data_dir=initialized_archive,
        )

        assert code == 0, f"Command failed: {stderr}"
-        assert 'Created' in stderr
+        assert "Created" in stderr

        records = parse_jsonl_output(stdout)
        assert len(records) == 1
-        assert records[0]['type'] == 'Snapshot'
-        assert records[0]['url'] == url
+        assert records[0]["type"] == "Snapshot"
+        assert records[0]["url"] == url

    def test_create_from_crawl_jsonl(self, initialized_archive):
        """Create snapshots from Crawl JSONL input."""
        url = create_test_url()

        # First create a crawl
-        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["crawl", "create", url], data_dir=initialized_archive)
        crawl = parse_jsonl_output(stdout1)[0]

        # Pipe crawl to snapshot create
        stdout2, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'create'],
+            ["snapshot", "create"],
            stdin=json.dumps(crawl),
            data_dir=initialized_archive,
        )
@@ -56,34 +56,34 @@ class TestSnapshotCreate:

        records = parse_jsonl_output(stdout2)
        # Should have the Crawl passed through and the Snapshot created
-        types = [r.get('type') for r in records]
-        assert 'Crawl' in types
-        assert 'Snapshot' in types
+        types = [r.get("type") for r in records]
+        assert "Crawl" in types
+        assert "Snapshot" in types

-        snapshot = next(r for r in records if r['type'] == 'Snapshot')
-        assert snapshot['url'] == url
+        snapshot = next(r for r in records if r["type"] == "Snapshot")
+        assert snapshot["url"] == url

    def test_create_with_tag(self, initialized_archive):
        """Create snapshot with --tag flag."""
        url = create_test_url()

        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'create', '--tag=test-tag', url],
+            ["snapshot", "create", "--tag=test-tag", url],
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout)
-        assert 'test-tag' in records[0].get('tags', '')
+        assert "test-tag" in records[0].get("tags", "")

    def test_create_pass_through_other_types(self, initialized_archive):
        """Pass-through records of other types unchanged."""
-        tag_record = {'type': 'Tag', 'id': 'fake-tag-id', 'name': 'test'}
+        tag_record = {"type": "Tag", "id": "fake-tag-id", "name": "test"}
        url = create_test_url()
-        stdin = json.dumps(tag_record) + '\n' + json.dumps({'url': url})
+        stdin = json.dumps(tag_record) + "\n" + json.dumps({"url": url})

        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'create'],
+            ["snapshot", "create"],
            stdin=stdin,
            data_dir=initialized_archive,
        )
@@ -91,16 +91,16 @@ class TestSnapshotCreate:
        assert code == 0
        records = parse_jsonl_output(stdout)

-        types = [r.get('type') for r in records]
-        assert 'Tag' in types
-        assert 'Snapshot' in types
+        types = [r.get("type") for r in records]
+        assert "Tag" in types
+        assert "Snapshot" in types

    def test_create_multiple_urls(self, initialized_archive):
        """Create snapshots from multiple URLs."""
        urls = [create_test_url() for _ in range(3)]

        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'create'] + urls,
+            ["snapshot", "create"] + urls,
            data_dir=initialized_archive,
        )

@@ -108,7 +108,7 @@ class TestSnapshotCreate:
        records = parse_jsonl_output(stdout)
        assert len(records) == 3

-        created_urls = {r['url'] for r in records}
+        created_urls = {r["url"] for r in records}
        for url in urls:
            assert url in created_urls

@@ -119,65 +119,65 @@ class TestSnapshotList:
    def test_list_empty(self, initialized_archive):
        """List with no snapshots returns empty."""
        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'list'],
+            ["snapshot", "list"],
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Listed 0 snapshots' in stderr
+        assert "Listed 0 snapshots" in stderr

    def test_list_returns_created(self, initialized_archive):
        """List returns previously created snapshots."""
        url = create_test_url()
-        run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)

        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'list'],
+            ["snapshot", "list"],
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout)
        assert len(records) >= 1
-        assert any(r.get('url') == url for r in records)
+        assert any(r.get("url") == url for r in records)

    def test_list_filter_by_status(self, initialized_archive):
        """Filter snapshots by status."""
        url = create_test_url()
-        run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)

        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'list', '--status=queued'],
+            ["snapshot", "list", "--status=queued"],
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout)
        for r in records:
-            assert r['status'] == 'queued'
+            assert r["status"] == "queued"

    def test_list_filter_by_url_contains(self, initialized_archive):
        """Filter snapshots by URL contains."""
-        url = create_test_url(domain='unique-domain-12345.com')
-        run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        url = create_test_url(domain="unique-domain-12345.com")
+        run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)

        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'list', '--url__icontains=unique-domain-12345'],
+            ["snapshot", "list", "--url__icontains=unique-domain-12345"],
            data_dir=initialized_archive,
        )

        assert code == 0
        records = parse_jsonl_output(stdout)
        assert len(records) == 1
-        assert 'unique-domain-12345' in records[0]['url']
+        assert "unique-domain-12345" in records[0]["url"]

    def test_list_with_limit(self, initialized_archive):
        """Limit number of results."""
        for _ in range(3):
-            run_archivebox_cmd(['snapshot', 'create', create_test_url()], data_dir=initialized_archive)
+            run_archivebox_cmd(["snapshot", "create", create_test_url()], data_dir=initialized_archive)

        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'list', '--limit=2'],
+            ["snapshot", "list", "--limit=2"],
            data_dir=initialized_archive,
        )

@@ -185,6 +185,35 @@ class TestSnapshotList:
        records = parse_jsonl_output(stdout)
        assert len(records) == 2

+    def test_list_with_sort_and_limit(self, initialized_archive):
+        """Sorting should be applied before limiting."""
+        for _ in range(3):
+            run_archivebox_cmd(["snapshot", "create", create_test_url()], data_dir=initialized_archive)
+
+        stdout, stderr, code = run_archivebox_cmd(
+            ["snapshot", "list", "--limit=2", "--sort=-created_at"],
+            data_dir=initialized_archive,
+        )
+
+        assert code == 0, f"Command failed: {stderr}"
+        records = parse_jsonl_output(stdout)
+        assert len(records) == 2
+
+    def test_list_search_meta(self, initialized_archive):
+        """snapshot list should support metadata search mode."""
+        url = create_test_url(domain="meta-search-example.com")
+        run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)
+
+        stdout, stderr, code = run_archivebox_cmd(
+            ["snapshot", "list", "--search=meta", "meta-search-example.com"],
+            data_dir=initialized_archive,
+        )
+
+        assert code == 0, f"Command failed: {stderr}"
+        records = parse_jsonl_output(stdout)
+        assert len(records) == 1
+        assert "meta-search-example.com" in records[0]["url"]
+

 class TestSnapshotUpdate:
    """Tests for `archivebox snapshot update`."""
@@ -192,35 +221,35 @@ class TestSnapshotUpdate:
    def test_update_status(self, initialized_archive):
        """Update snapshot status."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)
        snapshot = parse_jsonl_output(stdout1)[0]

        stdout2, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'update', '--status=started'],
+            ["snapshot", "update", "--status=started"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Updated 1 snapshots' in stderr
+        assert "Updated 1 snapshots" in stderr

        records = parse_jsonl_output(stdout2)
-        assert records[0]['status'] == 'started'
+        assert records[0]["status"] == "started"

    def test_update_add_tag(self, initialized_archive):
        """Update snapshot by adding tag."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)
        snapshot = parse_jsonl_output(stdout1)[0]

        stdout2, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'update', '--tag=new-tag'],
+            ["snapshot", "update", "--tag=new-tag"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Updated 1 snapshots' in stderr
+        assert "Updated 1 snapshots" in stderr


 class TestSnapshotDelete:
@@ -229,44 +258,44 @@ class TestSnapshotDelete:
    def test_delete_requires_yes(self, initialized_archive):
        """Delete requires --yes flag."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)
        snapshot = parse_jsonl_output(stdout1)[0]

        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'delete'],
+            ["snapshot", "delete"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )

        assert code == 1
-        assert '--yes' in stderr
+        assert "--yes" in stderr

    def test_delete_with_yes(self, initialized_archive):
        """Delete with --yes flag works."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)
        snapshot = parse_jsonl_output(stdout1)[0]

        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'delete', '--yes'],
+            ["snapshot", "delete", "--yes"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Deleted 1 snapshots' in stderr
+        assert "Deleted 1 snapshots" in stderr

    def test_delete_dry_run(self, initialized_archive):
        """Dry run shows what would be deleted."""
        url = create_test_url()
-        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
+        stdout1, _, _ = run_archivebox_cmd(["snapshot", "create", url], data_dir=initialized_archive)
        snapshot = parse_jsonl_output(stdout1)[0]

        stdout, stderr, code = run_archivebox_cmd(
-            ['snapshot', 'delete', '--dry-run'],
+            ["snapshot", "delete", "--dry-run"],
            stdin=json.dumps(snapshot),
            data_dir=initialized_archive,
        )

        assert code == 0
-        assert 'Would delete' in stderr
+        assert "Would delete" in stderr
--- a/archivebox/tests/test_cli_status.py
+++ b/archivebox/tests/test_cli_status.py
@@ -14,8 +14,8 @@ def _find_snapshot_dir(data_dir: Path, snapshot_id: str) -> Path | None:
    candidates = {snapshot_id}
    if len(snapshot_id) == 32:
        candidates.add(f"{snapshot_id[:8]}-{snapshot_id[8:12]}-{snapshot_id[12:16]}-{snapshot_id[16:20]}-{snapshot_id[20:]}")
-    elif len(snapshot_id) == 36 and '-' in snapshot_id:
-        candidates.add(snapshot_id.replace('-', ''))
+    elif len(snapshot_id) == 36 and "-" in snapshot_id:
+        candidates.add(snapshot_id.replace("-", ""))

    for needle in candidates:
        for path in data_dir.rglob(needle):
@@ -27,7 +27,7 @@ def _find_snapshot_dir(data_dir: Path, snapshot_id: str) -> Path | None:
 def test_status_runs_successfully(tmp_path, process):
    """Test that status command runs without error."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True)

    assert result.returncode == 0
    assert len(result.stdout) > 100
@@ -36,11 +36,11 @@ def test_status_runs_successfully(tmp_path, process):
 def test_status_shows_zero_snapshots_in_empty_archive(tmp_path, process):
    """Test status shows 0 snapshots in empty archive."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True)

    output = result.stdout
    # Should indicate empty/zero state
-    assert '0' in output
+    assert "0" in output


 def test_status_shows_correct_snapshot_count(tmp_path, process, disable_extractors_dict):
@@ -48,14 +48,14 @@ def test_status_shows_correct_snapshot_count(tmp_path, process, disable_extracto
    os.chdir(tmp_path)

    # Add 3 snapshots
-    for url in ['https://example.com', 'https://example.org', 'https://example.net']:
+    for url in ["https://example.com", "https://example.org", "https://example.net"]:
        subprocess.run(
-            ['archivebox', 'add', '--index-only', '--depth=0', url],
+            ["archivebox", "add", "--index-only", "--depth=0", url],
            capture_output=True,
            env=disable_extractors_dict,
        )

-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True)

    # Verify DB has 3 snapshots
    conn = sqlite3.connect("index.sqlite3")
@@ -65,7 +65,7 @@ def test_status_shows_correct_snapshot_count(tmp_path, process, disable_extracto

    assert db_count == 3
    # Status output should show 3
-    assert '3' in result.stdout
+    assert "3" in result.stdout


 def test_status_shows_archived_count(tmp_path, process, disable_extractors_dict):
@@ -73,25 +73,25 @@ def test_status_shows_archived_count(tmp_path, process, disable_extractors_dict)
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True)

    # Should show archived/unarchived categories
-    assert 'archived' in result.stdout.lower() or 'queued' in result.stdout.lower()
+    assert "archived" in result.stdout.lower() or "queued" in result.stdout.lower()


 def test_status_shows_archive_directory_size(tmp_path, process):
    """Test status reports archive directory size."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True)

    output = result.stdout
    # Should show size info
-    assert 'Size' in output or 'size' in output
+    assert "Size" in output or "size" in output


 def test_status_counts_archive_directories(tmp_path, process, disable_extractors_dict):
@@ -99,15 +99,15 @@ def test_status_counts_archive_directories(tmp_path, process, disable_extractors
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True)

    # Should show directory count
-    assert 'present' in result.stdout.lower() or 'directories' in result.stdout
+    assert "present" in result.stdout.lower() or "directories" in result.stdout


 def test_status_detects_orphaned_directories(tmp_path, process, disable_extractors_dict):
@@ -116,7 +116,7 @@ def test_status_detects_orphaned_directories(tmp_path, process, disable_extracto

    # Add a snapshot
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -124,10 +124,10 @@ def test_status_detects_orphaned_directories(tmp_path, process, disable_extracto
    # Create an orphaned directory
    (tmp_path / "archive" / "fake_orphaned_dir").mkdir(parents=True, exist_ok=True)

-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True)

    # Should mention orphaned dirs
-    assert 'orphan' in result.stdout.lower() or '1' in result.stdout
+    assert "orphan" in result.stdout.lower() or "1" in result.stdout


 def test_status_counts_new_snapshot_output_dirs_as_archived(tmp_path, process, disable_extractors_dict):
@@ -137,7 +137,7 @@ def test_status_counts_new_snapshot_output_dirs_as_archived(tmp_path, process, d
    env["ARCHIVEBOX_ALLOW_NO_UNIX_SOCKETS"] = "true"

    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=env,
        check=True,
@@ -145,7 +145,7 @@ def test_status_counts_new_snapshot_output_dirs_as_archived(tmp_path, process, d

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
-    snapshot_id = c.execute("SELECT id FROM core_snapshot WHERE url = ?", ('https://example.com',)).fetchone()[0]
+    snapshot_id = c.execute("SELECT id FROM core_snapshot WHERE url = ?", ("https://example.com",)).fetchone()[0]
    conn.close()

    snapshot_dir = _find_snapshot_dir(tmp_path, str(snapshot_id))
@@ -154,21 +154,21 @@ def test_status_counts_new_snapshot_output_dirs_as_archived(tmp_path, process, d
    title_dir.mkdir(parents=True, exist_ok=True)
    (title_dir / "title.txt").write_text("Example Domain")

-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True, env=env)
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True, env=env)

    assert result.returncode == 0, result.stdout + result.stderr
-    assert 'archived: 1' in result.stdout
-    assert 'present: 1' in result.stdout
+    assert "archived: 1" in result.stdout
+    assert "present: 1" in result.stdout


 def test_status_shows_user_info(tmp_path, process):
    """Test status shows user/login information."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True)

    output = result.stdout
    # Should show user section
-    assert 'user' in output.lower() or 'login' in output.lower()
+    assert "user" in output.lower() or "login" in output.lower()


 def test_status_reads_from_db_not_filesystem(tmp_path, process, disable_extractors_dict):
@@ -177,7 +177,7 @@ def test_status_reads_from_db_not_filesystem(tmp_path, process, disable_extracto

    # Add snapshot to DB
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )
@@ -191,35 +191,35 @@ def test_status_reads_from_db_not_filesystem(tmp_path, process, disable_extracto
    assert db_count == 1

    # Status should reflect DB count
-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
-    assert '1' in result.stdout
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True)
+    assert "1" in result.stdout


 def test_status_shows_index_file_info(tmp_path, process):
    """Test status shows index file information."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True)

    # Should mention index
-    assert 'index' in result.stdout.lower() or 'Index' in result.stdout
+    assert "index" in result.stdout.lower() or "Index" in result.stdout


 def test_status_help_lists_available_options(tmp_path, process):
    """Test that status --help works and documents the command."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'status', '--help'],
+        ["archivebox", "status", "--help"],
        capture_output=True,
        text=True,
    )

    assert result.returncode == 0
-    assert 'status' in result.stdout.lower() or 'statistic' in result.stdout.lower()
+    assert "status" in result.stdout.lower() or "statistic" in result.stdout.lower()


 def test_status_shows_data_directory_path(tmp_path, process):
    """Test that status reports which collection directory it is inspecting."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'status'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "status"], capture_output=True, text=True)

-    assert 'archive' in result.stdout.lower() or str(tmp_path) in result.stdout
+    assert "archive" in result.stdout.lower() or str(tmp_path) in result.stdout
--- a/archivebox/tests/test_cli_update.py
+++ b/archivebox/tests/test_cli_update.py
@@ -13,7 +13,7 @@ def test_update_runs_successfully_on_empty_archive(tmp_path, process):
    """Test that update runs without error on empty archive."""
    os.chdir(tmp_path)
    result = subprocess.run(
-        ['archivebox', 'update'],
+        ["archivebox", "update"],
        capture_output=True,
        text=True,
        timeout=30,
@@ -29,14 +29,14 @@ def test_update_reconciles_existing_snapshots(tmp_path, process, disable_extract

    # Add a snapshot (index-only for faster test)
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Run update - should reconcile and queue
    result = subprocess.run(
-        ['archivebox', 'update'],
+        ["archivebox", "update"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
@@ -51,13 +51,13 @@ def test_update_specific_snapshot_by_filter(tmp_path, process, disable_extractor

    # Add multiple snapshots
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=90,
    )
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.org'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.org"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=90,
@@ -65,7 +65,7 @@ def test_update_specific_snapshot_by_filter(tmp_path, process, disable_extractor

    # Update with filter pattern (uses filter_patterns argument)
    result = subprocess.run(
-        ['archivebox', 'update', '--filter-type=substring', 'example.com'],
+        ["archivebox", "update", "--filter-type=substring", "example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
@@ -81,7 +81,7 @@ def test_update_preserves_snapshot_count(tmp_path, process, disable_extractors_d

    # Add snapshots
    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=90,
@@ -97,7 +97,7 @@ def test_update_preserves_snapshot_count(tmp_path, process, disable_extractors_d

    # Run update (should reconcile + queue, not create new snapshots)
    subprocess.run(
-        ['archivebox', 'update'],
+        ["archivebox", "update"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
@@ -118,7 +118,7 @@ def test_update_queues_snapshots_for_archiving(tmp_path, process, disable_extrac
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
+        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=90,
@@ -126,7 +126,7 @@ def test_update_queues_snapshots_for_archiving(tmp_path, process, disable_extrac

    # Run update
    result = subprocess.run(
-        ['archivebox', 'update'],
+        ["archivebox", "update"],
        capture_output=True,
        env=disable_extractors_dict,
        timeout=30,
@@ -140,4 +140,4 @@ def test_update_queues_snapshots_for_archiving(tmp_path, process, disable_extrac
    status = c.execute("SELECT status FROM core_snapshot").fetchone()[0]
    conn.close()

-    assert status == 'queued'
+    assert status == "queued"
--- a/archivebox/tests/test_cli_version.py
+++ b/archivebox/tests/test_cli_version.py
@@ -67,56 +67,56 @@ def _extract_location_path(output: str, key: str) -> Path:
 def test_version_quiet_outputs_version_number(tmp_path):
    """Test that version --quiet outputs just the version number."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'version', '--quiet'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "version", "--quiet"], capture_output=True, text=True)

    assert result.returncode == 0
    version = result.stdout.strip()
    assert version
    # Version should be semver-ish format (e.g., 0.8.0)
-    parts = version.split('.')
+    parts = version.split(".")
    assert len(parts) >= 2


 def test_version_flag_outputs_version_number(tmp_path):
    """Test that top-level --version reports the package version."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', '--version'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "--version"], capture_output=True, text=True)

    assert result.returncode == 0
    version = result.stdout.strip()
    assert version
-    assert len(version.split('.')) >= 2
+    assert len(version.split(".")) >= 2


 def test_version_shows_system_info_in_initialized_dir(tmp_path, process):
    """Test that version shows system metadata in initialized directory."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'version'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "version"], capture_output=True, text=True)

    output = result.stdout
-    assert 'ArchiveBox' in output
+    assert "ArchiveBox" in output
    # Should show system info
-    assert any(x in output for x in ['ARCH=', 'OS=', 'PYTHON='])
+    assert any(x in output for x in ["ARCH=", "OS=", "PYTHON="])


 def test_version_shows_binaries_after_init(tmp_path, process):
    """Test that version shows binary dependencies in initialized directory."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'version'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "version"], capture_output=True, text=True)

    output = result.stdout
    # Should show binary section
-    assert 'Binary' in output or 'Dependencies' in output
+    assert "Binary" in output or "Dependencies" in output


 def test_version_shows_data_locations(tmp_path, process):
    """Test that version shows data directory locations."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'version'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "version"], capture_output=True, text=True)

    output = result.stdout
    # Should show paths
-    assert any(x in output for x in ['Data', 'Code', 'location'])
+    assert any(x in output for x in ["Data", "Code", "location"])


 def test_version_in_uninitialized_dir_still_works(tmp_path):
@@ -125,7 +125,7 @@ def test_version_in_uninitialized_dir_still_works(tmp_path):
    empty_dir.mkdir()
    os.chdir(empty_dir)

-    result = subprocess.run(['archivebox', 'version', '--quiet'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "version", "--quiet"], capture_output=True, text=True)

    # Should still output version
    assert result.returncode == 0
@@ -164,15 +164,15 @@ def test_version_auto_selects_short_tmp_dir_for_deep_collection_path(tmp_path):
 def test_version_help_lists_quiet_flag(tmp_path):
    """Test that version --help documents the quiet output mode."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'version', '--help'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "version", "--help"], capture_output=True, text=True)

    assert result.returncode == 0
-    assert '--quiet' in result.stdout or '-q' in result.stdout
+    assert "--quiet" in result.stdout or "-q" in result.stdout


 def test_version_invalid_option_fails(tmp_path):
    """Test that invalid version options fail cleanly."""
    os.chdir(tmp_path)
-    result = subprocess.run(['archivebox', 'version', '--invalid-option'], capture_output=True, text=True)
+    result = subprocess.run(["archivebox", "version", "--invalid-option"], capture_output=True, text=True)

    assert result.returncode != 0
--- a/archivebox/tests/test_config.py
+++ b/archivebox/tests/test_config.py
@@ -7,19 +7,18 @@ import subprocess
 import pytest


-
 def test_config_shows_all_config_values(tmp_path, process):
    """Test that config without args shows all config values."""
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'config'],
+        ["archivebox", "config"],
        capture_output=True,
        text=True,
    )

    # Should show various config sections
-    assert 'TIMEOUT' in result.stdout or 'timeout' in result.stdout.lower()
+    assert "TIMEOUT" in result.stdout or "timeout" in result.stdout.lower()
    # Config should show some output
    assert len(result.stdout) > 100

@@ -29,13 +28,13 @@ def test_config_get_specific_key(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'config', '--get', 'TIMEOUT'],
+        ["archivebox", "config", "--get", "TIMEOUT"],
        capture_output=True,
        text=True,
    )

    # Should show the TIMEOUT value
-    assert 'TIMEOUT' in result.stdout or result.returncode == 0
+    assert "TIMEOUT" in result.stdout or result.returncode == 0


 def test_config_set_value_writes_to_config_file(tmp_path, process):
@@ -44,18 +43,18 @@ def test_config_set_value_writes_to_config_file(tmp_path, process):

    # Set a config value
    result = subprocess.run(
-        ['archivebox', 'config', '--set', 'TIMEOUT=120'],
+        ["archivebox", "config", "--set", "TIMEOUT=120"],
        capture_output=True,
        text=True,
    )
    assert result.returncode == 0, result.stderr

    # Read the config file directly to verify it was written
-    config_file = tmp_path / 'ArchiveBox.conf'
+    config_file = tmp_path / "ArchiveBox.conf"
    if config_file.exists():
        config_content = config_file.read_text()
        # Config should contain the set value
-        assert 'TIMEOUT' in config_content or 'timeout' in config_content.lower()
+        assert "TIMEOUT" in config_content or "timeout" in config_content.lower()


 def test_config_set_and_get_roundtrip(tmp_path, process):
@@ -64,19 +63,19 @@ def test_config_set_and_get_roundtrip(tmp_path, process):

    # Set a value
    set_result = subprocess.run(
-        ['archivebox', 'config', '--set', 'TIMEOUT=999'],
+        ["archivebox", "config", "--set", "TIMEOUT=999"],
        capture_output=True,
        text=True,
    )

    # Verify set was successful
-    assert set_result.returncode == 0 or '999' in set_result.stdout
+    assert set_result.returncode == 0 or "999" in set_result.stdout

    # Read the config file directly to verify
-    config_file = tmp_path / 'ArchiveBox.conf'
+    config_file = tmp_path / "ArchiveBox.conf"
    if config_file.exists():
        config_content = config_file.read_text()
-        assert '999' in config_content or 'TIMEOUT' in config_content
+        assert "999" in config_content or "TIMEOUT" in config_content


 def test_config_search_finds_matching_keys(tmp_path, process):
@@ -84,13 +83,13 @@ def test_config_search_finds_matching_keys(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'config', '--search', 'TIMEOUT'],
+        ["archivebox", "config", "--search", "TIMEOUT"],
        capture_output=True,
        text=True,
    )

    # Should find TIMEOUT-related config
-    assert 'TIMEOUT' in result.stdout or result.returncode == 0
+    assert "TIMEOUT" in result.stdout or result.returncode == 0


 def test_config_invalid_key_fails(tmp_path, process):
@@ -98,13 +97,13 @@ def test_config_invalid_key_fails(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'config', '--set', 'INVALID_KEY_THAT_DOES_NOT_EXIST=value'],
+        ["archivebox", "config", "--set", "INVALID_KEY_THAT_DOES_NOT_EXIST=value"],
        capture_output=True,
        text=True,
    )

    # Should fail
-    assert result.returncode != 0 or 'failed' in result.stdout.lower()
+    assert result.returncode != 0 or "failed" in result.stdout.lower()


 def test_config_set_requires_equals_sign(tmp_path, process):
@@ -112,7 +111,7 @@ def test_config_set_requires_equals_sign(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'config', '--set', 'TIMEOUT'],
+        ["archivebox", "config", "--set", "TIMEOUT"],
        capture_output=True,
        text=True,
    )
@@ -129,15 +128,15 @@ class TestConfigCLI:
        os.chdir(tmp_path)

        result = subprocess.run(
-            ['archivebox', 'config', '--help'],
+            ["archivebox", "config", "--help"],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
-        assert '--get' in result.stdout
-        assert '--set' in result.stdout
+        assert "--get" in result.stdout
+        assert "--set" in result.stdout


-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/archivebox/tests/test_config_views.py
+++ b/archivebox/tests/test_config_views.py
@@ -17,310 +17,317 @@ def test_get_db_binaries_by_name_collapses_youtube_dl_aliases(monkeypatch):
    now = timezone.now()
    records = [
        SimpleNamespace(
-            name='youtube-dl',
-            version='',
-            binprovider='',
-            abspath='/usr/bin/youtube-dl',
+            name="youtube-dl",
+            version="",
+            binprovider="",
+            abspath="/usr/bin/youtube-dl",
            status=Binary.StatusChoices.INSTALLED,
            modified_at=now,
        ),
        SimpleNamespace(
-            name='yt-dlp',
-            version='2026.03.01',
-            binprovider='pip',
-            abspath='/usr/bin/yt-dlp',
+            name="yt-dlp",
+            version="2026.03.01",
+            binprovider="pip",
+            abspath="/usr/bin/yt-dlp",
            status=Binary.StatusChoices.INSTALLED,
            modified_at=now + timedelta(seconds=1),
        ),
    ]

-    monkeypatch.setattr(config_views.Binary, 'objects', SimpleNamespace(all=lambda: records))
+    monkeypatch.setattr(config_views.Binary, "objects", SimpleNamespace(all=lambda: records))

    binaries = config_views.get_db_binaries_by_name()

-    assert 'yt-dlp' in binaries
-    assert 'youtube-dl' not in binaries
-    assert binaries['yt-dlp'].version == '2026.03.01'
+    assert "yt-dlp" in binaries
+    assert "youtube-dl" not in binaries
+    assert binaries["yt-dlp"].version == "2026.03.01"


 def test_binaries_list_view_uses_db_version_and_hides_youtube_dl_alias(monkeypatch):
-    request = RequestFactory().get('/admin/environment/binaries/')
+    request = RequestFactory().get("/admin/environment/binaries/")
    request.user = SimpleNamespace(is_superuser=True)

    db_binary = SimpleNamespace(
-        name='youtube-dl',
-        version='2026.03.01',
-        binprovider='pip',
-        abspath='/usr/bin/yt-dlp',
+        name="youtube-dl",
+        version="2026.03.01",
+        binprovider="pip",
+        abspath="/usr/bin/yt-dlp",
        status=Binary.StatusChoices.INSTALLED,
-        sha256='',
+        sha256="",
        modified_at=timezone.now(),
    )

-    monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {'yt-dlp': db_binary})
+    monkeypatch.setattr(config_views, "get_db_binaries_by_name", lambda: {"yt-dlp": db_binary})

    context = config_views.binaries_list_view.__wrapped__(request)

-    assert len(context['table']['Binary Name']) == 1
-    assert str(context['table']['Binary Name'][0].link_item) == 'yt-dlp'
-    assert context['table']['Found Version'][0] == '✅ 2026.03.01'
-    assert context['table']['Provided By'][0] == 'pip'
-    assert context['table']['Found Abspath'][0] == '/usr/bin/yt-dlp'
+    assert len(context["table"]["Binary Name"]) == 1
+    assert str(context["table"]["Binary Name"][0].link_item) == "yt-dlp"
+    assert context["table"]["Found Version"][0] == "✅ 2026.03.01"
+    assert context["table"]["Provided By"][0] == "pip"
+    assert context["table"]["Found Abspath"][0] == "/usr/bin/yt-dlp"


 def test_binaries_list_view_only_shows_persisted_records(monkeypatch):
-    request = RequestFactory().get('/admin/environment/binaries/')
+    request = RequestFactory().get("/admin/environment/binaries/")
    request.user = SimpleNamespace(is_superuser=True)

-    monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {})
+    monkeypatch.setattr(config_views, "get_db_binaries_by_name", lambda: {})

    context = config_views.binaries_list_view.__wrapped__(request)

-    assert context['table']['Binary Name'] == []
-    assert context['table']['Found Version'] == []
-    assert context['table']['Provided By'] == []
-    assert context['table']['Found Abspath'] == []
+    assert context["table"]["Binary Name"] == []
+    assert context["table"]["Found Version"] == []
+    assert context["table"]["Provided By"] == []
+    assert context["table"]["Found Abspath"] == []


 def test_binary_detail_view_uses_canonical_db_record(monkeypatch):
-    request = RequestFactory().get('/admin/environment/binaries/youtube-dl/')
+    request = RequestFactory().get("/admin/environment/binaries/youtube-dl/")
    request.user = SimpleNamespace(is_superuser=True)

    db_binary = SimpleNamespace(
-        id='019d14cc-6c40-7793-8ff1-0f8bb050e8a3',
-        name='yt-dlp',
-        version='2026.03.01',
-        binprovider='pip',
-        abspath='/usr/bin/yt-dlp',
-        sha256='abc123',
+        id="019d14cc-6c40-7793-8ff1-0f8bb050e8a3",
+        name="yt-dlp",
+        version="2026.03.01",
+        binprovider="pip",
+        abspath="/usr/bin/yt-dlp",
+        sha256="abc123",
        status=Binary.StatusChoices.INSTALLED,
        modified_at=timezone.now(),
    )

-    monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {'yt-dlp': db_binary})
+    monkeypatch.setattr(config_views, "get_db_binaries_by_name", lambda: {"yt-dlp": db_binary})

-    context = config_views.binary_detail_view.__wrapped__(request, key='youtube-dl')
-    section = context['data'][0]
+    context = config_views.binary_detail_view.__wrapped__(request, key="youtube-dl")
+    section = context["data"][0]

-    assert context['title'] == 'yt-dlp'
-    assert section['fields']['name'] == 'yt-dlp'
-    assert section['fields']['version'] == '2026.03.01'
-    assert section['fields']['binprovider'] == 'pip'
-    assert section['fields']['abspath'] == '/usr/bin/yt-dlp'
-    assert '/admin/machine/binary/019d14cc-6c40-7793-8ff1-0f8bb050e8a3/change/?_changelist_filters=q%3Dyt-dlp' in section['description']
+    assert context["title"] == "yt-dlp"
+    assert section["fields"]["name"] == "yt-dlp"
+    assert section["fields"]["version"] == "2026.03.01"
+    assert section["fields"]["binprovider"] == "pip"
+    assert section["fields"]["abspath"] == "/usr/bin/yt-dlp"
+    assert "/admin/machine/binary/019d14cc-6c40-7793-8ff1-0f8bb050e8a3/change/?_changelist_filters=q%3Dyt-dlp" in section["description"]


 def test_binary_detail_view_marks_unrecorded_binary(monkeypatch):
-    request = RequestFactory().get('/admin/environment/binaries/wget/')
+    request = RequestFactory().get("/admin/environment/binaries/wget/")
    request.user = SimpleNamespace(is_superuser=True)

-    monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {})
+    monkeypatch.setattr(config_views, "get_db_binaries_by_name", lambda: {})

-    context = config_views.binary_detail_view.__wrapped__(request, key='wget')
-    section = context['data'][0]
+    context = config_views.binary_detail_view.__wrapped__(request, key="wget")
+    section = context["data"][0]

-    assert section['description'] == 'No persisted Binary record found'
-    assert section['fields']['status'] == 'unrecorded'
-    assert section['fields']['binprovider'] == 'not recorded'
+    assert section["description"] == "No persisted Binary record found"
+    assert section["fields"]["status"] == "unrecorded"
+    assert section["fields"]["binprovider"] == "not recorded"


 def test_plugin_detail_view_renders_config_in_dedicated_sections(monkeypatch):
-    request = RequestFactory().get('/admin/environment/plugins/builtin.example/')
+    request = RequestFactory().get("/admin/environment/plugins/builtin.example/")
    request.user = SimpleNamespace(is_superuser=True)

    plugin_config = {
-        'title': 'Example Plugin',
-        'description': 'Example config used to verify plugin metadata rendering.',
-        'type': 'object',
-        'required_plugins': ['chrome'],
-        'required_binaries': ['example-cli'],
-        'output_mimetypes': ['text/plain', 'application/json'],
-        'properties': {
-            'EXAMPLE_ENABLED': {
-                'type': 'boolean',
-                'description': 'Enable the example plugin.',
-                'x-fallback': 'CHECK_SSL_VALIDITY',
+        "title": "Example Plugin",
+        "description": "Example config used to verify plugin metadata rendering.",
+        "type": "object",
+        "required_plugins": ["chrome"],
+        "required_binaries": ["example-cli"],
+        "output_mimetypes": ["text/plain", "application/json"],
+        "properties": {
+            "EXAMPLE_ENABLED": {
+                "type": "boolean",
+                "description": "Enable the example plugin.",
+                "x-fallback": "CHECK_SSL_VALIDITY",
            },
-            'EXAMPLE_BINARY': {
-                'type': 'string',
-                'default': 'gallery-dl',
-                'description': 'Filesystem path for example output.',
-                'x-aliases': ['USE_EXAMPLE_BINARY'],
+            "EXAMPLE_BINARY": {
+                "type": "string",
+                "default": "gallery-dl",
+                "description": "Filesystem path for example output.",
+                "x-aliases": ["USE_EXAMPLE_BINARY"],
            },
        },
    }

-    monkeypatch.setattr(config_views, 'get_filesystem_plugins', lambda: {
-        'builtin.example': {
-            'id': 'builtin.example',
-            'name': 'example',
-            'source': 'builtin',
-            'path': '/plugins/example',
-            'hooks': ['on_Snapshot__01_example.py'],
-            'config': plugin_config,
-        }
-    })
-    monkeypatch.setattr(config_views, 'get_machine_admin_url', lambda: '/admin/machine/machine/test-machine/change/')
+    monkeypatch.setattr(
+        config_views,
+        "get_filesystem_plugins",
+        lambda: {
+            "builtin.example": {
+                "id": "builtin.example",
+                "name": "example",
+                "source": "builtin",
+                "path": "/plugins/example",
+                "hooks": ["on_Snapshot__01_example.py"],
+                "config": plugin_config,
+            },
+        },
+    )
+    monkeypatch.setattr(config_views, "get_machine_admin_url", lambda: "/admin/machine/machine/test-machine/change/")

-    context = config_views.plugin_detail_view.__wrapped__(request, key='builtin.example')
+    context = config_views.plugin_detail_view.__wrapped__(request, key="builtin.example")

-    assert context['title'] == 'example'
-    assert len(context['data']) == 5
+    assert context["title"] == "example"
+    assert len(context["data"]) == 5

-    summary_section, hooks_section, metadata_section, config_section, properties_section = context['data']
+    summary_section, hooks_section, metadata_section, config_section, properties_section = context["data"]

-    assert summary_section['fields'] == {
-        'id': 'builtin.example',
-        'name': 'example',
-        'source': 'builtin',
+    assert summary_section["fields"] == {
+        "id": "builtin.example",
+        "name": "example",
+        "source": "builtin",
    }
-    assert '/plugins/example' in summary_section['description']
-    assert 'https://archivebox.github.io/abx-plugins/#example' in summary_section['description']
+    assert "/plugins/example" in summary_section["description"]
+    assert "https://archivebox.github.io/abx-plugins/#example" in summary_section["description"]

-    assert hooks_section['name'] == 'Hooks'
-    assert hooks_section['fields'] == {}
-    assert 'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/example/on_Snapshot__01_example.py' in hooks_section['description']
-    assert 'on_Snapshot__01_example.py' in hooks_section['description']
+    assert hooks_section["name"] == "Hooks"
+    assert hooks_section["fields"] == {}
+    assert (
+        "https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/example/on_Snapshot__01_example.py"
+        in hooks_section["description"]
+    )
+    assert "on_Snapshot__01_example.py" in hooks_section["description"]

-    assert metadata_section['name'] == 'Plugin Metadata'
-    assert metadata_section['fields'] == {}
-    assert 'Example Plugin' in metadata_section['description']
-    assert 'Example config used to verify plugin metadata rendering.' in metadata_section['description']
-    assert 'https://archivebox.github.io/abx-plugins/#chrome' in metadata_section['description']
-    assert '/admin/environment/binaries/example-cli/' in metadata_section['description']
-    assert 'text/plain' in metadata_section['description']
-    assert 'application/json' in metadata_section['description']
+    assert metadata_section["name"] == "Plugin Metadata"
+    assert metadata_section["fields"] == {}
+    assert "Example Plugin" in metadata_section["description"]
+    assert "Example config used to verify plugin metadata rendering." in metadata_section["description"]
+    assert "https://archivebox.github.io/abx-plugins/#chrome" in metadata_section["description"]
+    assert "/admin/environment/binaries/example-cli/" in metadata_section["description"]
+    assert "text/plain" in metadata_section["description"]
+    assert "application/json" in metadata_section["description"]

-    assert config_section['name'] == 'config.json'
-    assert config_section['fields'] == {}
-    assert '<pre style=' in config_section['description']
-    assert 'EXAMPLE_ENABLED' in config_section['description']
-    assert '<span style="color: #0550ae;">"properties"</span>' in config_section['description']
+    assert config_section["name"] == "config.json"
+    assert config_section["fields"] == {}
+    assert "<pre style=" in config_section["description"]
+    assert "EXAMPLE_ENABLED" in config_section["description"]
+    assert '<span style="color: #0550ae;">"properties"</span>' in config_section["description"]

-    assert properties_section['name'] == 'Config Properties'
-    assert properties_section['fields'] == {}
-    assert '/admin/machine/machine/test-machine/change/' in properties_section['description']
-    assert '/admin/machine/binary/' in properties_section['description']
-    assert '/admin/environment/binaries/' in properties_section['description']
-    assert 'EXAMPLE_ENABLED' in properties_section['description']
-    assert 'boolean' in properties_section['description']
-    assert 'Enable the example plugin.' in properties_section['description']
-    assert '/admin/environment/config/EXAMPLE_ENABLED/' in properties_section['description']
-    assert '/admin/environment/config/CHECK_SSL_VALIDITY/' in properties_section['description']
-    assert '/admin/environment/config/USE_EXAMPLE_BINARY/' in properties_section['description']
-    assert '/admin/environment/binaries/gallery-dl/' in properties_section['description']
-    assert 'EXAMPLE_BINARY' in properties_section['description']
+    assert properties_section["name"] == "Config Properties"
+    assert properties_section["fields"] == {}
+    assert "/admin/machine/machine/test-machine/change/" in properties_section["description"]
+    assert "/admin/machine/binary/" in properties_section["description"]
+    assert "/admin/environment/binaries/" in properties_section["description"]
+    assert "EXAMPLE_ENABLED" in properties_section["description"]
+    assert "boolean" in properties_section["description"]
+    assert "Enable the example plugin." in properties_section["description"]
+    assert "/admin/environment/config/EXAMPLE_ENABLED/" in properties_section["description"]
+    assert "/admin/environment/config/CHECK_SSL_VALIDITY/" in properties_section["description"]
+    assert "/admin/environment/config/USE_EXAMPLE_BINARY/" in properties_section["description"]
+    assert "/admin/environment/binaries/gallery-dl/" in properties_section["description"]
+    assert "EXAMPLE_BINARY" in properties_section["description"]


 def test_get_config_definition_link_keeps_core_config_search_link(monkeypatch):
-    monkeypatch.setattr(core_views, 'find_plugin_for_config_key', lambda key: None)
+    monkeypatch.setattr(core_views, "find_plugin_for_config_key", lambda key: None)

-    url, label = core_views.get_config_definition_link('CHECK_SSL_VALIDITY')
+    url, label = core_views.get_config_definition_link("CHECK_SSL_VALIDITY")

-    assert 'github.com/search' in url
-    assert 'CHECK_SSL_VALIDITY' in url
-    assert label == 'archivebox/config'
+    assert "github.com/search" in url
+    assert "CHECK_SSL_VALIDITY" in url
+    assert label == "archivebox/config"


 def test_get_config_definition_link_uses_plugin_config_json_for_plugin_options(monkeypatch):
-    plugin_dir = core_views.BUILTIN_PLUGINS_DIR / 'parse_dom_outlinks'
+    plugin_dir = core_views.BUILTIN_PLUGINS_DIR / "parse_dom_outlinks"

-    monkeypatch.setattr(core_views, 'find_plugin_for_config_key', lambda key: 'parse_dom_outlinks')
-    monkeypatch.setattr(core_views, 'iter_plugin_dirs', lambda: [plugin_dir])
+    monkeypatch.setattr(core_views, "find_plugin_for_config_key", lambda key: "parse_dom_outlinks")
+    monkeypatch.setattr(core_views, "iter_plugin_dirs", lambda: [plugin_dir])

-    url, label = core_views.get_config_definition_link('PARSE_DOM_OUTLINKS_ENABLED')
+    url, label = core_views.get_config_definition_link("PARSE_DOM_OUTLINKS_ENABLED")

-    assert url == 'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/parse_dom_outlinks/config.json'
-    assert label == 'abx_plugins/plugins/parse_dom_outlinks/config.json'
+    assert url == "https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/parse_dom_outlinks/config.json"
+    assert label == "abx_plugins/plugins/parse_dom_outlinks/config.json"


 def test_live_config_value_view_renames_source_field_and_uses_plugin_definition_link(monkeypatch):
-    request = RequestFactory().get('/admin/environment/config/PARSE_DOM_OUTLINKS_ENABLED/')
+    request = RequestFactory().get("/admin/environment/config/PARSE_DOM_OUTLINKS_ENABLED/")
    request.user = SimpleNamespace(is_superuser=True)

-    monkeypatch.setattr(core_views, 'get_all_configs', lambda: {})
-    monkeypatch.setattr(core_views, 'get_flat_config', lambda: {})
-    monkeypatch.setattr(core_views, 'get_config', lambda: {'PARSE_DOM_OUTLINKS_ENABLED': True})
-    monkeypatch.setattr(core_views, 'find_config_default', lambda key: 'True')
-    monkeypatch.setattr(core_views, 'find_config_type', lambda key: 'bool')
-    monkeypatch.setattr(core_views, 'find_config_source', lambda key, merged: 'Default')
-    monkeypatch.setattr(core_views, 'key_is_safe', lambda key: True)
-    monkeypatch.setattr(core_views.CONSTANTS, 'CONFIG_FILE', SimpleNamespace(exists=lambda: False))
+    monkeypatch.setattr(core_views, "get_all_configs", lambda: {})
+    monkeypatch.setattr(core_views, "get_flat_config", lambda: {})
+    monkeypatch.setattr(core_views, "get_config", lambda: {"PARSE_DOM_OUTLINKS_ENABLED": True})
+    monkeypatch.setattr(core_views, "find_config_default", lambda key: "True")
+    monkeypatch.setattr(core_views, "find_config_type", lambda key: "bool")
+    monkeypatch.setattr(core_views, "find_config_source", lambda key, merged: "Default")
+    monkeypatch.setattr(core_views, "key_is_safe", lambda key: True)
+    monkeypatch.setattr(core_views.CONSTANTS, "CONFIG_FILE", SimpleNamespace(exists=lambda: False))

    from archivebox.machine.models import Machine
    from archivebox.config.configset import BaseConfigSet

-    monkeypatch.setattr(Machine, 'current', classmethod(lambda cls: SimpleNamespace(id='machine-id', config={})))
-    monkeypatch.setattr(BaseConfigSet, 'load_from_file', classmethod(lambda cls, path: {}))
+    monkeypatch.setattr(Machine, "current", classmethod(lambda cls: SimpleNamespace(id="machine-id", config={})))
+    monkeypatch.setattr(BaseConfigSet, "load_from_file", classmethod(lambda cls, path: {}))
    monkeypatch.setattr(
        core_views,
-        'get_config_definition_link',
+        "get_config_definition_link",
        lambda key: (
-            'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/parse_dom_outlinks/config.json',
-            'abx_plugins/plugins/parse_dom_outlinks/config.json',
+            "https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/parse_dom_outlinks/config.json",
+            "abx_plugins/plugins/parse_dom_outlinks/config.json",
        ),
    )

-    context = core_views.live_config_value_view.__wrapped__(request, key='PARSE_DOM_OUTLINKS_ENABLED')
-    section = context['data'][0]
+    context = core_views.live_config_value_view.__wrapped__(request, key="PARSE_DOM_OUTLINKS_ENABLED")
+    section = context["data"][0]

-    assert 'Currently read from' in section['fields']
-    assert 'Source' not in section['fields']
-    assert section['fields']['Currently read from'] == 'Default'
-    assert 'abx_plugins/plugins/parse_dom_outlinks/config.json' in section['help_texts']['Type']
+    assert "Currently read from" in section["fields"]
+    assert "Source" not in section["fields"]
+    assert section["fields"]["Currently read from"] == "Default"
+    assert "abx_plugins/plugins/parse_dom_outlinks/config.json" in section["help_texts"]["Type"]


 def test_find_config_source_prefers_environment_over_machine_and_file(monkeypatch):
-    monkeypatch.setenv('CHECK_SSL_VALIDITY', 'false')
+    monkeypatch.setenv("CHECK_SSL_VALIDITY", "false")

    from archivebox.machine.models import Machine
    from archivebox.config.configset import BaseConfigSet

    monkeypatch.setattr(
        Machine,
-        'current',
-        classmethod(lambda cls: SimpleNamespace(id='machine-id', config={'CHECK_SSL_VALIDITY': 'true'})),
+        "current",
+        classmethod(lambda cls: SimpleNamespace(id="machine-id", config={"CHECK_SSL_VALIDITY": "true"})),
    )
    monkeypatch.setattr(
        BaseConfigSet,
-        'load_from_file',
-        classmethod(lambda cls, path: {'CHECK_SSL_VALIDITY': 'true'}),
+        "load_from_file",
+        classmethod(lambda cls, path: {"CHECK_SSL_VALIDITY": "true"}),
    )

-    assert core_views.find_config_source('CHECK_SSL_VALIDITY', {'CHECK_SSL_VALIDITY': False}) == 'Environment'
+    assert core_views.find_config_source("CHECK_SSL_VALIDITY", {"CHECK_SSL_VALIDITY": False}) == "Environment"


 def test_live_config_value_view_priority_text_matches_runtime_precedence(monkeypatch):
-    request = RequestFactory().get('/admin/environment/config/CHECK_SSL_VALIDITY/')
+    request = RequestFactory().get("/admin/environment/config/CHECK_SSL_VALIDITY/")
    request.user = SimpleNamespace(is_superuser=True)

-    monkeypatch.setattr(core_views, 'get_all_configs', lambda: {})
-    monkeypatch.setattr(core_views, 'get_flat_config', lambda: {'CHECK_SSL_VALIDITY': True})
-    monkeypatch.setattr(core_views, 'get_config', lambda: {'CHECK_SSL_VALIDITY': False})
-    monkeypatch.setattr(core_views, 'find_config_default', lambda key: 'True')
-    monkeypatch.setattr(core_views, 'find_config_type', lambda key: 'bool')
-    monkeypatch.setattr(core_views, 'key_is_safe', lambda key: True)
+    monkeypatch.setattr(core_views, "get_all_configs", lambda: {})
+    monkeypatch.setattr(core_views, "get_flat_config", lambda: {"CHECK_SSL_VALIDITY": True})
+    monkeypatch.setattr(core_views, "get_config", lambda: {"CHECK_SSL_VALIDITY": False})
+    monkeypatch.setattr(core_views, "find_config_default", lambda key: "True")
+    monkeypatch.setattr(core_views, "find_config_type", lambda key: "bool")
+    monkeypatch.setattr(core_views, "key_is_safe", lambda key: True)

    from archivebox.machine.models import Machine
    from archivebox.config.configset import BaseConfigSet

    monkeypatch.setattr(
        Machine,
-        'current',
-        classmethod(lambda cls: SimpleNamespace(id='machine-id', config={'CHECK_SSL_VALIDITY': 'true'})),
+        "current",
+        classmethod(lambda cls: SimpleNamespace(id="machine-id", config={"CHECK_SSL_VALIDITY": "true"})),
    )
    monkeypatch.setattr(
        BaseConfigSet,
-        'load_from_file',
-        classmethod(lambda cls, path: {'CHECK_SSL_VALIDITY': 'true'}),
+        "load_from_file",
+        classmethod(lambda cls, path: {"CHECK_SSL_VALIDITY": "true"}),
    )
-    monkeypatch.setattr(core_views.CONSTANTS, 'CONFIG_FILE', SimpleNamespace(exists=lambda: True))
-    monkeypatch.setenv('CHECK_SSL_VALIDITY', 'false')
+    monkeypatch.setattr(core_views.CONSTANTS, "CONFIG_FILE", SimpleNamespace(exists=lambda: True))
+    monkeypatch.setenv("CHECK_SSL_VALIDITY", "false")

-    context = core_views.live_config_value_view.__wrapped__(request, key='CHECK_SSL_VALIDITY')
-    section = context['data'][0]
+    context = core_views.live_config_value_view.__wrapped__(request, key="CHECK_SSL_VALIDITY")
+    section = context["data"][0]

-    assert section['fields']['Currently read from'] == 'Environment'
-    help_text = section['help_texts']['Currently read from']
-    assert help_text.index('Environment') < help_text.index('Machine') < help_text.index('Config File') < help_text.index('Default')
-    assert 'Configuration Sources (highest priority first):' in section['help_texts']['Value']
+    assert section["fields"]["Currently read from"] == "Environment"
+    help_text = section["help_texts"]["Currently read from"]
+    assert help_text.index("Environment") < help_text.index("Machine") < help_text.index("Config File") < help_text.index("Default")
+    assert "Configuration Sources (highest priority first):" in section["help_texts"]["Value"]
--- a/archivebox/tests/test_crawl.py
+++ b/archivebox/tests/test_crawl.py
@@ -8,19 +8,18 @@ import sqlite3
 import pytest


-
 def test_crawl_creates_crawl_object(tmp_path, process, disable_extractors_dict):
    """Test that crawl command creates a Crawl object."""
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'crawl', '--no-wait', 'https://example.com'],
+        ["archivebox", "crawl", "--no-wait", "https://example.com"],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    crawl = c.execute("SELECT id, max_depth FROM crawls_crawl ORDER BY created_at DESC LIMIT 1").fetchone()
    conn.close()
@@ -33,13 +32,13 @@ def test_crawl_depth_sets_max_depth_in_crawl(tmp_path, process, disable_extracto
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'crawl', '--depth=2', '--no-wait', 'https://example.com'],
+        ["archivebox", "crawl", "--depth=2", "--no-wait", "https://example.com"],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    crawl = c.execute("SELECT max_depth FROM crawls_crawl ORDER BY created_at DESC LIMIT 1").fetchone()
    conn.close()
@@ -53,16 +52,18 @@ def test_crawl_creates_snapshot_for_url(tmp_path, process, disable_extractors_di
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'crawl', '--no-wait', 'https://example.com'],
+        ["archivebox", "crawl", "--no-wait", "https://example.com"],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
-    snapshot = c.execute("SELECT url FROM core_snapshot WHERE url = ?",
-                        ('https://example.com',)).fetchone()
+    snapshot = c.execute(
+        "SELECT url FROM core_snapshot WHERE url = ?",
+        ("https://example.com",),
+    ).fetchone()
    conn.close()

    assert snapshot is not None, "Snapshot should be created for input URL"
@@ -73,13 +74,13 @@ def test_crawl_links_snapshot_to_crawl(tmp_path, process, disable_extractors_dic
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'crawl', '--no-wait', 'https://example.com'],
+        ["archivebox", "crawl", "--no-wait", "https://example.com"],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    # Get the crawl ID
@@ -88,8 +89,10 @@ def test_crawl_links_snapshot_to_crawl(tmp_path, process, disable_extractors_dic
    crawl_id = crawl[0]

    # Check snapshot has correct crawl_id
-    snapshot = c.execute("SELECT crawl_id FROM core_snapshot WHERE url = ?",
-                        ('https://example.com',)).fetchone()
+    snapshot = c.execute(
+        "SELECT crawl_id FROM core_snapshot WHERE url = ?",
+        ("https://example.com",),
+    ).fetchone()
    conn.close()

    assert snapshot is not None
@@ -101,22 +104,26 @@ def test_crawl_multiple_urls_creates_multiple_snapshots(tmp_path, process, disab
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'crawl', '--no-wait',
-         'https://example.com',
-         'https://iana.org'],
+        [
+            "archivebox",
+            "crawl",
+            "--no-wait",
+            "https://example.com",
+            "https://iana.org",
+        ],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    urls = c.execute("SELECT url FROM core_snapshot ORDER BY url").fetchall()
    conn.close()

    urls = [u[0] for u in urls]
-    assert 'https://example.com' in urls
-    assert 'https://iana.org' in urls
+    assert "https://example.com" in urls
+    assert "https://iana.org" in urls


 def test_crawl_from_file_creates_snapshot(tmp_path, process, disable_extractors_dict):
@@ -124,17 +131,17 @@ def test_crawl_from_file_creates_snapshot(tmp_path, process, disable_extractors_
    os.chdir(tmp_path)

    # Write URLs to a file
-    urls_file = tmp_path / 'urls.txt'
-    urls_file.write_text('https://example.com\n')
+    urls_file = tmp_path / "urls.txt"
+    urls_file.write_text("https://example.com\n")

    subprocess.run(
-        ['archivebox', 'crawl', '--no-wait', str(urls_file)],
+        ["archivebox", "crawl", "--no-wait", str(urls_file)],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    snapshot = c.execute("SELECT url FROM core_snapshot").fetchone()
    conn.close()
@@ -148,19 +155,19 @@ def test_crawl_persists_input_urls_on_crawl(tmp_path, process, disable_extractor
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'crawl', '--no-wait', 'https://example.com'],
+        ["archivebox", "crawl", "--no-wait", "https://example.com"],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    crawl_urls = c.execute("SELECT urls FROM crawls_crawl ORDER BY created_at DESC LIMIT 1").fetchone()
    conn.close()

    assert crawl_urls is not None, "Crawl should be created for crawl input"
-    assert 'https://example.com' in crawl_urls[0], "Crawl should persist input URLs"
+    assert "https://example.com" in crawl_urls[0], "Crawl should persist input URLs"


 class TestCrawlCLI:
@@ -171,14 +178,14 @@ class TestCrawlCLI:
        os.chdir(tmp_path)

        result = subprocess.run(
-            ['archivebox', 'crawl', '--help'],
+            ["archivebox", "crawl", "--help"],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
-        assert 'create' in result.stdout
+        assert "create" in result.stdout


-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/archivebox/tests/test_crawl_admin.py
+++ b/archivebox/tests/test_crawl_admin.py
@@ -14,75 +14,77 @@ pytestmark = pytest.mark.django_db


 User = get_user_model()
-ADMIN_HOST = 'admin.archivebox.localhost:8000'
+ADMIN_HOST = "admin.archivebox.localhost:8000"


@pytest.fixture
 def admin_user(db):
    return cast(UserManager, User.objects).create_superuser(
-        username='crawladmin',
-        email='crawladmin@test.com',
-        password='testpassword',
+        username="crawladmin",
+        email="crawladmin@test.com",
+        password="testpassword",
    )


@pytest.fixture
 def crawl(admin_user):
    return Crawl.objects.create(
-        urls='https://example.com\nhttps://example.org',
-        tags_str='alpha,beta',
+        urls="https://example.com\nhttps://example.org",
+        tags_str="alpha,beta",
        created_by=admin_user,
    )


 def test_crawl_admin_change_view_renders_tag_editor_widget(client, admin_user, crawl):
-    client.login(username='crawladmin', password='testpassword')
+    client.login(username="crawladmin", password="testpassword")

    response = client.get(
-        reverse('admin:crawls_crawl_change', args=[crawl.pk]),
+        reverse("admin:crawls_crawl_change", args=[crawl.pk]),
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200
    assert b'name="tags_editor"' in response.content
-    assert b'tag-editor-container' in response.content
-    assert b'alpha' in response.content
-    assert b'beta' in response.content
+    assert b"tag-editor-container" in response.content
+    assert b"alpha" in response.content
+    assert b"beta" in response.content


 def test_crawl_admin_add_view_renders_url_filter_alias_fields(client, admin_user):
-    client.login(username='crawladmin', password='testpassword')
+    client.login(username="crawladmin", password="testpassword")

    response = client.get(
-        reverse('admin:crawls_crawl_add'),
+        reverse("admin:crawls_crawl_add"),
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200
    assert b'name="url_filters_allowlist"' in response.content
    assert b'name="url_filters_denylist"' in response.content
-    assert b'Same domain only' in response.content
+    assert b"Same domain only" in response.content


 def test_crawl_admin_form_saves_tags_editor_to_tags_str(crawl, admin_user):
    form = CrawlAdminForm(
        data={
-            'created_at': crawl.created_at.strftime('%Y-%m-%d %H:%M:%S'),
-            'urls': crawl.urls,
-            'config': '{}',
-            'max_depth': '0',
-            'tags_editor': 'alpha, beta, Alpha, gamma',
-            'url_filters_allowlist': 'example.com\n*.example.com',
-            'url_filters_denylist': 'static.example.com',
-            'persona_id': '',
-            'label': '',
-            'notes': '',
-            'schedule': '',
-            'status': crawl.status,
-            'retry_at': crawl.retry_at.strftime('%Y-%m-%d %H:%M:%S'),
-            'created_by': str(admin_user.pk),
-            'num_uses_failed': '0',
-            'num_uses_succeeded': '0',
+            "created_at": crawl.created_at.strftime("%Y-%m-%d %H:%M:%S"),
+            "urls": crawl.urls,
+            "config": "{}",
+            "max_depth": "0",
+            "max_urls": "3",
+            "max_size": str(45 * 1024 * 1024),
+            "tags_editor": "alpha, beta, Alpha, gamma",
+            "url_filters_allowlist": "example.com\n*.example.com",
+            "url_filters_denylist": "static.example.com",
+            "persona_id": "",
+            "label": "",
+            "notes": "",
+            "schedule": "",
+            "status": crawl.status,
+            "retry_at": crawl.retry_at.strftime("%Y-%m-%d %H:%M:%S"),
+            "created_by": str(admin_user.pk),
+            "num_uses_failed": "0",
+            "num_uses_succeeded": "0",
        },
        instance=crawl,
    )
@@ -91,130 +93,140 @@ def test_crawl_admin_form_saves_tags_editor_to_tags_str(crawl, admin_user):

    updated = form.save()
    updated.refresh_from_db()
-    assert updated.tags_str == 'alpha,beta,gamma'
-    assert updated.config['URL_ALLOWLIST'] == 'example.com\n*.example.com'
-    assert updated.config['URL_DENYLIST'] == 'static.example.com'
+    assert updated.tags_str == "alpha,beta,gamma"
+    assert updated.max_urls == 3
+    assert updated.max_size == 45 * 1024 * 1024
+    assert updated.config["MAX_URLS"] == 3
+    assert updated.config["MAX_SIZE"] == 45 * 1024 * 1024
+    assert updated.config["URL_ALLOWLIST"] == "example.com\n*.example.com"
+    assert updated.config["URL_DENYLIST"] == "static.example.com"


 def test_crawl_admin_delete_snapshot_action_removes_snapshot_and_url(client, admin_user):
    crawl = Crawl.objects.create(
-        urls='https://example.com/remove-me',
+        urls="https://example.com/remove-me",
        created_by=admin_user,
    )
    snapshot = Snapshot.objects.create(
        crawl=crawl,
-        url='https://example.com/remove-me',
+        url="https://example.com/remove-me",
    )

-    client.login(username='crawladmin', password='testpassword')
+    client.login(username="crawladmin", password="testpassword")
    response = client.post(
-        reverse('admin:crawls_crawl_snapshot_delete', args=[crawl.pk, snapshot.pk]),
+        reverse("admin:crawls_crawl_snapshot_delete", args=[crawl.pk, snapshot.pk]),
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200
-    assert response.json()['ok'] is True
+    assert response.json()["ok"] is True
    assert not Snapshot.objects.filter(pk=snapshot.pk).exists()

    crawl.refresh_from_db()
-    assert 'https://example.com/remove-me' not in crawl.urls
+    assert "https://example.com/remove-me" not in crawl.urls


 def test_crawl_admin_exclude_domain_action_prunes_urls_and_pending_snapshots(client, admin_user):
    crawl = Crawl.objects.create(
-        urls='\n'.join([
-            'https://cdn.example.com/asset.js',
-            'https://cdn.example.com/second.js',
-            'https://example.com/root',
-        ]),
+        urls="\n".join(
+            [
+                "https://cdn.example.com/asset.js",
+                "https://cdn.example.com/second.js",
+                "https://example.com/root",
+            ],
+        ),
        created_by=admin_user,
    )
    queued_snapshot = Snapshot.objects.create(
        crawl=crawl,
-        url='https://cdn.example.com/asset.js',
+        url="https://cdn.example.com/asset.js",
        status=Snapshot.StatusChoices.QUEUED,
    )
    preserved_snapshot = Snapshot.objects.create(
        crawl=crawl,
-        url='https://example.com/root',
+        url="https://example.com/root",
        status=Snapshot.StatusChoices.SEALED,
    )

-    client.login(username='crawladmin', password='testpassword')
+    client.login(username="crawladmin", password="testpassword")
    response = client.post(
-        reverse('admin:crawls_crawl_snapshot_exclude_domain', args=[crawl.pk, queued_snapshot.pk]),
+        reverse("admin:crawls_crawl_snapshot_exclude_domain", args=[crawl.pk, queued_snapshot.pk]),
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200
    payload = response.json()
-    assert payload['ok'] is True
-    assert payload['domain'] == 'cdn.example.com'
+    assert payload["ok"] is True
+    assert payload["domain"] == "cdn.example.com"

    crawl.refresh_from_db()
-    assert crawl.get_url_denylist(use_effective_config=False) == ['cdn.example.com']
-    assert 'https://cdn.example.com/asset.js' not in crawl.urls
-    assert 'https://cdn.example.com/second.js' not in crawl.urls
-    assert 'https://example.com/root' in crawl.urls
+    assert crawl.get_url_denylist(use_effective_config=False) == ["cdn.example.com"]
+    assert "https://cdn.example.com/asset.js" not in crawl.urls
+    assert "https://cdn.example.com/second.js" not in crawl.urls
+    assert "https://example.com/root" in crawl.urls
    assert not Snapshot.objects.filter(pk=queued_snapshot.pk).exists()
    assert Snapshot.objects.filter(pk=preserved_snapshot.pk).exists()


 def test_snapshot_from_json_trims_markdown_suffixes_on_discovered_urls(crawl):
    snapshot = Snapshot.from_json(
-        {'url': 'https://docs.sweeting.me/s/youtube-favorites)**'},
-        overrides={'crawl': crawl},
+        {"url": "https://docs.sweeting.me/s/youtube-favorites)**"},
+        overrides={"crawl": crawl},
        queue_for_extraction=False,
    )

    assert snapshot is not None
-    assert snapshot.url == 'https://docs.sweeting.me/s/youtube-favorites'
+    assert snapshot.url == "https://docs.sweeting.me/s/youtube-favorites"


 def test_create_snapshots_from_urls_respects_url_allowlist_and_denylist(admin_user):
    crawl = Crawl.objects.create(
-        urls='\n'.join([
-            'https://example.com/root',
-            'https://static.example.com/app.js',
-            'https://other.test/page',
-        ]),
+        urls="\n".join(
+            [
+                "https://example.com/root",
+                "https://static.example.com/app.js",
+                "https://other.test/page",
+            ],
+        ),
        created_by=admin_user,
        config={
-            'URL_ALLOWLIST': 'example.com',
-            'URL_DENYLIST': 'static.example.com',
+            "URL_ALLOWLIST": "example.com",
+            "URL_DENYLIST": "static.example.com",
        },
    )

    created = crawl.create_snapshots_from_urls()

-    assert [snapshot.url for snapshot in created] == ['https://example.com/root']
+    assert [snapshot.url for snapshot in created] == ["https://example.com/root"]


 def test_url_filter_regex_lists_preserve_commas_and_split_on_newlines_only(admin_user):
    crawl = Crawl.objects.create(
-        urls='\n'.join([
-            'https://example.com/root',
-            'https://example.com/path,with,commas',
-            'https://other.test/page',
-        ]),
+        urls="\n".join(
+            [
+                "https://example.com/root",
+                "https://example.com/path,with,commas",
+                "https://other.test/page",
+            ],
+        ),
        created_by=admin_user,
        config={
-            'URL_ALLOWLIST': r'^https://example\.com/(root|path,with,commas)$' + '\n' + r'^https://other\.test/page$',
-            'URL_DENYLIST': r'^https://example\.com/path,with,commas$',
+            "URL_ALLOWLIST": r"^https://example\.com/(root|path,with,commas)$" + "\n" + r"^https://other\.test/page$",
+            "URL_DENYLIST": r"^https://example\.com/path,with,commas$",
        },
    )

    assert crawl.get_url_allowlist(use_effective_config=False) == [
-        r'^https://example\.com/(root|path,with,commas)$',
-        r'^https://other\.test/page$',
+        r"^https://example\.com/(root|path,with,commas)$",
+        r"^https://other\.test/page$",
    ]
    assert crawl.get_url_denylist(use_effective_config=False) == [
-        r'^https://example\.com/path,with,commas$',
+        r"^https://example\.com/path,with,commas$",
    ]

    created = crawl.create_snapshots_from_urls()

    assert [snapshot.url for snapshot in created] == [
-        'https://example.com/root',
-        'https://other.test/page',
+        "https://example.com/root",
+        "https://other.test/page",
    ]
--- a/archivebox/tests/test_hooks.py
+++ b/archivebox/tests/test_hooks.py
@@ -19,7 +19,7 @@ from pathlib import Path
 from unittest.mock import patch

 # Set up Django before importing any Django-dependent modules
-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "archivebox.settings")


 class TestBackgroundHookDetection(unittest.TestCase):
@@ -28,32 +28,38 @@ class TestBackgroundHookDetection(unittest.TestCase):
    def test_bg_js_suffix_detected(self):
        """Hooks with .bg.js suffix should be detected as background."""
        from archivebox.hooks import is_background_hook
-        self.assertTrue(is_background_hook('on_Snapshot__21_consolelog.daemon.bg.js'))
+
+        self.assertTrue(is_background_hook("on_Snapshot__21_consolelog.daemon.bg.js"))

    def test_bg_py_suffix_detected(self):
        """Hooks with .bg.py suffix should be detected as background."""
        from archivebox.hooks import is_background_hook
-        self.assertTrue(is_background_hook('on_Snapshot__24_responses.finite.bg.py'))
+
+        self.assertTrue(is_background_hook("on_Snapshot__24_responses.finite.bg.py"))

    def test_bg_sh_suffix_detected(self):
        """Hooks with .bg.sh suffix should be detected as background."""
        from archivebox.hooks import is_background_hook
-        self.assertTrue(is_background_hook('on_Snapshot__23_ssl.daemon.bg.sh'))
+
+        self.assertTrue(is_background_hook("on_Snapshot__23_ssl.daemon.bg.sh"))

    def test_legacy_background_suffix_detected(self):
        """Hooks with __background in stem should be detected (backwards compat)."""
        from archivebox.hooks import is_background_hook
-        self.assertTrue(is_background_hook('on_Snapshot__21_consolelog__background.js'))
+
+        self.assertTrue(is_background_hook("on_Snapshot__21_consolelog__background.js"))

    def test_foreground_hook_not_detected(self):
        """Hooks without .bg. or __background should NOT be detected as background."""
        from archivebox.hooks import is_background_hook
-        self.assertFalse(is_background_hook('on_Snapshot__11_favicon.js'))
+
+        self.assertFalse(is_background_hook("on_Snapshot__11_favicon.js"))

    def test_foreground_py_hook_not_detected(self):
        """Python hooks without .bg. should NOT be detected as background."""
        from archivebox.hooks import is_background_hook
-        self.assertFalse(is_background_hook('on_Snapshot__50_wget.py'))
+
+        self.assertFalse(is_background_hook("on_Snapshot__50_wget.py"))


 class TestJSONLParsing(unittest.TestCase):
@@ -63,56 +69,61 @@ class TestJSONLParsing(unittest.TestCase):
        """Clean JSONL format should be parsed correctly."""
        stdout = '{"type": "ArchiveResult", "status": "succeeded", "output_str": "Done"}'
        from archivebox.machine.models import Process
+
        records = Process.parse_records_from_text(stdout)

        self.assertEqual(len(records), 1)
-        self.assertEqual(records[0]['type'], 'ArchiveResult')
-        self.assertEqual(records[0]['status'], 'succeeded')
-        self.assertEqual(records[0]['output_str'], 'Done')
+        self.assertEqual(records[0]["type"], "ArchiveResult")
+        self.assertEqual(records[0]["status"], "succeeded")
+        self.assertEqual(records[0]["output_str"], "Done")

    def test_parse_multiple_jsonl_records(self):
        """Multiple JSONL records should all be parsed."""
-        stdout = '''{"type": "ArchiveResult", "status": "succeeded", "output_str": "Done"}
-{"type": "Binary", "name": "wget", "abspath": "/usr/bin/wget"}'''
+        stdout = """{"type": "ArchiveResult", "status": "succeeded", "output_str": "Done"}
+{"type": "Binary", "name": "wget", "abspath": "/usr/bin/wget"}"""
        from archivebox.machine.models import Process
+
        records = Process.parse_records_from_text(stdout)

        self.assertEqual(len(records), 2)
-        self.assertEqual(records[0]['type'], 'ArchiveResult')
-        self.assertEqual(records[1]['type'], 'Binary')
+        self.assertEqual(records[0]["type"], "ArchiveResult")
+        self.assertEqual(records[1]["type"], "Binary")

    def test_parse_jsonl_with_log_output(self):
        """JSONL should be extracted from mixed stdout with log lines."""
-        stdout = '''Starting hook execution...
+        stdout = """Starting hook execution...
 Processing URL: https://example.com
 {"type": "ArchiveResult", "status": "succeeded", "output_str": "Downloaded"}
-Hook completed successfully'''
+Hook completed successfully"""
        from archivebox.machine.models import Process
+
        records = Process.parse_records_from_text(stdout)

        self.assertEqual(len(records), 1)
-        self.assertEqual(records[0]['status'], 'succeeded')
+        self.assertEqual(records[0]["status"], "succeeded")

    def test_ignore_invalid_json(self):
        """Invalid JSON should be silently ignored."""
-        stdout = '''{"type": "ArchiveResult", "status": "succeeded"}
+        stdout = """{"type": "ArchiveResult", "status": "succeeded"}
 {invalid json here}
 not json at all
-{"type": "Binary", "name": "wget"}'''
+{"type": "Binary", "name": "wget"}"""
        from archivebox.machine.models import Process
+
        records = Process.parse_records_from_text(stdout)

        self.assertEqual(len(records), 2)

    def test_json_without_type_ignored(self):
        """JSON objects without 'type' field should be ignored."""
-        stdout = '''{"status": "succeeded", "output_str": "Done"}
-{"type": "ArchiveResult", "status": "succeeded"}'''
+        stdout = """{"status": "succeeded", "output_str": "Done"}
+{"type": "ArchiveResult", "status": "succeeded"}"""
        from archivebox.machine.models import Process
+
        records = Process.parse_records_from_text(stdout)

        self.assertEqual(len(records), 1)
-        self.assertEqual(records[0]['type'], 'ArchiveResult')
+        self.assertEqual(records[0]["type"], "ArchiveResult")


 class TestInstallHookEnvVarHandling(unittest.TestCase):
@@ -121,7 +132,7 @@ class TestInstallHookEnvVarHandling(unittest.TestCase):
    def setUp(self):
        """Set up test environment."""
        self.work_dir = Path(tempfile.mkdtemp())
-        self.test_hook = self.work_dir / 'test_hook.py'
+        self.test_hook = self.work_dir / "test_hook.py"

    def tearDown(self):
        """Clean up test environment."""
@@ -130,37 +141,37 @@ class TestInstallHookEnvVarHandling(unittest.TestCase):
    def test_binary_env_var_absolute_path_handling(self):
        """Install hooks should handle absolute paths in XYZ_BINARY."""
        # Test the logic that install hooks use
-        configured_binary = '/custom/path/to/wget2'
-        if '/' in configured_binary:
+        configured_binary = "/custom/path/to/wget2"
+        if "/" in configured_binary:
            bin_name = Path(configured_binary).name
        else:
            bin_name = configured_binary

-        self.assertEqual(bin_name, 'wget2')
+        self.assertEqual(bin_name, "wget2")

    def test_binary_env_var_name_only_handling(self):
        """Install hooks should handle binary names in XYZ_BINARY."""
        # Test the logic that install hooks use
-        configured_binary = 'wget2'
-        if '/' in configured_binary:
+        configured_binary = "wget2"
+        if "/" in configured_binary:
            bin_name = Path(configured_binary).name
        else:
            bin_name = configured_binary

-        self.assertEqual(bin_name, 'wget2')
+        self.assertEqual(bin_name, "wget2")

    def test_binary_env_var_empty_default(self):
        """Install hooks should use default when XYZ_BINARY is empty."""
-        configured_binary = ''
+        configured_binary = ""
        if configured_binary:
-            if '/' in configured_binary:
+            if "/" in configured_binary:
                bin_name = Path(configured_binary).name
            else:
                bin_name = configured_binary
        else:
-            bin_name = 'wget'  # default
+            bin_name = "wget"  # default

-        self.assertEqual(bin_name, 'wget')
+        self.assertEqual(bin_name, "wget")


 class TestHookDiscovery(unittest.TestCase):
@@ -169,22 +180,22 @@ class TestHookDiscovery(unittest.TestCase):
    def setUp(self):
        """Set up test plugin directory."""
        self.test_dir = Path(tempfile.mkdtemp())
-        self.plugins_dir = self.test_dir / 'plugins'
+        self.plugins_dir = self.test_dir / "plugins"
        self.plugins_dir.mkdir()

        # Create test plugin structure
-        wget_dir = self.plugins_dir / 'wget'
+        wget_dir = self.plugins_dir / "wget"
        wget_dir.mkdir()
-        (wget_dir / 'on_Snapshot__50_wget.py').write_text('# test hook')
-        (wget_dir / 'on_Crawl__10_wget_install.finite.bg.py').write_text('# install hook')
+        (wget_dir / "on_Snapshot__50_wget.py").write_text("# test hook")
+        (wget_dir / "on_Crawl__10_wget_install.finite.bg.py").write_text("# install hook")

-        chrome_dir = self.plugins_dir / 'chrome'
+        chrome_dir = self.plugins_dir / "chrome"
        chrome_dir.mkdir(exist_ok=True)
-        (chrome_dir / 'on_Snapshot__20_chrome_tab.daemon.bg.js').write_text('// background hook')
+        (chrome_dir / "on_Snapshot__20_chrome_tab.daemon.bg.js").write_text("// background hook")

-        consolelog_dir = self.plugins_dir / 'consolelog'
+        consolelog_dir = self.plugins_dir / "consolelog"
        consolelog_dir.mkdir()
-        (consolelog_dir / 'on_Snapshot__21_consolelog.daemon.bg.js').write_text('// background hook')
+        (consolelog_dir / "on_Snapshot__21_consolelog.daemon.bg.js").write_text("// background hook")

    def tearDown(self):
        """Clean up test directory."""
@@ -194,109 +205,118 @@ class TestHookDiscovery(unittest.TestCase):
        """discover_hooks() should find all hooks for an event."""
        # Use the local implementation since we can't easily mock BUILTIN_PLUGINS_DIR
        hooks = []
-        for ext in ('sh', 'py', 'js'):
-            pattern = f'*/on_Snapshot__*.{ext}'
+        for ext in ("sh", "py", "js"):
+            pattern = f"*/on_Snapshot__*.{ext}"
            hooks.extend(self.plugins_dir.glob(pattern))

        hooks = sorted(set(hooks), key=lambda p: p.name)

        self.assertEqual(len(hooks), 3)
        hook_names = [h.name for h in hooks]
-        self.assertIn('on_Snapshot__20_chrome_tab.daemon.bg.js', hook_names)
-        self.assertIn('on_Snapshot__21_consolelog.daemon.bg.js', hook_names)
-        self.assertIn('on_Snapshot__50_wget.py', hook_names)
+        self.assertIn("on_Snapshot__20_chrome_tab.daemon.bg.js", hook_names)
+        self.assertIn("on_Snapshot__21_consolelog.daemon.bg.js", hook_names)
+        self.assertIn("on_Snapshot__50_wget.py", hook_names)

    def test_discover_hooks_sorted_by_name(self):
        """Hooks should be sorted by filename (numeric prefix ordering)."""
        hooks = []
-        for ext in ('sh', 'py', 'js'):
-            pattern = f'*/on_Snapshot__*.{ext}'
+        for ext in ("sh", "py", "js"):
+            pattern = f"*/on_Snapshot__*.{ext}"
            hooks.extend(self.plugins_dir.glob(pattern))

        hooks = sorted(set(hooks), key=lambda p: p.name)

        # Check numeric ordering
-        self.assertEqual(hooks[0].name, 'on_Snapshot__20_chrome_tab.daemon.bg.js')
-        self.assertEqual(hooks[1].name, 'on_Snapshot__21_consolelog.daemon.bg.js')
-        self.assertEqual(hooks[2].name, 'on_Snapshot__50_wget.py')
+        self.assertEqual(hooks[0].name, "on_Snapshot__20_chrome_tab.daemon.bg.js")
+        self.assertEqual(hooks[1].name, "on_Snapshot__21_consolelog.daemon.bg.js")
+        self.assertEqual(hooks[2].name, "on_Snapshot__50_wget.py")

    def test_get_plugins_includes_non_snapshot_plugin_dirs(self):
        """get_plugins() should include binary-only plugins with standardized metadata."""
-        env_dir = self.plugins_dir / 'env'
+        env_dir = self.plugins_dir / "env"
        env_dir.mkdir()
-        (env_dir / 'on_Binary__15_env_discover.py').write_text('# binary hook')
-        (env_dir / 'config.json').write_text('{"type": "object", "properties": {}}')
+        (env_dir / "on_Binary__15_env_discover.py").write_text("# binary hook")
+        (env_dir / "config.json").write_text('{"type": "object", "properties": {}}')

        from archivebox import hooks as hooks_module

        hooks_module.get_plugins.cache_clear()
-        with patch.object(hooks_module, 'BUILTIN_PLUGINS_DIR', self.plugins_dir), patch.object(hooks_module, 'USER_PLUGINS_DIR', self.test_dir / 'user_plugins'):
+        with (
+            patch.object(hooks_module, "BUILTIN_PLUGINS_DIR", self.plugins_dir),
+            patch.object(hooks_module, "USER_PLUGINS_DIR", self.test_dir / "user_plugins"),
+        ):
            plugins = hooks_module.get_plugins()

-        self.assertIn('env', plugins)
+        self.assertIn("env", plugins)

    def test_discover_binary_hooks_ignores_plugins_whitelist(self):
        """Binary provider hooks should remain discoverable under --plugins filtering."""
-        singlefile_dir = self.plugins_dir / 'singlefile'
+        singlefile_dir = self.plugins_dir / "singlefile"
        singlefile_dir.mkdir()
-        (singlefile_dir / 'config.json').write_text(
+        (singlefile_dir / "config.json").write_text(
            json.dumps(
                {
                    "type": "object",
                    "required_plugins": ["chrome"],
                    "properties": {},
-                }
-            )
+                },
+            ),
        )

-        npm_dir = self.plugins_dir / 'npm'
+        npm_dir = self.plugins_dir / "npm"
        npm_dir.mkdir()
-        (npm_dir / 'on_Binary__10_npm_install.py').write_text('# npm binary hook')
-        (npm_dir / 'config.json').write_text('{"type": "object", "properties": {}}')
+        (npm_dir / "on_Binary__10_npm_install.py").write_text("# npm binary hook")
+        (npm_dir / "config.json").write_text('{"type": "object", "properties": {}}')

        from archivebox import hooks as hooks_module

        hooks_module.get_plugins.cache_clear()
-        with patch.object(hooks_module, 'BUILTIN_PLUGINS_DIR', self.plugins_dir), patch.object(hooks_module, 'USER_PLUGINS_DIR', self.test_dir / 'user_plugins'):
-            hooks = hooks_module.discover_hooks('Binary', config={'PLUGINS': 'singlefile'})
+        with (
+            patch.object(hooks_module, "BUILTIN_PLUGINS_DIR", self.plugins_dir),
+            patch.object(hooks_module, "USER_PLUGINS_DIR", self.test_dir / "user_plugins"),
+        ):
+            hooks = hooks_module.discover_hooks("Binary", config={"PLUGINS": "singlefile"})

        hook_names = [hook.name for hook in hooks]
-        self.assertIn('on_Binary__10_npm_install.py', hook_names)
+        self.assertIn("on_Binary__10_npm_install.py", hook_names)

    def test_discover_crawl_hooks_only_include_declared_plugin_dependencies(self):
        """Crawl hook discovery should include required_plugins without broadening to provider plugins."""
-        responses_dir = self.plugins_dir / 'responses'
+        responses_dir = self.plugins_dir / "responses"
        responses_dir.mkdir()
-        (responses_dir / 'config.json').write_text(
+        (responses_dir / "config.json").write_text(
            json.dumps(
                {
                    "type": "object",
                    "required_plugins": ["chrome"],
                    "properties": {},
-                }
-            )
+                },
+            ),
        )

-        chrome_dir = self.plugins_dir / 'chrome'
+        chrome_dir = self.plugins_dir / "chrome"
        chrome_dir.mkdir(exist_ok=True)
-        (chrome_dir / 'config.json').write_text('{"type": "object", "properties": {}}')
-        (chrome_dir / 'on_Crawl__70_chrome_install.finite.bg.py').write_text('# chrome crawl hook')
+        (chrome_dir / "config.json").write_text('{"type": "object", "properties": {}}')
+        (chrome_dir / "on_Crawl__70_chrome_install.finite.bg.py").write_text("# chrome crawl hook")

-        npm_dir = self.plugins_dir / 'npm'
+        npm_dir = self.plugins_dir / "npm"
        npm_dir.mkdir()
-        (npm_dir / 'on_Binary__10_npm_install.py').write_text('# npm binary hook')
-        (npm_dir / 'on_Crawl__00_npm_install.py').write_text('# npm crawl hook')
-        (npm_dir / 'config.json').write_text('{"type": "object", "properties": {}}')
+        (npm_dir / "on_Binary__10_npm_install.py").write_text("# npm binary hook")
+        (npm_dir / "on_Crawl__00_npm_install.py").write_text("# npm crawl hook")
+        (npm_dir / "config.json").write_text('{"type": "object", "properties": {}}')

        from archivebox import hooks as hooks_module

        hooks_module.get_plugins.cache_clear()
-        with patch.object(hooks_module, 'BUILTIN_PLUGINS_DIR', self.plugins_dir), patch.object(hooks_module, 'USER_PLUGINS_DIR', self.test_dir / 'user_plugins'):
-            hooks = hooks_module.discover_hooks('Crawl', config={'PLUGINS': 'responses'})
+        with (
+            patch.object(hooks_module, "BUILTIN_PLUGINS_DIR", self.plugins_dir),
+            patch.object(hooks_module, "USER_PLUGINS_DIR", self.test_dir / "user_plugins"),
+        ):
+            hooks = hooks_module.discover_hooks("Crawl", config={"PLUGINS": "responses"})

        hook_names = [hook.name for hook in hooks]
-        self.assertIn('on_Crawl__70_chrome_install.finite.bg.py', hook_names)
-        self.assertNotIn('on_Crawl__00_npm_install.py', hook_names)
+        self.assertIn("on_Crawl__70_chrome_install.finite.bg.py", hook_names)
+        self.assertNotIn("on_Crawl__00_npm_install.py", hook_names)


 class TestGetExtractorName(unittest.TestCase):
@@ -304,27 +324,29 @@ class TestGetExtractorName(unittest.TestCase):

    def test_strip_numeric_prefix(self):
        """Numeric prefix should be stripped from extractor name."""
+
        # Inline implementation of get_extractor_name
        def get_extractor_name(extractor: str) -> str:
-            parts = extractor.split('_', 1)
+            parts = extractor.split("_", 1)
            if len(parts) == 2 and parts[0].isdigit():
                return parts[1]
            return extractor

-        self.assertEqual(get_extractor_name('10_title'), 'title')
-        self.assertEqual(get_extractor_name('26_readability'), 'readability')
-        self.assertEqual(get_extractor_name('50_parse_html_urls'), 'parse_html_urls')
+        self.assertEqual(get_extractor_name("10_title"), "title")
+        self.assertEqual(get_extractor_name("26_readability"), "readability")
+        self.assertEqual(get_extractor_name("50_parse_html_urls"), "parse_html_urls")

    def test_no_prefix_unchanged(self):
        """Extractor without numeric prefix should be unchanged."""
+
        def get_extractor_name(extractor: str) -> str:
-            parts = extractor.split('_', 1)
+            parts = extractor.split("_", 1)
            if len(parts) == 2 and parts[0].isdigit():
                return parts[1]
            return extractor

-        self.assertEqual(get_extractor_name('title'), 'title')
-        self.assertEqual(get_extractor_name('readability'), 'readability')
+        self.assertEqual(get_extractor_name("title"), "title")
+        self.assertEqual(get_extractor_name("readability"), "readability")


 class TestHookExecution(unittest.TestCase):
@@ -340,14 +362,14 @@ class TestHookExecution(unittest.TestCase):

    def test_python_hook_execution(self):
        """Python hook should execute and output JSONL."""
-        hook_path = self.work_dir / 'test_hook.py'
-        hook_path.write_text('''#!/usr/bin/env python3
+        hook_path = self.work_dir / "test_hook.py"
+        hook_path.write_text("""#!/usr/bin/env python3
 import json
 print(json.dumps({"type": "ArchiveResult", "status": "succeeded", "output_str": "Test passed"}))
-''')
+""")

        result = subprocess.run(
-            ['python3', str(hook_path)],
+            ["python3", str(hook_path)],
            cwd=str(self.work_dir),
            capture_output=True,
            text=True,
@@ -355,24 +377,25 @@ print(json.dumps({"type": "ArchiveResult", "status": "succeeded", "output_str":

        self.assertEqual(result.returncode, 0)
        from archivebox.machine.models import Process
+
        records = Process.parse_records_from_text(result.stdout)
        self.assertTrue(records)
-        self.assertEqual(records[0]['type'], 'ArchiveResult')
-        self.assertEqual(records[0]['status'], 'succeeded')
+        self.assertEqual(records[0]["type"], "ArchiveResult")
+        self.assertEqual(records[0]["status"], "succeeded")

    def test_js_hook_execution(self):
        """JavaScript hook should execute and output JSONL."""
        # Skip if node not available
-        if shutil.which('node') is None:
-            self.skipTest('Node.js not available')
+        if shutil.which("node") is None:
+            self.skipTest("Node.js not available")

-        hook_path = self.work_dir / 'test_hook.js'
-        hook_path.write_text('''#!/usr/bin/env node
+        hook_path = self.work_dir / "test_hook.js"
+        hook_path.write_text("""#!/usr/bin/env node
 console.log(JSON.stringify({type: 'ArchiveResult', status: 'succeeded', output_str: 'JS test'}));
-''')
+""")

        result = subprocess.run(
-            ['node', str(hook_path)],
+            ["node", str(hook_path)],
            cwd=str(self.work_dir),
            capture_output=True,
            text=True,
@@ -380,15 +403,16 @@ console.log(JSON.stringify({type: 'ArchiveResult', status: 'succeeded', output_s

        self.assertEqual(result.returncode, 0)
        from archivebox.machine.models import Process
+
        records = Process.parse_records_from_text(result.stdout)
        self.assertTrue(records)
-        self.assertEqual(records[0]['type'], 'ArchiveResult')
-        self.assertEqual(records[0]['status'], 'succeeded')
+        self.assertEqual(records[0]["type"], "ArchiveResult")
+        self.assertEqual(records[0]["status"], "succeeded")

    def test_hook_receives_cli_args(self):
        """Hook should receive CLI arguments."""
-        hook_path = self.work_dir / 'test_hook.py'
-        hook_path.write_text('''#!/usr/bin/env python3
+        hook_path = self.work_dir / "test_hook.py"
+        hook_path.write_text("""#!/usr/bin/env python3
 import sys
 import json
 # Simple arg parsing
@@ -398,10 +422,10 @@ for arg in sys.argv[1:]:
        key, val = arg[2:].split('=', 1)
        args[key.replace('-', '_')] = val
 print(json.dumps({"type": "ArchiveResult", "status": "succeeded", "url": args.get("url", "")}))
-''')
+""")

        result = subprocess.run(
-            ['python3', str(hook_path), '--url=https://example.com'],
+            ["python3", str(hook_path), "--url=https://example.com"],
            cwd=str(self.work_dir),
            capture_output=True,
            text=True,
@@ -409,9 +433,10 @@ print(json.dumps({"type": "ArchiveResult", "status": "succeeded", "url": args.ge

        self.assertEqual(result.returncode, 0)
        from archivebox.machine.models import Process
+
        records = Process.parse_records_from_text(result.stdout)
        self.assertTrue(records)
-        self.assertEqual(records[0]['url'], 'https://example.com')
+        self.assertEqual(records[0]["url"], "https://example.com")


 class TestInstallHookOutput(unittest.TestCase):
@@ -427,35 +452,41 @@ class TestInstallHookOutput(unittest.TestCase):

    def test_install_hook_outputs_binary(self):
        """Install hook should output Binary JSONL when binary found."""
-        hook_output = json.dumps({
-            'type': 'Binary',
-            'name': 'wget',
-            'abspath': '/usr/bin/wget',
-            'version': '1.21.3',
-            'sha256': None,
-            'binprovider': 'apt',
-        })
+        hook_output = json.dumps(
+            {
+                "type": "Binary",
+                "name": "wget",
+                "abspath": "/usr/bin/wget",
+                "version": "1.21.3",
+                "sha256": None,
+                "binprovider": "apt",
+            },
+        )

        from archivebox.machine.models import Process
+
        data = Process.parse_records_from_text(hook_output)[0]
-        self.assertEqual(data['type'], 'Binary')
-        self.assertEqual(data['name'], 'wget')
-        self.assertTrue(data['abspath'].startswith('/'))
+        self.assertEqual(data["type"], "Binary")
+        self.assertEqual(data["name"], "wget")
+        self.assertTrue(data["abspath"].startswith("/"))

    def test_install_hook_outputs_machine_config(self):
        """Install hook should output Machine config update JSONL."""
-        hook_output = json.dumps({
-            'type': 'Machine',
-            'config': {
-                'WGET_BINARY': '/usr/bin/wget',
+        hook_output = json.dumps(
+            {
+                "type": "Machine",
+                "config": {
+                    "WGET_BINARY": "/usr/bin/wget",
+                },
            },
-        })
+        )

        from archivebox.machine.models import Process
+
        data = Process.parse_records_from_text(hook_output)[0]
-        self.assertEqual(data['type'], 'Machine')
-        self.assertIn('config', data)
-        self.assertEqual(data['config']['WGET_BINARY'], '/usr/bin/wget')
+        self.assertEqual(data["type"], "Machine")
+        self.assertIn("config", data)
+        self.assertEqual(data["config"]["WGET_BINARY"], "/usr/bin/wget")


 class TestSnapshotHookOutput(unittest.TestCase):
@@ -463,75 +494,90 @@ class TestSnapshotHookOutput(unittest.TestCase):

    def test_snapshot_hook_basic_output(self):
        """Snapshot hook should output clean ArchiveResult JSONL."""
-        hook_output = json.dumps({
-            'type': 'ArchiveResult',
-            'status': 'succeeded',
-            'output_str': 'Downloaded 5 files',
-        })
+        hook_output = json.dumps(
+            {
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": "Downloaded 5 files",
+            },
+        )

        from archivebox.machine.models import Process
+
        data = Process.parse_records_from_text(hook_output)[0]
-        self.assertEqual(data['type'], 'ArchiveResult')
-        self.assertEqual(data['status'], 'succeeded')
-        self.assertIn('output_str', data)
+        self.assertEqual(data["type"], "ArchiveResult")
+        self.assertEqual(data["status"], "succeeded")
+        self.assertIn("output_str", data)

    def test_snapshot_hook_with_cmd(self):
        """Snapshot hook should include cmd for binary FK lookup."""
-        hook_output = json.dumps({
-            'type': 'ArchiveResult',
-            'status': 'succeeded',
-            'output_str': 'Archived with wget',
-            'cmd': ['/usr/bin/wget', '-p', '-k', 'https://example.com'],
-        })
+        hook_output = json.dumps(
+            {
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": "Archived with wget",
+                "cmd": ["/usr/bin/wget", "-p", "-k", "https://example.com"],
+            },
+        )

        from archivebox.machine.models import Process
+
        data = Process.parse_records_from_text(hook_output)[0]
-        self.assertEqual(data['type'], 'ArchiveResult')
-        self.assertIsInstance(data['cmd'], list)
-        self.assertEqual(data['cmd'][0], '/usr/bin/wget')
+        self.assertEqual(data["type"], "ArchiveResult")
+        self.assertIsInstance(data["cmd"], list)
+        self.assertEqual(data["cmd"][0], "/usr/bin/wget")

    def test_snapshot_hook_with_output_json(self):
        """Snapshot hook can include structured metadata in output_json."""
-        hook_output = json.dumps({
-            'type': 'ArchiveResult',
-            'status': 'succeeded',
-            'output_str': 'Got headers',
-            'output_json': {
-                'content-type': 'text/html',
-                'server': 'nginx',
-                'status-code': 200,
+        hook_output = json.dumps(
+            {
+                "type": "ArchiveResult",
+                "status": "succeeded",
+                "output_str": "Got headers",
+                "output_json": {
+                    "content-type": "text/html",
+                    "server": "nginx",
+                    "status-code": 200,
+                },
            },
-        })
+        )

        from archivebox.machine.models import Process
+
        data = Process.parse_records_from_text(hook_output)[0]
-        self.assertEqual(data['type'], 'ArchiveResult')
-        self.assertIsInstance(data['output_json'], dict)
-        self.assertEqual(data['output_json']['status-code'], 200)
+        self.assertEqual(data["type"], "ArchiveResult")
+        self.assertIsInstance(data["output_json"], dict)
+        self.assertEqual(data["output_json"]["status-code"], 200)

    def test_snapshot_hook_skipped_status(self):
        """Snapshot hook should support skipped status."""
-        hook_output = json.dumps({
-            'type': 'ArchiveResult',
-            'status': 'skipped',
-            'output_str': 'SAVE_WGET=False',
-        })
+        hook_output = json.dumps(
+            {
+                "type": "ArchiveResult",
+                "status": "skipped",
+                "output_str": "SAVE_WGET=False",
+            },
+        )

        from archivebox.machine.models import Process
+
        data = Process.parse_records_from_text(hook_output)[0]
-        self.assertEqual(data['status'], 'skipped')
+        self.assertEqual(data["status"], "skipped")

    def test_snapshot_hook_failed_status(self):
        """Snapshot hook should support failed status."""
-        hook_output = json.dumps({
-            'type': 'ArchiveResult',
-            'status': 'failed',
-            'output_str': '404 Not Found',
-        })
+        hook_output = json.dumps(
+            {
+                "type": "ArchiveResult",
+                "status": "failed",
+                "output_str": "404 Not Found",
+            },
+        )

        from archivebox.machine.models import Process
+
        data = Process.parse_records_from_text(hook_output)[0]
-        self.assertEqual(data['status'], 'failed')
+        self.assertEqual(data["status"], "failed")


 class TestPluginMetadata(unittest.TestCase):
@@ -540,16 +586,16 @@ class TestPluginMetadata(unittest.TestCase):
    def test_plugin_name_added(self):
        """run_hook() should add plugin name to records."""
        # Simulate what run_hook() does
-        script = Path('/abx_plugins/plugins/wget/on_Snapshot__50_wget.py')
+        script = Path("/abx_plugins/plugins/wget/on_Snapshot__50_wget.py")
        plugin_name = script.parent.name

-        record = {'type': 'ArchiveResult', 'status': 'succeeded'}
-        record['plugin'] = plugin_name
-        record['plugin_hook'] = str(script)
+        record = {"type": "ArchiveResult", "status": "succeeded"}
+        record["plugin"] = plugin_name
+        record["plugin_hook"] = str(script)

-        self.assertEqual(record['plugin'], 'wget')
-        self.assertIn('on_Snapshot__50_wget.py', record['plugin_hook'])
+        self.assertEqual(record["plugin"], "wget")
+        self.assertIn("on_Snapshot__50_wget.py", record["plugin_hook"])


-if __name__ == '__main__':
+if __name__ == "__main__":
    unittest.main()
--- a/archivebox/tests/test_machine_models.py
+++ b/archivebox/tests/test_machine_models.py
@@ -30,6 +30,7 @@ from archivebox.machine.models import (
    ProcessMachine,
    MACHINE_RECHECK_INTERVAL,
    PID_REUSE_WINDOW,
+    PROCESS_TIMEOUT_GRACE,
 )


@@ -39,6 +40,7 @@ class TestMachineModel(TestCase):
    def setUp(self):
        """Reset cached machine between tests."""
        import archivebox.machine.models as models
+
        models._CURRENT_MACHINE = None

    def test_machine_current_creates_machine(self):
@@ -49,7 +51,7 @@ class TestMachineModel(TestCase):
        self.assertIsNotNone(machine.id)
        self.assertIsNotNone(machine.guid)
        self.assertEqual(machine.hostname, os.uname().nodename)
-        self.assertIn(machine.os_family, ['linux', 'darwin', 'windows', 'freebsd'])
+        self.assertIn(machine.os_family, ["linux", "darwin", "windows", "freebsd"])

    def test_machine_current_returns_cached(self):
        """Machine.current() should return cached machine within recheck interval."""
@@ -78,8 +80,8 @@ class TestMachineModel(TestCase):
        """Machine.from_json() should update machine config."""
        Machine.current()  # Ensure machine exists
        record = {
-            'config': {
-                'WGET_BINARY': '/usr/bin/wget',
+            "config": {
+                "WGET_BINARY": "/usr/bin/wget",
            },
        }

@@ -87,15 +89,15 @@ class TestMachineModel(TestCase):

        self.assertIsNotNone(result)
        assert result is not None
-        self.assertEqual(result.config.get('WGET_BINARY'), '/usr/bin/wget')
+        self.assertEqual(result.config.get("WGET_BINARY"), "/usr/bin/wget")

    def test_machine_from_jsonl_strips_legacy_chromium_version(self):
        """Machine.from_json() should ignore legacy browser version keys."""
        Machine.current()  # Ensure machine exists
        record = {
-            'config': {
-                'WGET_BINARY': '/usr/bin/wget',
-                'CHROMIUM_VERSION': '123.4.5',
+            "config": {
+                "WGET_BINARY": "/usr/bin/wget",
+                "CHROMIUM_VERSION": "123.4.5",
            },
        }

@@ -103,12 +105,12 @@ class TestMachineModel(TestCase):

        self.assertIsNotNone(result)
        assert result is not None
-        self.assertEqual(result.config.get('WGET_BINARY'), '/usr/bin/wget')
-        self.assertNotIn('CHROMIUM_VERSION', result.config)
+        self.assertEqual(result.config.get("WGET_BINARY"), "/usr/bin/wget")
+        self.assertNotIn("CHROMIUM_VERSION", result.config)

    def test_machine_from_jsonl_invalid(self):
        """Machine.from_json() should return None for invalid records."""
-        result = Machine.from_json({'invalid': 'record'})
+        result = Machine.from_json({"invalid": "record"})
        self.assertIsNone(result)

    def test_machine_current_strips_legacy_chromium_version(self):
@@ -117,16 +119,16 @@ class TestMachineModel(TestCase):

        machine = Machine.current()
        machine.config = {
-            'CHROME_BINARY': '/tmp/chromium',
-            'CHROMIUM_VERSION': '123.4.5',
+            "CHROME_BINARY": "/tmp/chromium",
+            "CHROMIUM_VERSION": "123.4.5",
        }
-        machine.save(update_fields=['config'])
+        machine.save(update_fields=["config"])
        models._CURRENT_MACHINE = machine

        refreshed = Machine.current()

-        self.assertEqual(refreshed.config.get('CHROME_BINARY'), '/tmp/chromium')
-        self.assertNotIn('CHROMIUM_VERSION', refreshed.config)
+        self.assertEqual(refreshed.config.get("CHROME_BINARY"), "/tmp/chromium")
+        self.assertNotIn("CHROMIUM_VERSION", refreshed.config)

    def test_machine_manager_current(self):
        """Machine.objects.current() should return current machine."""
@@ -141,6 +143,7 @@ class TestNetworkInterfaceModel(TestCase):
    def setUp(self):
        """Reset cached interface between tests."""
        import archivebox.machine.models as models
+
        models._CURRENT_MACHINE = None
        models._CURRENT_INTERFACE = None

@@ -170,24 +173,24 @@ class TestNetworkInterfaceModel(TestCase):
        import archivebox.machine.models as models

        first = {
-            'mac_address': 'aa:bb:cc:dd:ee:01',
-            'ip_public': '1.1.1.1',
-            'ip_local': '192.168.1.10',
-            'dns_server': '8.8.8.8',
-            'hostname': 'host-a',
-            'iface': 'en0',
-            'isp': 'ISP A',
-            'city': 'City',
-            'region': 'Region',
-            'country': 'Country',
+            "mac_address": "aa:bb:cc:dd:ee:01",
+            "ip_public": "1.1.1.1",
+            "ip_local": "192.168.1.10",
+            "dns_server": "8.8.8.8",
+            "hostname": "host-a",
+            "iface": "en0",
+            "isp": "ISP A",
+            "city": "City",
+            "region": "Region",
+            "country": "Country",
        }
        second = {
            **first,
-            'ip_public': '2.2.2.2',
-            'ip_local': '10.0.0.5',
+            "ip_public": "2.2.2.2",
+            "ip_local": "10.0.0.5",
        }

-        with patch.object(models, 'get_host_network', side_effect=[first, second]):
+        with patch.object(models, "get_host_network", side_effect=[first, second]):
            interface1 = NetworkInterface.current(refresh=True)
            interface2 = NetworkInterface.current(refresh=True)

@@ -202,6 +205,7 @@ class TestBinaryModel(TestCase):
    def setUp(self):
        """Reset cached binaries and create a machine."""
        import archivebox.machine.models as models
+
        models._CURRENT_MACHINE = None
        models._CURRENT_BINARIES = {}
        self.machine = Machine.current()
@@ -210,22 +214,23 @@ class TestBinaryModel(TestCase):
        """Binary should be created with default values."""
        binary = Binary.objects.create(
            machine=self.machine,
-            name='wget',
-            binproviders='apt,brew,env',
+            name="wget",
+            binproviders="apt,brew,env",
        )

        self.assertIsNotNone(binary.id)
-        self.assertEqual(binary.name, 'wget')
+        self.assertEqual(binary.name, "wget")
        self.assertEqual(binary.status, Binary.StatusChoices.QUEUED)
        self.assertFalse(binary.is_valid)

    def test_binary_is_valid(self):
-        """Binary.is_valid should be True when abspath and version are set."""
+        """Binary.is_valid should be True for installed binaries with a resolved path."""
        binary = Binary.objects.create(
            machine=self.machine,
-            name='wget',
-            abspath='/usr/bin/wget',
-            version='1.21',
+            name="wget",
+            abspath="/usr/bin/wget",
+            version="1.21",
+            status=Binary.StatusChoices.INSTALLED,
        )

        self.assertTrue(binary.is_valid)
@@ -233,25 +238,26 @@ class TestBinaryModel(TestCase):
    def test_binary_manager_get_valid_binary(self):
        """BinaryManager.get_valid_binary() should find valid binaries."""
        # Create invalid binary (no abspath)
-        Binary.objects.create(machine=self.machine, name='wget')
+        Binary.objects.create(machine=self.machine, name="wget")

        # Create valid binary
        Binary.objects.create(
            machine=self.machine,
-            name='wget',
-            abspath='/usr/bin/wget',
-            version='1.21',
+            name="wget",
+            abspath="/usr/bin/wget",
+            version="1.21",
+            status=Binary.StatusChoices.INSTALLED,
        )

-        result = cast(BinaryManager, Binary.objects).get_valid_binary('wget')
+        result = cast(BinaryManager, Binary.objects).get_valid_binary("wget")

        self.assertIsNotNone(result)
        assert result is not None
-        self.assertEqual(result.abspath, '/usr/bin/wget')
+        self.assertEqual(result.abspath, "/usr/bin/wget")

    def test_binary_update_and_requeue(self):
        """Binary.update_and_requeue() should update fields and save."""
-        binary = Binary.objects.create(machine=self.machine, name='test')
+        binary = Binary.objects.create(machine=self.machine, name="test")
        old_modified = binary.modified_at

        binary.update_and_requeue(
@@ -266,16 +272,18 @@ class TestBinaryModel(TestCase):
    def test_binary_from_json_preserves_install_args_overrides(self):
        """Binary.from_json() should persist canonical install_args overrides unchanged."""
        overrides = {
-            'apt': {'install_args': ['chromium']},
-            'npm': {'install_args': 'puppeteer'},
-            'custom': {'install_args': ['bash', '-lc', 'echo ok']},
+            "apt": {"install_args": ["chromium"]},
+            "npm": {"install_args": "puppeteer"},
+            "custom": {"install_args": ["bash", "-lc", "echo ok"]},
        }

-        binary = Binary.from_json({
-            'name': 'chrome',
-            'binproviders': 'apt,npm,custom',
-            'overrides': overrides,
-        })
+        binary = Binary.from_json(
+            {
+                "name": "chrome",
+                "binproviders": "apt,npm,custom",
+                "overrides": overrides,
+            },
+        )

        self.assertIsNotNone(binary)
        assert binary is not None
@@ -284,15 +292,17 @@ class TestBinaryModel(TestCase):
    def test_binary_from_json_does_not_coerce_legacy_override_shapes(self):
        """Binary.from_json() should no longer translate legacy non-dict provider overrides."""
        overrides = {
-            'apt': ['chromium'],
-            'npm': 'puppeteer',
+            "apt": ["chromium"],
+            "npm": "puppeteer",
        }

-        binary = Binary.from_json({
-            'name': 'chrome',
-            'binproviders': 'apt,npm',
-            'overrides': overrides,
-        })
+        binary = Binary.from_json(
+            {
+                "name": "chrome",
+                "binproviders": "apt,npm",
+                "overrides": overrides,
+            },
+        )

        self.assertIsNotNone(binary)
        assert binary is not None
@@ -300,23 +310,25 @@ class TestBinaryModel(TestCase):

    def test_binary_from_json_prefers_published_readability_package(self):
        """Binary.from_json() should rewrite readability's npm git URL to the published package."""
-        binary = Binary.from_json({
-            'name': 'readability-extractor',
-            'binproviders': 'env,npm',
-            'overrides': {
-                'npm': {
-                    'install_args': ['https://github.com/ArchiveBox/readability-extractor'],
+        binary = Binary.from_json(
+            {
+                "name": "readability-extractor",
+                "binproviders": "env,npm",
+                "overrides": {
+                    "npm": {
+                        "install_args": ["https://github.com/ArchiveBox/readability-extractor"],
+                    },
                },
            },
-        })
+        )

        self.assertIsNotNone(binary)
        assert binary is not None
        self.assertEqual(
            binary.overrides,
            {
-                'npm': {
-                    'install_args': ['readability-extractor'],
+                "npm": {
+                    "install_args": ["readability-extractor"],
                },
            },
        )
@@ -328,12 +340,13 @@ class TestBinaryStateMachine(TestCase):
    def setUp(self):
        """Create a machine and binary for state machine tests."""
        import archivebox.machine.models as models
+
        models._CURRENT_MACHINE = None
        self.machine = Machine.current()
        self.binary = Binary.objects.create(
            machine=self.machine,
-            name='test-binary',
-            binproviders='env',
+            name="test-binary",
+            binproviders="env",
        )

    def test_binary_state_machine_initial_state(self):
@@ -346,7 +359,7 @@ class TestBinaryStateMachine(TestCase):
        sm = BinaryMachine(self.binary)
        self.assertTrue(sm.can_install())

-        self.binary.binproviders = ''
+        self.binary.binproviders = ""
        self.binary.save()
        sm = BinaryMachine(self.binary)
        self.assertFalse(sm.can_install())
@@ -358,6 +371,7 @@ class TestProcessModel(TestCase):
    def setUp(self):
        """Create a machine for process tests."""
        import archivebox.machine.models as models
+
        models._CURRENT_MACHINE = None
        models._CURRENT_PROCESS = None
        self.machine = Machine.current()
@@ -366,12 +380,12 @@ class TestProcessModel(TestCase):
        """Process should be created with default values."""
        process = Process.objects.create(
            machine=self.machine,
-            cmd=['echo', 'hello'],
-            pwd='/tmp',
+            cmd=["echo", "hello"],
+            pwd="/tmp",
        )

        self.assertIsNotNone(process.id)
-        self.assertEqual(process.cmd, ['echo', 'hello'])
+        self.assertEqual(process.cmd, ["echo", "hello"])
        self.assertEqual(process.status, Process.StatusChoices.QUEUED)
        self.assertIsNone(process.pid)
        self.assertIsNone(process.exit_code)
@@ -380,20 +394,20 @@ class TestProcessModel(TestCase):
        """Process.to_json() should serialize correctly."""
        process = Process.objects.create(
            machine=self.machine,
-            cmd=['echo', 'hello'],
-            pwd='/tmp',
+            cmd=["echo", "hello"],
+            pwd="/tmp",
            timeout=60,
        )
        json_data = process.to_json()

-        self.assertEqual(json_data['type'], 'Process')
-        self.assertEqual(json_data['cmd'], ['echo', 'hello'])
-        self.assertEqual(json_data['pwd'], '/tmp')
-        self.assertEqual(json_data['timeout'], 60)
+        self.assertEqual(json_data["type"], "Process")
+        self.assertEqual(json_data["cmd"], ["echo", "hello"])
+        self.assertEqual(json_data["pwd"], "/tmp")
+        self.assertEqual(json_data["timeout"], 60)

    def test_process_update_and_requeue(self):
        """Process.update_and_requeue() should update fields and save."""
-        process = Process.objects.create(machine=self.machine, cmd=['test'])
+        process = Process.objects.create(machine=self.machine, cmd=["test"])

        process.update_and_requeue(
            status=Process.StatusChoices.RUNNING,
@@ -413,6 +427,7 @@ class TestProcessCurrent(TestCase):
    def setUp(self):
        """Reset caches."""
        import archivebox.machine.models as models
+
        models._CURRENT_MACHINE = None
        models._CURRENT_PROCESS = None

@@ -437,25 +452,25 @@ class TestProcessCurrent(TestCase):

    def test_process_detect_type_runner(self):
        """_detect_process_type should detect the background runner command."""
-        with patch('sys.argv', ['archivebox', 'run', '--daemon']):
+        with patch("sys.argv", ["archivebox", "run", "--daemon"]):
            result = Process._detect_process_type()
            self.assertEqual(result, Process.TypeChoices.ORCHESTRATOR)

    def test_process_detect_type_runner_watch(self):
        """runner_watch should be classified as a worker, not the orchestrator itself."""
-        with patch('sys.argv', ['archivebox', 'manage', 'runner_watch', '--pidfile=/tmp/runserver.pid']):
+        with patch("sys.argv", ["archivebox", "manage", "runner_watch", "--pidfile=/tmp/runserver.pid"]):
            result = Process._detect_process_type()
            self.assertEqual(result, Process.TypeChoices.WORKER)

    def test_process_detect_type_cli(self):
        """_detect_process_type should detect CLI commands."""
-        with patch('sys.argv', ['archivebox', 'add', 'http://example.com']):
+        with patch("sys.argv", ["archivebox", "add", "http://example.com"]):
            result = Process._detect_process_type()
            self.assertEqual(result, Process.TypeChoices.CLI)

    def test_process_detect_type_binary(self):
        """_detect_process_type should detect non-ArchiveBox subprocesses as binary processes."""
-        with patch('sys.argv', ['/usr/bin/wget', 'https://example.com']):
+        with patch("sys.argv", ["/usr/bin/wget", "https://example.com"]):
            result = Process._detect_process_type()
            self.assertEqual(result, Process.TypeChoices.BINARY)

@@ -463,7 +478,7 @@ class TestProcessCurrent(TestCase):
        """Process.proc should accept a script recorded in DB when wrapped by an interpreter in psutil."""
        proc = Process.objects.create(
            machine=Machine.current(),
-            cmd=['/tmp/on_Crawl__90_chrome_launch.daemon.bg.js', '--url=https://example.com/'],
+            cmd=["/tmp/on_Crawl__90_chrome_launch.daemon.bg.js", "--url=https://example.com/"],
            pid=12345,
            status=Process.StatusChoices.RUNNING,
            started_at=timezone.now(),
@@ -472,12 +487,12 @@ class TestProcessCurrent(TestCase):
        os_proc = Mock()
        os_proc.create_time.return_value = proc.started_at.timestamp()
        os_proc.cmdline.return_value = [
-            'node',
-            '/tmp/on_Crawl__90_chrome_launch.daemon.bg.js',
-            '--url=https://example.com/',
+            "node",
+            "/tmp/on_Crawl__90_chrome_launch.daemon.bg.js",
+            "--url=https://example.com/",
        ]

-        with patch('archivebox.machine.models.psutil.Process', return_value=os_proc):
+        with patch("archivebox.machine.models.psutil.Process", return_value=os_proc):
            self.assertIs(proc.proc, os_proc)


@@ -487,6 +502,7 @@ class TestProcessHierarchy(TestCase):
    def setUp(self):
        """Create machine."""
        import archivebox.machine.models as models
+
        models._CURRENT_MACHINE = None
        self.machine = Machine.current()

@@ -561,6 +577,7 @@ class TestProcessLifecycle(TestCase):
    def setUp(self):
        """Create machine."""
        import archivebox.machine.models as models
+
        models._CURRENT_MACHINE = None
        self.machine = Machine.current()

@@ -643,6 +660,7 @@ class TestProcessClassMethods(TestCase):
    def setUp(self):
        """Create machine."""
        import archivebox.machine.models as models
+
        models._CURRENT_MACHINE = None
        self.machine = Machine.current()

@@ -689,6 +707,77 @@ class TestProcessClassMethods(TestCase):
        stale.refresh_from_db()
        self.assertEqual(stale.status, Process.StatusChoices.EXITED)

+    def test_cleanup_stale_running_marks_timed_out_rows_exited(self):
+        """cleanup_stale_running should retire RUNNING rows that exceed timeout + grace."""
+        stale = Process.objects.create(
+            machine=self.machine,
+            status=Process.StatusChoices.RUNNING,
+            pid=999998,
+            timeout=5,
+            started_at=timezone.now() - PROCESS_TIMEOUT_GRACE - timedelta(seconds=10),
+        )
+
+        cleaned = Process.cleanup_stale_running()
+
+        self.assertGreaterEqual(cleaned, 1)
+        stale.refresh_from_db()
+        self.assertEqual(stale.status, Process.StatusChoices.EXITED)
+
+    def test_cleanup_stale_running_marks_timed_out_live_hooks_exited(self):
+        """Timed-out live hook rows should be retired in the DB without trying to kill the process."""
+        stale = Process.objects.create(
+            machine=self.machine,
+            process_type=Process.TypeChoices.HOOK,
+            status=Process.StatusChoices.RUNNING,
+            pid=os.getpid(),
+            timeout=5,
+            started_at=timezone.now() - PROCESS_TIMEOUT_GRACE - timedelta(seconds=10),
+        )
+
+        with (
+            patch.object(Process, "poll", return_value=None),
+            patch.object(Process, "kill_tree") as kill_tree,
+            patch.object(Process, "terminate") as terminate,
+        ):
+            cleaned = Process.cleanup_stale_running()
+
+        self.assertGreaterEqual(cleaned, 1)
+        stale.refresh_from_db()
+        self.assertEqual(stale.status, Process.StatusChoices.EXITED)
+        kill_tree.assert_not_called()
+        terminate.assert_not_called()
+
+    def test_cleanup_orphaned_workers_marks_dead_root_children_exited(self):
+        """cleanup_orphaned_workers should retire rows whose CLI/orchestrator root is gone."""
+        import psutil
+        from datetime import datetime
+
+        started_at = datetime.fromtimestamp(psutil.Process(os.getpid()).create_time(), tz=timezone.get_current_timezone())
+        parent = Process.objects.create(
+            machine=self.machine,
+            process_type=Process.TypeChoices.CLI,
+            status=Process.StatusChoices.RUNNING,
+            pid=999997,
+            started_at=timezone.now() - timedelta(minutes=5),
+        )
+        child = Process.objects.create(
+            machine=self.machine,
+            parent=parent,
+            process_type=Process.TypeChoices.HOOK,
+            status=Process.StatusChoices.RUNNING,
+            pid=os.getpid(),
+            started_at=started_at,
+        )
+
+        with patch.object(Process, "kill_tree") as kill_tree, patch.object(Process, "terminate") as terminate:
+            cleaned = Process.cleanup_orphaned_workers()
+
+        self.assertEqual(cleaned, 1)
+        child.refresh_from_db()
+        self.assertEqual(child.status, Process.StatusChoices.EXITED)
+        kill_tree.assert_not_called()
+        terminate.assert_not_called()
+

 class TestProcessStateMachine(TestCase):
    """Test the ProcessMachine state machine."""
@@ -696,12 +785,13 @@ class TestProcessStateMachine(TestCase):
    def setUp(self):
        """Create a machine and process for state machine tests."""
        import archivebox.machine.models as models
+
        models._CURRENT_MACHINE = None
        self.machine = Machine.current()
        self.process = Process.objects.create(
            machine=self.machine,
-            cmd=['echo', 'test'],
-            pwd='/tmp',
+            cmd=["echo", "test"],
+            pwd="/tmp",
        )

    def test_process_state_machine_initial_state(self):
@@ -730,5 +820,5 @@ class TestProcessStateMachine(TestCase):
        self.assertTrue(sm.is_exited())


-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/archivebox/tests/test_migrations_04_to_09.py
+++ b/archivebox/tests/test_migrations_04_to_09.py
@@ -31,7 +31,7 @@ class TestMigrationFrom04x(unittest.TestCase):
    def setUp(self):
        """Create a temporary directory with 0.4.x schema and data."""
        self.work_dir = Path(tempfile.mkdtemp())
-        self.db_path = self.work_dir / 'index.sqlite3'
+        self.db_path = self.work_dir / "index.sqlite3"

        # Create directory structure
        create_data_dir_structure(self.work_dir)
@@ -50,9 +50,9 @@ class TestMigrationFrom04x(unittest.TestCase):

    def test_migration_preserves_snapshot_count(self):
        """Migration should preserve all snapshots from 0.4.x."""
-        expected_count = len(self.original_data['snapshots'])
+        expected_count = len(self.original_data["snapshots"])

-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_snapshot_count(self.db_path, expected_count)
@@ -60,9 +60,9 @@ class TestMigrationFrom04x(unittest.TestCase):

    def test_migration_preserves_snapshot_urls(self):
        """Migration should preserve all snapshot URLs from 0.4.x."""
-        expected_urls = [s['url'] for s in self.original_data['snapshots']]
+        expected_urls = [s["url"] for s in self.original_data["snapshots"]]

-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_snapshot_urls(self.db_path, expected_urls)
@@ -70,14 +70,14 @@ class TestMigrationFrom04x(unittest.TestCase):

    def test_migration_converts_string_tags_to_model(self):
        """Migration should convert comma-separated tags to Tag model instances."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        # Collect unique tags from original data
        original_tags = set()
-        for tags_str in cast(list[str], self.original_data['tags_str']):
+        for tags_str in cast(list[str], self.original_data["tags_str"]):
            if tags_str:
-                for tag in tags_str.split(','):
+                for tag in tags_str.split(","):
                    original_tags.add(tag.strip())

        # Tags should have been created
@@ -86,7 +86,7 @@ class TestMigrationFrom04x(unittest.TestCase):

    def test_migration_preserves_snapshot_titles(self):
        """Migration should preserve all snapshot titles."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
@@ -95,43 +95,46 @@ class TestMigrationFrom04x(unittest.TestCase):
        actual = {row[0]: row[1] for row in cursor.fetchall()}
        conn.close()

-        for snapshot in self.original_data['snapshots']:
+        for snapshot in self.original_data["snapshots"]:
            self.assertEqual(
-                actual.get(snapshot['url']),
-                snapshot['title'],
-                f"Title mismatch for {snapshot['url']}"
+                actual.get(snapshot["url"]),
+                snapshot["title"],
+                f"Title mismatch for {snapshot['url']}",
            )

    def test_status_works_after_migration(self):
        """Status command should work after migration."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        result = run_archivebox(self.work_dir, ['status'])
+        result = run_archivebox(self.work_dir, ["status"])
        self.assertEqual(result.returncode, 0, f"Status failed after migration: {result.stderr}")

    def test_list_works_after_migration(self):
        """List command should work and show ALL migrated snapshots."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        result = run_archivebox(self.work_dir, ['list'])
+        result = run_archivebox(self.work_dir, ["list"])
        self.assertEqual(result.returncode, 0, f"List failed after migration: {result.stderr}")

        # Verify ALL snapshots appear in output
        output = result.stdout + result.stderr
-        for snapshot in self.original_data['snapshots']:
-            url_fragment = snapshot['url'][:30]
-            self.assertIn(url_fragment, output,
-                         f"Snapshot {snapshot['url']} not found in list output")
+        for snapshot in self.original_data["snapshots"]:
+            url_fragment = snapshot["url"][:30]
+            self.assertIn(
+                url_fragment,
+                output,
+                f"Snapshot {snapshot['url']} not found in list output",
+            )

    def test_add_works_after_migration(self):
        """Adding new URLs should work after migration from 0.4.x."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        # Try to add a new URL after migration
-        result = run_archivebox(self.work_dir, ['add', '--index-only', 'https://example.com/new-page'], timeout=45)
+        result = run_archivebox(self.work_dir, ["add", "--index-only", "https://example.com/new-page"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Add failed after migration: {result.stderr}")

        # Verify snapshot was added
@@ -145,7 +148,7 @@ class TestMigrationFrom04x(unittest.TestCase):

    def test_new_schema_elements_created(self):
        """Migration should create new 0.9.x schema elements."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
@@ -155,25 +158,25 @@ class TestMigrationFrom04x(unittest.TestCase):
        conn.close()

        # New tables should exist
-        self.assertIn('crawls_crawl', tables, "crawls_crawl table not created")
-        self.assertIn('core_tag', tables, "core_tag table not created")
-        self.assertIn('core_archiveresult', tables, "core_archiveresult table not created")
+        self.assertIn("crawls_crawl", tables, "crawls_crawl table not created")
+        self.assertIn("core_tag", tables, "core_tag table not created")
+        self.assertIn("core_archiveresult", tables, "core_archiveresult table not created")

    def test_snapshots_have_new_fields(self):
        """Migrated snapshots should have new 0.9.x fields."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
        cursor = conn.cursor()
-        cursor.execute('PRAGMA table_info(core_snapshot)')
+        cursor.execute("PRAGMA table_info(core_snapshot)")
        columns = {row[1] for row in cursor.fetchall()}
        conn.close()

-        required_columns = {'status', 'depth', 'created_at', 'modified_at'}
+        required_columns = {"status", "depth", "created_at", "modified_at"}
        for col in required_columns:
            self.assertIn(col, columns, f"Snapshot missing new column: {col}")


-if __name__ == '__main__':
+if __name__ == "__main__":
    unittest.main()
--- a/archivebox/tests/test_migrations_07_to_09.py
+++ b/archivebox/tests/test_migrations_07_to_09.py
@@ -35,7 +35,7 @@ class TestMigrationFrom07x(unittest.TestCase):
    def setUp(self):
        """Create a temporary directory with 0.7.x schema and data."""
        self.work_dir = Path(tempfile.mkdtemp())
-        self.db_path = self.work_dir / 'index.sqlite3'
+        self.db_path = self.work_dir / "index.sqlite3"

        # Create directory structure
        create_data_dir_structure(self.work_dir)
@@ -54,9 +54,9 @@ class TestMigrationFrom07x(unittest.TestCase):

    def test_migration_preserves_snapshot_count(self):
        """Migration should preserve all snapshots."""
-        expected_count = len(self.original_data['snapshots'])
+        expected_count = len(self.original_data["snapshots"])

-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_snapshot_count(self.db_path, expected_count)
@@ -64,9 +64,9 @@ class TestMigrationFrom07x(unittest.TestCase):

    def test_migration_preserves_snapshot_urls(self):
        """Migration should preserve all snapshot URLs."""
-        expected_urls = [s['url'] for s in self.original_data['snapshots']]
+        expected_urls = [s["url"] for s in self.original_data["snapshots"]]

-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_snapshot_urls(self.db_path, expected_urls)
@@ -74,9 +74,9 @@ class TestMigrationFrom07x(unittest.TestCase):

    def test_migration_preserves_snapshot_titles(self):
        """Migration should preserve all snapshot titles."""
-        expected_titles = {s['url']: s['title'] for s in self.original_data['snapshots']}
+        expected_titles = {s["url"]: s["title"] for s in self.original_data["snapshots"]}

-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_snapshot_titles(self.db_path, expected_titles)
@@ -84,9 +84,9 @@ class TestMigrationFrom07x(unittest.TestCase):

    def test_migration_preserves_tags(self):
        """Migration should preserve all tags."""
-        expected_count = len(self.original_data['tags'])
+        expected_count = len(self.original_data["tags"])

-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_tag_count(self.db_path, expected_count)
@@ -94,9 +94,9 @@ class TestMigrationFrom07x(unittest.TestCase):

    def test_migration_preserves_archiveresults(self):
        """Migration should preserve all archive results."""
-        expected_count = len(self.original_data['archiveresults'])
+        expected_count = len(self.original_data["archiveresults"])

-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_archiveresult_count(self.db_path, expected_count)
@@ -104,7 +104,7 @@ class TestMigrationFrom07x(unittest.TestCase):

    def test_migration_preserves_foreign_keys(self):
        """Migration should maintain foreign key relationships."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_foreign_keys(self.db_path)
@@ -112,41 +112,41 @@ class TestMigrationFrom07x(unittest.TestCase):

    def test_status_works_after_migration(self):
        """Status command should work after migration."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        result = run_archivebox(self.work_dir, ['status'])
+        result = run_archivebox(self.work_dir, ["status"])
        self.assertEqual(result.returncode, 0, f"Status failed after migration: {result.stderr}")

    def test_search_works_after_migration(self):
        """Search command should find ALL migrated snapshots."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        result = run_archivebox(self.work_dir, ['search'])
+        result = run_archivebox(self.work_dir, ["search"])
        self.assertEqual(result.returncode, 0, f"Search failed after migration: {result.stderr}")

        # Verify ALL snapshots appear in output
        output = result.stdout + result.stderr
-        ok, msg = verify_all_snapshots_in_output(output, self.original_data['snapshots'])
+        ok, msg = verify_all_snapshots_in_output(output, self.original_data["snapshots"])
        self.assertTrue(ok, msg)

    def test_list_works_after_migration(self):
        """List command should work and show ALL migrated data."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        result = run_archivebox(self.work_dir, ['snapshot', 'list'])
+        result = run_archivebox(self.work_dir, ["snapshot", "list"])
        self.assertEqual(result.returncode, 0, f"List failed after migration: {result.stderr}")

        # Verify ALL snapshots appear in output
        output = result.stdout + result.stderr
-        ok, msg = verify_all_snapshots_in_output(output, self.original_data['snapshots'])
+        ok, msg = verify_all_snapshots_in_output(output, self.original_data["snapshots"])
        self.assertTrue(ok, msg)

    def test_new_schema_elements_created_after_migration(self):
        """Migration should create new 0.9.x schema elements (crawls_crawl, etc.)."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
@@ -158,29 +158,29 @@ class TestMigrationFrom07x(unittest.TestCase):
        conn.close()

        # 0.9.x should have crawls_crawl table
-        self.assertIn('crawls_crawl', tables, "crawls_crawl table not created during migration")
+        self.assertIn("crawls_crawl", tables, "crawls_crawl table not created during migration")

    def test_snapshots_have_new_fields_after_migration(self):
        """Migrated snapshots should have new 0.9.x fields (status, depth, etc.)."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
        cursor = conn.cursor()

        # Check snapshot table has new columns
-        cursor.execute('PRAGMA table_info(core_snapshot)')
+        cursor.execute("PRAGMA table_info(core_snapshot)")
        columns = {row[1] for row in cursor.fetchall()}
        conn.close()

        # 0.9.x snapshots should have status, depth, created_at, modified_at
-        required_new_columns = {'status', 'depth', 'created_at', 'modified_at'}
+        required_new_columns = {"status", "depth", "created_at", "modified_at"}
        for col in required_new_columns:
            self.assertIn(col, columns, f"Snapshot missing new column: {col}")

    def test_add_works_after_migration(self):
        """Adding new URLs should work after migration from 0.7.x."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        # Verify that init created the crawls_crawl table before proceeding
@@ -192,7 +192,7 @@ class TestMigrationFrom07x(unittest.TestCase):
        self.assertTrue(table_exists, f"Init failed to create crawls_crawl table. Init stderr: {result.stderr[-500:]}")

        # Try to add a new URL after migration (use --index-only for speed)
-        result = run_archivebox(self.work_dir, ['add', '--index-only', 'https://example.com/new-page'], timeout=45)
+        result = run_archivebox(self.work_dir, ["add", "--index-only", "https://example.com/new-page"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Add failed after migration: {result.stderr}")

        # Verify a Crawl was created for the new URL
@@ -206,7 +206,7 @@ class TestMigrationFrom07x(unittest.TestCase):

    def test_archiveresult_status_preserved_after_migration(self):
        """Migration should preserve archive result status values."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
@@ -218,35 +218,39 @@ class TestMigrationFrom07x(unittest.TestCase):
        conn.close()

        # Original data has known status distribution: succeeded, failed, skipped
-        self.assertIn('succeeded', status_counts, "Should have succeeded results")
-        self.assertIn('failed', status_counts, "Should have failed results")
-        self.assertIn('skipped', status_counts, "Should have skipped results")
+        self.assertIn("succeeded", status_counts, "Should have succeeded results")
+        self.assertIn("failed", status_counts, "Should have failed results")
+        self.assertIn("skipped", status_counts, "Should have skipped results")

    def test_version_works_after_migration(self):
        """Version command should work after migration."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        result = run_archivebox(self.work_dir, ['version'])
+        result = run_archivebox(self.work_dir, ["version"])
        self.assertEqual(result.returncode, 0, f"Version failed after migration: {result.stderr}")

        # Should show version info
        output = result.stdout + result.stderr
-        self.assertTrue('ArchiveBox' in output or 'version' in output.lower(),
-                       f"Version output missing expected content: {output[:500]}")
+        self.assertTrue(
+            "ArchiveBox" in output or "version" in output.lower(),
+            f"Version output missing expected content: {output[:500]}",
+        )

    def test_help_works_after_migration(self):
        """Help command should work after migration."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        result = run_archivebox(self.work_dir, ['help'])
+        result = run_archivebox(self.work_dir, ["help"])
        self.assertEqual(result.returncode, 0, f"Help failed after migration: {result.stderr}")

        # Should show available commands
        output = result.stdout + result.stderr
-        self.assertTrue('add' in output.lower() and 'status' in output.lower(),
-                       f"Help output missing expected commands: {output[:500]}")
+        self.assertTrue(
+            "add" in output.lower() and "status" in output.lower(),
+            f"Help output missing expected commands: {output[:500]}",
+        )


 class TestMigrationDataIntegrity07x(unittest.TestCase):
@@ -255,7 +259,7 @@ class TestMigrationDataIntegrity07x(unittest.TestCase):
    def test_no_duplicate_snapshots_after_migration(self):
        """Migration should not create duplicate snapshots."""
        work_dir = Path(tempfile.mkdtemp())
-        db_path = work_dir / 'index.sqlite3'
+        db_path = work_dir / "index.sqlite3"

        try:
            create_data_dir_structure(work_dir)
@@ -264,7 +268,7 @@ class TestMigrationDataIntegrity07x(unittest.TestCase):
            conn.close()
            seed_0_7_data(db_path)

-            result = run_archivebox(work_dir, ['init'], timeout=45)
+            result = run_archivebox(work_dir, ["init"], timeout=45)
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            # Check for duplicate URLs
@@ -285,7 +289,7 @@ class TestMigrationDataIntegrity07x(unittest.TestCase):
    def test_no_orphaned_archiveresults_after_migration(self):
        """Migration should not leave orphaned ArchiveResults."""
        work_dir = Path(tempfile.mkdtemp())
-        db_path = work_dir / 'index.sqlite3'
+        db_path = work_dir / "index.sqlite3"

        try:
            create_data_dir_structure(work_dir)
@@ -294,7 +298,7 @@ class TestMigrationDataIntegrity07x(unittest.TestCase):
            conn.close()
            seed_0_7_data(db_path)

-            result = run_archivebox(work_dir, ['init'], timeout=45)
+            result = run_archivebox(work_dir, ["init"], timeout=45)
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            ok, msg = verify_foreign_keys(db_path)
@@ -306,7 +310,7 @@ class TestMigrationDataIntegrity07x(unittest.TestCase):
    def test_timestamps_preserved_after_migration(self):
        """Migration should preserve original timestamps."""
        work_dir = Path(tempfile.mkdtemp())
-        db_path = work_dir / 'index.sqlite3'
+        db_path = work_dir / "index.sqlite3"

        try:
            create_data_dir_structure(work_dir)
@@ -315,9 +319,9 @@ class TestMigrationDataIntegrity07x(unittest.TestCase):
            conn.close()
            original_data = seed_0_7_data(db_path)

-            original_timestamps = {s['url']: s['timestamp'] for s in original_data['snapshots']}
+            original_timestamps = {s["url"]: s["timestamp"] for s in original_data["snapshots"]}

-            result = run_archivebox(work_dir, ['init'], timeout=45)
+            result = run_archivebox(work_dir, ["init"], timeout=45)
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            conn = sqlite3.connect(str(db_path))
@@ -328,8 +332,9 @@ class TestMigrationDataIntegrity07x(unittest.TestCase):

            for url, original_ts in original_timestamps.items():
                self.assertEqual(
-                    migrated_timestamps.get(url), original_ts,
-                    f"Timestamp changed for {url}: {original_ts} -> {migrated_timestamps.get(url)}"
+                    migrated_timestamps.get(url),
+                    original_ts,
+                    f"Timestamp changed for {url}: {original_ts} -> {migrated_timestamps.get(url)}",
                )

        finally:
@@ -338,7 +343,7 @@ class TestMigrationDataIntegrity07x(unittest.TestCase):
    def test_tag_associations_preserved_after_migration(self):
        """Migration should preserve snapshot-tag associations."""
        work_dir = Path(tempfile.mkdtemp())
-        db_path = work_dir / 'index.sqlite3'
+        db_path = work_dir / "index.sqlite3"

        try:
            create_data_dir_structure(work_dir)
@@ -354,7 +359,7 @@ class TestMigrationDataIntegrity07x(unittest.TestCase):
            original_count = cursor.fetchone()[0]
            conn.close()

-            result = run_archivebox(work_dir, ['init'], timeout=45)
+            result = run_archivebox(work_dir, ["init"], timeout=45)
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            # Count tag associations after migration
@@ -364,12 +369,15 @@ class TestMigrationDataIntegrity07x(unittest.TestCase):
            migrated_count = cursor.fetchone()[0]
            conn.close()

-            self.assertEqual(migrated_count, original_count,
-                           f"Tag associations changed: {original_count} -> {migrated_count}")
+            self.assertEqual(
+                migrated_count,
+                original_count,
+                f"Tag associations changed: {original_count} -> {migrated_count}",
+            )

        finally:
            shutil.rmtree(work_dir, ignore_errors=True)


-if __name__ == '__main__':
+if __name__ == "__main__":
    unittest.main()
--- a/archivebox/tests/test_migrations_08_to_09.py
+++ b/archivebox/tests/test_migrations_08_to_09.py
@@ -39,7 +39,7 @@ class TestMigrationFrom08x(unittest.TestCase):
    def setUp(self):
        """Create a temporary directory with 0.8.x schema and data."""
        self.work_dir = Path(tempfile.mkdtemp())
-        self.db_path = self.work_dir / 'index.sqlite3'
+        self.db_path = self.work_dir / "index.sqlite3"

        # Create directory structure
        create_data_dir_structure(self.work_dir)
@@ -58,9 +58,9 @@ class TestMigrationFrom08x(unittest.TestCase):

    def test_migration_preserves_snapshot_count(self):
        """Migration should preserve all snapshots from 0.8.x."""
-        expected_count = len(self.original_data['snapshots'])
+        expected_count = len(self.original_data["snapshots"])

-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_snapshot_count(self.db_path, expected_count)
@@ -68,9 +68,9 @@ class TestMigrationFrom08x(unittest.TestCase):

    def test_migration_preserves_snapshot_urls(self):
        """Migration should preserve all snapshot URLs from 0.8.x."""
-        expected_urls = [s['url'] for s in self.original_data['snapshots']]
+        expected_urls = [s["url"] for s in self.original_data["snapshots"]]

-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_snapshot_urls(self.db_path, expected_urls)
@@ -78,14 +78,14 @@ class TestMigrationFrom08x(unittest.TestCase):

    def test_migration_preserves_crawls(self):
        """Migration should preserve all Crawl records and create default crawl if needed."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        # Count snapshots with NULL crawl_id in original data
-        snapshots_without_crawl = sum(1 for s in self.original_data['snapshots'] if s['crawl_id'] is None)
+        snapshots_without_crawl = sum(1 for s in self.original_data["snapshots"] if s["crawl_id"] is None)

        # Expected count: original crawls + 1 default crawl if any snapshots had NULL crawl_id
-        expected_count = len(self.original_data['crawls'])
+        expected_count = len(self.original_data["crawls"])
        if snapshots_without_crawl > 0:
            expected_count += 1  # Migration 0024 creates a default crawl

@@ -94,42 +94,47 @@ class TestMigrationFrom08x(unittest.TestCase):

    def test_migration_preserves_snapshot_crawl_links(self):
        """Migration should preserve snapshot-to-crawl relationships and assign default crawl to orphans."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
        cursor = conn.cursor()

        # Check EVERY snapshot has a crawl_id after migration
-        for snapshot in self.original_data['snapshots']:
-            cursor.execute("SELECT crawl_id FROM core_snapshot WHERE url = ?", (snapshot['url'],))
+        for snapshot in self.original_data["snapshots"]:
+            cursor.execute("SELECT crawl_id FROM core_snapshot WHERE url = ?", (snapshot["url"],))
            row = cursor.fetchone()
            self.assertIsNotNone(row, f"Snapshot {snapshot['url']} not found after migration")

-            if snapshot['crawl_id'] is not None:
+            if snapshot["crawl_id"] is not None:
                # Snapshots that had a crawl should keep it
-                self.assertEqual(row[0], snapshot['crawl_id'],
-                    f"Crawl ID changed for {snapshot['url']}: expected {snapshot['crawl_id']}, got {row[0]}")
+                self.assertEqual(
+                    row[0],
+                    snapshot["crawl_id"],
+                    f"Crawl ID changed for {snapshot['url']}: expected {snapshot['crawl_id']}, got {row[0]}",
+                )
            else:
                # Snapshots without a crawl should now have one (the default crawl)
-                self.assertIsNotNone(row[0],
-                    f"Snapshot {snapshot['url']} should have been assigned to default crawl but has NULL")
+                self.assertIsNotNone(
+                    row[0],
+                    f"Snapshot {snapshot['url']} should have been assigned to default crawl but has NULL",
+                )

        conn.close()

    def test_migration_preserves_tags(self):
        """Migration should preserve all tags."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        ok, msg = verify_tag_count(self.db_path, len(self.original_data['tags']))
+        ok, msg = verify_tag_count(self.db_path, len(self.original_data["tags"]))
        self.assertTrue(ok, msg)

    def test_migration_preserves_archiveresults(self):
        """Migration should preserve all archive results."""
-        expected_count = len(self.original_data['archiveresults'])
+        expected_count = len(self.original_data["archiveresults"])

-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_archiveresult_count(self.db_path, expected_count)
@@ -137,7 +142,7 @@ class TestMigrationFrom08x(unittest.TestCase):

    def test_migration_preserves_archiveresult_status(self):
        """Migration should preserve archive result status values."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
@@ -149,49 +154,49 @@ class TestMigrationFrom08x(unittest.TestCase):
        conn.close()

        # Original data has known status distribution: succeeded, failed, skipped
-        self.assertIn('succeeded', status_counts, "Should have succeeded results")
-        self.assertIn('failed', status_counts, "Should have failed results")
-        self.assertIn('skipped', status_counts, "Should have skipped results")
+        self.assertIn("succeeded", status_counts, "Should have succeeded results")
+        self.assertIn("failed", status_counts, "Should have failed results")
+        self.assertIn("skipped", status_counts, "Should have skipped results")

    def test_status_works_after_migration(self):
        """Status command should work after migration."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        result = run_archivebox(self.work_dir, ['status'])
+        result = run_archivebox(self.work_dir, ["status"])
        self.assertEqual(result.returncode, 0, f"Status failed after migration: {result.stderr}")

    def test_list_works_after_migration(self):
        """List command should work and show ALL migrated data."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        result = run_archivebox(self.work_dir, ['snapshot', 'list'])
+        result = run_archivebox(self.work_dir, ["snapshot", "list"])
        self.assertEqual(result.returncode, 0, f"List failed after migration: {result.stderr}")

        # Verify ALL snapshots appear in output
        output = result.stdout + result.stderr
-        ok, msg = verify_all_snapshots_in_output(output, self.original_data['snapshots'])
+        ok, msg = verify_all_snapshots_in_output(output, self.original_data["snapshots"])
        self.assertTrue(ok, msg)

    def test_search_works_after_migration(self):
        """Search command should find ALL migrated snapshots."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        result = run_archivebox(self.work_dir, ['search'])
+        result = run_archivebox(self.work_dir, ["search"])
        self.assertEqual(result.returncode, 0, f"Search failed after migration: {result.stderr}")

        # Verify ALL snapshots appear in output
        output = result.stdout + result.stderr
-        ok, msg = verify_all_snapshots_in_output(output, self.original_data['snapshots'])
+        ok, msg = verify_all_snapshots_in_output(output, self.original_data["snapshots"])
        self.assertTrue(ok, msg)

    def test_migration_preserves_snapshot_titles(self):
        """Migration should preserve all snapshot titles."""
-        expected_titles = {s['url']: s['title'] for s in self.original_data['snapshots']}
+        expected_titles = {s["url"]: s["title"] for s in self.original_data["snapshots"]}

-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_snapshot_titles(self.db_path, expected_titles)
@@ -199,7 +204,7 @@ class TestMigrationFrom08x(unittest.TestCase):

    def test_migration_preserves_foreign_keys(self):
        """Migration should maintain foreign key relationships."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        ok, msg = verify_foreign_keys(self.db_path)
@@ -207,7 +212,7 @@ class TestMigrationFrom08x(unittest.TestCase):

    def test_migration_removes_seed_id_column(self):
        """Migration should remove seed_id column from archivebox.crawls.crawl."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
@@ -216,12 +221,15 @@ class TestMigrationFrom08x(unittest.TestCase):
        columns = [row[1] for row in cursor.fetchall()]
        conn.close()

-        self.assertNotIn('seed_id', columns,
-            f"seed_id column should have been removed by migration. Columns: {columns}")
+        self.assertNotIn(
+            "seed_id",
+            columns,
+            f"seed_id column should have been removed by migration. Columns: {columns}",
+        )

    def test_migration_removes_seed_table(self):
        """Migration should remove crawls_seed table."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
@@ -234,10 +242,13 @@ class TestMigrationFrom08x(unittest.TestCase):

    def test_add_works_after_migration(self):
        """Adding new URLs should work after migration from 0.8.x."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        # Check that init actually ran and applied migrations
-        self.assertIn('Applying', result.stdout + result.stderr,
-            f"Init did not apply migrations. stdout: {result.stdout[:500]}, stderr: {result.stderr[:500]}")
+        self.assertIn(
+            "Applying",
+            result.stdout + result.stderr,
+            f"Init did not apply migrations. stdout: {result.stdout[:500]}, stderr: {result.stderr[:500]}",
+        )
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        # Count existing crawls
@@ -248,7 +259,7 @@ class TestMigrationFrom08x(unittest.TestCase):
        conn.close()

        # Try to add a new URL after migration (use --index-only for speed)
-        result = run_archivebox(self.work_dir, ['add', '--index-only', 'https://example.com/new-page'], timeout=45)
+        result = run_archivebox(self.work_dir, ["add", "--index-only", "https://example.com/new-page"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Add failed after migration: {result.stderr}")

        # Verify a new Crawl was created
@@ -258,35 +269,40 @@ class TestMigrationFrom08x(unittest.TestCase):
        new_crawl_count = cursor.fetchone()[0]
        conn.close()

-        self.assertGreater(new_crawl_count, initial_crawl_count,
-                          f"No new Crawl created when adding URL. Add stderr: {result.stderr[-500:]}")
+        self.assertGreater(
+            new_crawl_count,
+            initial_crawl_count,
+            f"No new Crawl created when adding URL. Add stderr: {result.stderr[-500:]}",
+        )

    def test_version_works_after_migration(self):
        """Version command should work after migration."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-        result = run_archivebox(self.work_dir, ['version'])
+        result = run_archivebox(self.work_dir, ["version"])
        self.assertEqual(result.returncode, 0, f"Version failed after migration: {result.stderr}")

        # Should show version info
        output = result.stdout + result.stderr
-        self.assertTrue('ArchiveBox' in output or 'version' in output.lower(),
-                       f"Version output missing expected content: {output[:500]}")
+        self.assertTrue(
+            "ArchiveBox" in output or "version" in output.lower(),
+            f"Version output missing expected content: {output[:500]}",
+        )

    def test_migration_creates_process_records(self):
        """Migration should create Process records for all ArchiveResults."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        # Verify Process records created
-        expected_count = len(self.original_data['archiveresults'])
+        expected_count = len(self.original_data["archiveresults"])
        ok, msg = verify_process_migration(self.db_path, expected_count)
        self.assertTrue(ok, msg)

    def test_migration_creates_binary_records(self):
        """Migration should create Binary records from cmd_version data."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
@@ -297,15 +313,18 @@ class TestMigrationFrom08x(unittest.TestCase):
        binary_count = cursor.fetchone()[0]

        # Should have at least one binary per unique extractor
-        extractors = set(ar['extractor'] for ar in self.original_data['archiveresults'])
-        self.assertGreaterEqual(binary_count, len(extractors),
-                              f"Expected at least {len(extractors)} Binaries, got {binary_count}")
+        extractors = {ar["extractor"] for ar in self.original_data["archiveresults"]}
+        self.assertGreaterEqual(
+            binary_count,
+            len(extractors),
+            f"Expected at least {len(extractors)} Binaries, got {binary_count}",
+        )

        conn.close()

    def test_migration_preserves_cmd_data(self):
        """Migration should preserve cmd data in Process.cmd field."""
-        result = run_archivebox(self.work_dir, ['init'], timeout=45)
+        result = run_archivebox(self.work_dir, ["init"], timeout=45)
        self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

        conn = sqlite3.connect(str(self.db_path))
@@ -316,9 +335,12 @@ class TestMigrationFrom08x(unittest.TestCase):
        cmd_records = cursor.fetchall()

        # All Processes should have non-empty cmd (test data has json.dumps([extractor, '--version']))
-        expected_count = len(self.original_data['archiveresults'])
-        self.assertEqual(len(cmd_records), expected_count,
-                        f"Expected {expected_count} Processes with cmd, got {len(cmd_records)}")
+        expected_count = len(self.original_data["archiveresults"])
+        self.assertEqual(
+            len(cmd_records),
+            expected_count,
+            f"Expected {expected_count} Processes with cmd, got {len(cmd_records)}",
+        )

        conn.close()

@@ -329,7 +351,7 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):
    def test_no_duplicate_snapshots_after_migration(self):
        """Migration should not create duplicate snapshots."""
        work_dir = Path(tempfile.mkdtemp())
-        db_path = work_dir / 'index.sqlite3'
+        db_path = work_dir / "index.sqlite3"

        try:
            create_data_dir_structure(work_dir)
@@ -338,7 +360,7 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):
            conn.close()
            seed_0_8_data(db_path)

-            result = run_archivebox(work_dir, ['init'], timeout=45)
+            result = run_archivebox(work_dir, ["init"], timeout=45)
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            # Check for duplicate URLs
@@ -359,7 +381,7 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):
    def test_no_orphaned_archiveresults_after_migration(self):
        """Migration should not leave orphaned ArchiveResults."""
        work_dir = Path(tempfile.mkdtemp())
-        db_path = work_dir / 'index.sqlite3'
+        db_path = work_dir / "index.sqlite3"

        try:
            create_data_dir_structure(work_dir)
@@ -368,7 +390,7 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):
            conn.close()
            seed_0_8_data(db_path)

-            result = run_archivebox(work_dir, ['init'], timeout=45)
+            result = run_archivebox(work_dir, ["init"], timeout=45)
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            ok, msg = verify_foreign_keys(db_path)
@@ -380,7 +402,7 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):
    def test_timestamps_preserved_after_migration(self):
        """Migration should preserve original timestamps."""
        work_dir = Path(tempfile.mkdtemp())
-        db_path = work_dir / 'index.sqlite3'
+        db_path = work_dir / "index.sqlite3"

        try:
            create_data_dir_structure(work_dir)
@@ -389,9 +411,9 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):
            conn.close()
            original_data = seed_0_8_data(db_path)

-            original_timestamps = {s['url']: s['timestamp'] for s in original_data['snapshots']}
+            original_timestamps = {s["url"]: s["timestamp"] for s in original_data["snapshots"]}

-            result = run_archivebox(work_dir, ['init'], timeout=45)
+            result = run_archivebox(work_dir, ["init"], timeout=45)
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            conn = sqlite3.connect(str(db_path))
@@ -402,8 +424,9 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):

            for url, original_ts in original_timestamps.items():
                self.assertEqual(
-                    migrated_timestamps.get(url), original_ts,
-                    f"Timestamp changed for {url}: {original_ts} -> {migrated_timestamps.get(url)}"
+                    migrated_timestamps.get(url),
+                    original_ts,
+                    f"Timestamp changed for {url}: {original_ts} -> {migrated_timestamps.get(url)}",
                )

        finally:
@@ -412,7 +435,7 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):
    def test_crawl_data_preserved_after_migration(self):
        """Migration should preserve crawl metadata (urls, label, status)."""
        work_dir = Path(tempfile.mkdtemp())
-        db_path = work_dir / 'index.sqlite3'
+        db_path = work_dir / "index.sqlite3"

        try:
            create_data_dir_structure(work_dir)
@@ -421,19 +444,19 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):
            conn.close()
            original_data = seed_0_8_data(db_path)

-            result = run_archivebox(work_dir, ['init'], timeout=45)
+            result = run_archivebox(work_dir, ["init"], timeout=45)
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            conn = sqlite3.connect(str(db_path))
            cursor = conn.cursor()

            # Check each crawl's data is preserved
-            for crawl in original_data['crawls']:
-                cursor.execute("SELECT urls, label FROM crawls_crawl WHERE id = ?", (crawl['id'],))
+            for crawl in original_data["crawls"]:
+                cursor.execute("SELECT urls, label FROM crawls_crawl WHERE id = ?", (crawl["id"],))
                row = cursor.fetchone()
                self.assertIsNotNone(row, f"Crawl {crawl['id']} not found after migration")
-                self.assertEqual(row[0], crawl['urls'], f"URLs mismatch for crawl {crawl['id']}")
-                self.assertEqual(row[1], crawl['label'], f"Label mismatch for crawl {crawl['id']}")
+                self.assertEqual(row[0], crawl["urls"], f"URLs mismatch for crawl {crawl['id']}")
+                self.assertEqual(row[1], crawl["label"], f"Label mismatch for crawl {crawl['id']}")

            conn.close()

@@ -443,7 +466,7 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):
    def test_tag_associations_preserved_after_migration(self):
        """Migration should preserve snapshot-tag associations."""
        work_dir = Path(tempfile.mkdtemp())
-        db_path = work_dir / 'index.sqlite3'
+        db_path = work_dir / "index.sqlite3"

        try:
            create_data_dir_structure(work_dir)
@@ -459,7 +482,7 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):
            original_count = cursor.fetchone()[0]
            conn.close()

-            result = run_archivebox(work_dir, ['init'], timeout=45)
+            result = run_archivebox(work_dir, ["init"], timeout=45)
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            # Count tag associations after migration
@@ -469,8 +492,11 @@ class TestMigrationDataIntegrity08x(unittest.TestCase):
            migrated_count = cursor.fetchone()[0]
            conn.close()

-            self.assertEqual(migrated_count, original_count,
-                           f"Tag associations changed: {original_count} -> {migrated_count}")
+            self.assertEqual(
+                migrated_count,
+                original_count,
+                f"Tag associations changed: {original_count} -> {migrated_count}",
+            )

        finally:
            shutil.rmtree(work_dir, ignore_errors=True)
@@ -482,7 +508,7 @@ class TestFilesystemMigration08to09(unittest.TestCase):
    def setUp(self):
        """Create a temporary directory for testing."""
        self.work_dir = Path(tempfile.mkdtemp())
-        self.db_path = self.work_dir / 'index.sqlite3'
+        self.db_path = self.work_dir / "index.sqlite3"

    def tearDown(self):
        """Clean up temporary directory."""
@@ -500,12 +526,13 @@ class TestFilesystemMigration08to09(unittest.TestCase):
        5. Old archive/timestamp/ directories are cleaned up
        """
        # Use the real 0.7.2 database which has actual ArchiveResults with files
-        gold_db = Path('/Users/squash/Local/Code/archiveboxes/archivebox-migration-path/archivebox-v0.7.2/data')
+        gold_db = Path("/Users/squash/Local/Code/archiveboxes/archivebox-migration-path/archivebox-v0.7.2/data")
        if not gold_db.exists():
            self.skipTest(f"Gold standard database not found at {gold_db}")

        # Copy gold database to test directory
        import shutil
+
        for item in gold_db.iterdir():
            if item.is_dir():
                shutil.copytree(item, self.work_dir / item.name, dirs_exist_ok=True)
@@ -513,23 +540,23 @@ class TestFilesystemMigration08to09(unittest.TestCase):
                shutil.copy2(item, self.work_dir / item.name)

        # Count archive directories and files BEFORE migration
-        archive_dir = self.work_dir / 'archive'
-        dirs_before = list(archive_dir.glob('*')) if archive_dir.exists() else []
+        archive_dir = self.work_dir / "archive"
+        dirs_before = list(archive_dir.glob("*")) if archive_dir.exists() else []
        dirs_before_count = len([d for d in dirs_before if d.is_dir()])

        # Count total files in all archive directories
        files_before = []
        for d in dirs_before:
            if d.is_dir():
-                files_before.extend([f for f in d.rglob('*') if f.is_file()])
+                files_before.extend([f for f in d.rglob("*") if f.is_file()])
        files_before_count = len(files_before)

        # Sample some specific files to check they're preserved
        sample_files = [
-            'favicon.ico',
-            'screenshot.png',
-            'singlefile.html',
-            'headers.json',
+            "favicon.ico",
+            "screenshot.png",
+            "singlefile.html",
+            "headers.json",
        ]
        sample_paths_before = {}
        for d in dirs_before:
@@ -544,17 +571,17 @@ class TestFilesystemMigration08to09(unittest.TestCase):
        print(f"[*] Sample files found: {len(sample_paths_before)}")

        # Run init to trigger migration
-        result = run_archivebox(self.work_dir, ['init'], timeout=60)
+        result = run_archivebox(self.work_dir, ["init"], timeout=60)
        self.assertEqual(result.returncode, 0, f"Init (migration) failed: {result.stderr}")

        # Count archive directories and files AFTER migration
-        dirs_after = list(archive_dir.glob('*')) if archive_dir.exists() else []
+        dirs_after = list(archive_dir.glob("*")) if archive_dir.exists() else []
        dirs_after_count = len([d for d in dirs_after if d.is_dir()])

        files_after = []
        for d in dirs_after:
            if d.is_dir():
-                files_after.extend([f for f in d.rglob('*') if f.is_file()])
+                files_after.extend([f for f in d.rglob("*") if f.is_file()])
        files_after_count = len(files_after)

        # Verify sample files still exist
@@ -571,26 +598,32 @@ class TestFilesystemMigration08to09(unittest.TestCase):
        print(f"[*] Sample files found: {len(sample_paths_after)}")

        # Verify files still in old structure after migration (not moved yet)
-        self.assertEqual(dirs_before_count, dirs_after_count,
-                        f"Archive directories lost during migration: {dirs_before_count} -> {dirs_after_count}")
-        self.assertEqual(files_before_count, files_after_count,
-                        f"Files lost during migration: {files_before_count} -> {files_after_count}")
+        self.assertEqual(
+            dirs_before_count,
+            dirs_after_count,
+            f"Archive directories lost during migration: {dirs_before_count} -> {dirs_after_count}",
+        )
+        self.assertEqual(
+            files_before_count,
+            files_after_count,
+            f"Files lost during migration: {files_before_count} -> {files_after_count}",
+        )

        # Run update to trigger filesystem reorganization
        print("\n[*] Running archivebox update to reorganize filesystem...")
-        result = run_archivebox(self.work_dir, ['update'], timeout=120)
+        result = run_archivebox(self.work_dir, ["update"], timeout=120)
        self.assertEqual(result.returncode, 0, f"Update failed: {result.stderr}")

        # Check new filesystem structure
        # New structure: users/username/snapshots/YYYYMMDD/example.com/snap-uuid-here/output.ext
-        users_dir = self.work_dir / 'users'
+        users_dir = self.work_dir / "users"
        snapshots_base = None

        if users_dir.exists():
            # Find the snapshots directory
            for user_dir in users_dir.iterdir():
                if user_dir.is_dir():
-                    user_snapshots = user_dir / 'snapshots'
+                    user_snapshots = user_dir / "snapshots"
                    if user_snapshots.exists():
                        snapshots_base = user_snapshots
                        break
@@ -610,7 +643,7 @@ class TestFilesystemMigration08to09(unittest.TestCase):
                            for snap_dir in domain_dir.iterdir():
                                if snap_dir.is_dir():
                                    # Files are directly in snap-uuid/ directory (no plugin subdirs)
-                                    for f in snap_dir.rglob('*'):
+                                    for f in snap_dir.rglob("*"):
                                        if f.is_file():
                                            files_new_structure.append(f)
                                            # Track sample files
@@ -622,15 +655,15 @@ class TestFilesystemMigration08to09(unittest.TestCase):
        print(f"[*] Sample files in new structure: {len(new_sample_files)}")

        # Check old structure (should be gone or empty)
-        old_archive_dir = self.work_dir / 'archive'
+        old_archive_dir = self.work_dir / "archive"
        old_files_remaining = []
        unmigrated_dirs = []
        if old_archive_dir.exists():
-            for d in old_archive_dir.glob('*'):
+            for d in old_archive_dir.glob("*"):
                # Only count REAL directories, not symlinks (symlinks are the migrated ones)
-                if d.is_dir(follow_symlinks=False) and d.name.replace('.', '').isdigit():
+                if d.is_dir(follow_symlinks=False) and d.name.replace(".", "").isdigit():
                    # This is a timestamp directory (old structure)
-                    files_in_dir = [f for f in d.rglob('*') if f.is_file()]
+                    files_in_dir = [f for f in d.rglob("*") if f.is_file()]
                    if files_in_dir:
                        unmigrated_dirs.append((d.name, len(files_in_dir)))
                        old_files_remaining.extend(files_in_dir)
@@ -641,30 +674,48 @@ class TestFilesystemMigration08to09(unittest.TestCase):
            print(f"[*] Unmigrated directories: {unmigrated_dirs}")

        # CRITICAL: Verify files were moved to new structure
-        self.assertGreater(files_new_count, 0,
-                          "No files found in new structure after update")
+        self.assertGreater(
+            files_new_count,
+            0,
+            "No files found in new structure after update",
+        )

        # CRITICAL: Verify old structure is cleaned up
-        self.assertEqual(old_files_count, 0,
-                        f"Old structure not cleaned up: {old_files_count} files still in archive/timestamp/ directories")
+        self.assertEqual(
+            old_files_count,
+            0,
+            f"Old structure not cleaned up: {old_files_count} files still in archive/timestamp/ directories",
+        )

        # CRITICAL: Verify all files were moved (total count should match)
        total_after_update = files_new_count + old_files_count
-        self.assertEqual(files_before_count, total_after_update,
-                        f"Files lost during reorganization: {files_before_count} before → {total_after_update} after")
+        self.assertEqual(
+            files_before_count,
+            total_after_update,
+            f"Files lost during reorganization: {files_before_count} before → {total_after_update} after",
+        )

        # CRITICAL: Verify sample files exist in new structure
-        self.assertGreater(len(new_sample_files), 0,
-                          "Sample files not found in new structure")
+        self.assertGreater(
+            len(new_sample_files),
+            0,
+            "Sample files not found in new structure",
+        )

        # Verify new path format
        for path_key, file_path in new_sample_files.items():
            # Path should contain: snapshots/YYYYMMDD/domain/snap-uuid/plugin/file
            path_parts = file_path.parts
-            self.assertIn('snapshots', path_parts,
-                         f"New path should contain 'snapshots': {file_path}")
-            self.assertIn('users', path_parts,
-                         f"New path should contain 'users': {file_path}")
+            self.assertIn(
+                "snapshots",
+                path_parts,
+                f"New path should contain 'snapshots': {file_path}",
+            )
+            self.assertIn(
+                "users",
+                path_parts,
+                f"New path should contain 'users': {file_path}",
+            )
            print(f"    ✓ {path_key} → {file_path.relative_to(self.work_dir)}")

        # Verify Process and Binary records were created
@@ -692,24 +743,33 @@ class TestFilesystemMigration08to09(unittest.TestCase):

        # Verify data migration happened correctly
        # The 0.7.2 gold database has 44 ArchiveResults
-        self.assertEqual(archiveresult_count, 44,
-                        f"Expected 44 ArchiveResults from 0.7.2 database, got {archiveresult_count}")
+        self.assertEqual(
+            archiveresult_count,
+            44,
+            f"Expected 44 ArchiveResults from 0.7.2 database, got {archiveresult_count}",
+        )

        # Each ArchiveResult should create one Process record
-        self.assertEqual(process_count, 44,
-                        f"Expected 44 Process records (1 per ArchiveResult), got {process_count}")
+        self.assertEqual(
+            process_count,
+            44,
+            f"Expected 44 Process records (1 per ArchiveResult), got {process_count}",
+        )

        # The 44 ArchiveResults use 7 unique binaries (curl, wget, etc.)
-        self.assertEqual(binary_count, 7,
-                        f"Expected 7 unique Binary records, got {binary_count}")
+        self.assertEqual(
+            binary_count,
+            7,
+            f"Expected 7 unique Binary records, got {binary_count}",
+        )

        # ALL ArchiveResults should be linked to Process records
-        self.assertEqual(linked_count, 44,
-                        f"Expected all 44 ArchiveResults linked to Process, got {linked_count}")
+        self.assertEqual(
+            linked_count,
+            44,
+            f"Expected all 44 ArchiveResults linked to Process, got {linked_count}",
+        )


-
-
-
-if __name__ == '__main__':
+if __name__ == "__main__":
    unittest.main()
--- a/archivebox/tests/test_migrations_fresh.py
+++ b/archivebox/tests/test_migrations_fresh.py
@@ -22,13 +22,13 @@ class TestFreshInstall(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            # Verify database was created
-            self.assertTrue((work_dir / 'index.sqlite3').exists(), "Database not created")
+            self.assertTrue((work_dir / "index.sqlite3").exists(), "Database not created")
            # Verify archive directory exists
-            self.assertTrue((work_dir / 'archive').is_dir(), "Archive dir not created")
+            self.assertTrue((work_dir / "archive").is_dir(), "Archive dir not created")

        finally:
            shutil.rmtree(work_dir, ignore_errors=True)
@@ -38,10 +38,10 @@ class TestFreshInstall(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-            result = run_archivebox(work_dir, ['status'])
+            result = run_archivebox(work_dir, ["status"])
            self.assertEqual(result.returncode, 0, f"Status failed: {result.stderr}")

        finally:
@@ -52,14 +52,14 @@ class TestFreshInstall(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            # Add a URL with --index-only for speed
-            result = run_archivebox(work_dir, ['add', '--index-only', 'https://example.com'])
+            result = run_archivebox(work_dir, ["add", "--index-only", "https://example.com"])
            self.assertEqual(result.returncode, 0, f"Add command failed: {result.stderr}")

-            conn = sqlite3.connect(str(work_dir / 'index.sqlite3'))
+            conn = sqlite3.connect(str(work_dir / "index.sqlite3"))
            cursor = conn.cursor()

            # Verify a Crawl was created
@@ -82,18 +82,18 @@ class TestFreshInstall(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-            result = run_archivebox(work_dir, ['add', '--index-only', 'https://example.com'])
+            result = run_archivebox(work_dir, ["add", "--index-only", "https://example.com"])
            self.assertEqual(result.returncode, 0, f"Add failed: {result.stderr}")

-            result = run_archivebox(work_dir, ['list'])
+            result = run_archivebox(work_dir, ["list"])
            self.assertEqual(result.returncode, 0, f"List failed: {result.stderr}")

            # Verify the URL appears in output
            output = result.stdout + result.stderr
-            self.assertIn('example.com', output, f"Added URL not in list output: {output[:500]}")
+            self.assertIn("example.com", output, f"Added URL not in list output: {output[:500]}")

        finally:
            shutil.rmtree(work_dir, ignore_errors=True)
@@ -103,10 +103,10 @@ class TestFreshInstall(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-            conn = sqlite3.connect(str(work_dir / 'index.sqlite3'))
+            conn = sqlite3.connect(str(work_dir / "index.sqlite3"))
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM django_migrations")
            count = cursor.fetchone()[0]
@@ -123,16 +123,16 @@ class TestFreshInstall(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-            conn = sqlite3.connect(str(work_dir / 'index.sqlite3'))
+            conn = sqlite3.connect(str(work_dir / "index.sqlite3"))
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM django_migrations WHERE app='core' ORDER BY name")
            migrations = [row[0] for row in cursor.fetchall()]
            conn.close()

-            self.assertIn('0001_initial', migrations)
+            self.assertIn("0001_initial", migrations)

        finally:
            shutil.rmtree(work_dir, ignore_errors=True)
@@ -146,16 +146,16 @@ class TestSchemaIntegrity(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-            conn = sqlite3.connect(str(work_dir / 'index.sqlite3'))
+            conn = sqlite3.connect(str(work_dir / "index.sqlite3"))
            cursor = conn.cursor()
-            cursor.execute('PRAGMA table_info(core_snapshot)')
+            cursor.execute("PRAGMA table_info(core_snapshot)")
            columns = {row[1] for row in cursor.fetchall()}
            conn.close()

-            required = {'id', 'url', 'timestamp', 'title', 'status', 'created_at', 'modified_at'}
+            required = {"id", "url", "timestamp", "title", "status", "created_at", "modified_at"}
            for col in required:
                self.assertIn(col, columns, f"Missing column: {col}")

@@ -167,16 +167,16 @@ class TestSchemaIntegrity(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-            conn = sqlite3.connect(str(work_dir / 'index.sqlite3'))
+            conn = sqlite3.connect(str(work_dir / "index.sqlite3"))
            cursor = conn.cursor()
-            cursor.execute('PRAGMA table_info(core_archiveresult)')
+            cursor.execute("PRAGMA table_info(core_archiveresult)")
            columns = {row[1] for row in cursor.fetchall()}
            conn.close()

-            required = {'id', 'snapshot_id', 'plugin', 'status', 'created_at', 'modified_at'}
+            required = {"id", "snapshot_id", "plugin", "status", "created_at", "modified_at"}
            for col in required:
                self.assertIn(col, columns, f"Missing column: {col}")

@@ -188,16 +188,16 @@ class TestSchemaIntegrity(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-            conn = sqlite3.connect(str(work_dir / 'index.sqlite3'))
+            conn = sqlite3.connect(str(work_dir / "index.sqlite3"))
            cursor = conn.cursor()
-            cursor.execute('PRAGMA table_info(core_tag)')
+            cursor.execute("PRAGMA table_info(core_tag)")
            columns = {row[1] for row in cursor.fetchall()}
            conn.close()

-            required = {'id', 'name', 'slug'}
+            required = {"id", "name", "slug"}
            for col in required:
                self.assertIn(col, columns, f"Missing column: {col}")

@@ -209,21 +209,21 @@ class TestSchemaIntegrity(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-            conn = sqlite3.connect(str(work_dir / 'index.sqlite3'))
+            conn = sqlite3.connect(str(work_dir / "index.sqlite3"))
            cursor = conn.cursor()
-            cursor.execute('PRAGMA table_info(crawls_crawl)')
+            cursor.execute("PRAGMA table_info(crawls_crawl)")
            columns = {row[1] for row in cursor.fetchall()}
            conn.close()

-            required = {'id', 'urls', 'status', 'created_at', 'created_by_id'}
+            required = {"id", "urls", "status", "created_at", "created_by_id"}
            for col in required:
                self.assertIn(col, columns, f"Missing column: {col}")

            # seed_id should NOT exist (removed in 0.9.x)
-            self.assertNotIn('seed_id', columns, "seed_id column should not exist in 0.9.x")
+            self.assertNotIn("seed_id", columns, "seed_id column should not exist in 0.9.x")

        finally:
            shutil.rmtree(work_dir, ignore_errors=True)
@@ -237,17 +237,17 @@ class TestMultipleSnapshots(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

            # Add URLs one at a time
-            result = run_archivebox(work_dir, ['add', '--index-only', 'https://example.com'])
+            result = run_archivebox(work_dir, ["add", "--index-only", "https://example.com"])
            self.assertEqual(result.returncode, 0, f"Add 1 failed: {result.stderr}")

-            result = run_archivebox(work_dir, ['add', '--index-only', 'https://example.org'])
+            result = run_archivebox(work_dir, ["add", "--index-only", "https://example.org"])
            self.assertEqual(result.returncode, 0, f"Add 2 failed: {result.stderr}")

-            conn = sqlite3.connect(str(work_dir / 'index.sqlite3'))
+            conn = sqlite3.connect(str(work_dir / "index.sqlite3"))
            cursor = conn.cursor()

            # Verify snapshots were created
@@ -270,13 +270,13 @@ class TestMultipleSnapshots(unittest.TestCase):
        work_dir = Path(tempfile.mkdtemp())

        try:
-            result = run_archivebox(work_dir, ['init'])
+            result = run_archivebox(work_dir, ["init"])
            self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")

-            result = run_archivebox(work_dir, ['add', '--index-only', 'https://example.com'])
+            result = run_archivebox(work_dir, ["add", "--index-only", "https://example.com"])
            self.assertEqual(result.returncode, 0, f"Add failed: {result.stderr}")

-            conn = sqlite3.connect(str(work_dir / 'index.sqlite3'))
+            conn = sqlite3.connect(str(work_dir / "index.sqlite3"))
            cursor = conn.cursor()

            # Check that snapshot has a crawl_id
@@ -291,5 +291,5 @@ class TestMultipleSnapshots(unittest.TestCase):
            shutil.rmtree(work_dir, ignore_errors=True)


-if __name__ == '__main__':
+if __name__ == "__main__":
    unittest.main()
--- a/archivebox/tests/test_persona_runtime.py
+++ b/archivebox/tests/test_persona_runtime.py
@@ -53,23 +53,23 @@ def test_persona_prepare_runtime_for_crawl_clones_and_cleans_profile(initialized
            'template_dir_recorded': (runtime_root / 'template_dir.txt').read_text().strip(),
            'chrome_binary_recorded': (runtime_root / 'chrome_binary.txt').read_text().strip(),
        }))
-        """
+        """,
    )

    stdout, stderr, code = run_python_cwd(script, cwd=initialized_archive, timeout=60)
    assert code == 0, stderr

    payload = json.loads(stdout.strip().splitlines()[-1])
-    assert payload['runtime_root_exists'] is True
-    assert payload['runtime_profile_exists'] is True
-    assert payload['runtime_downloads_exists'] is True
-    assert payload['preferences_copied'] is True
-    assert payload['singleton_removed'] is True
-    assert payload['cache_removed'] is True
-    assert payload['log_removed'] is True
-    assert payload['persona_name_recorded'] == 'Default'
-    assert payload['template_dir_recorded'].endswith('/personas/Default/chrome_user_data')
-    assert payload['chrome_binary_recorded'] == '/Applications/Chromium.app/Contents/MacOS/Chromium'
+    assert payload["runtime_root_exists"] is True
+    assert payload["runtime_profile_exists"] is True
+    assert payload["runtime_downloads_exists"] is True
+    assert payload["preferences_copied"] is True
+    assert payload["singleton_removed"] is True
+    assert payload["cache_removed"] is True
+    assert payload["log_removed"] is True
+    assert payload["persona_name_recorded"] == "Default"
+    assert payload["template_dir_recorded"].endswith("/personas/Default/chrome_user_data")
+    assert payload["chrome_binary_recorded"] == "/Applications/Chromium.app/Contents/MacOS/Chromium"


 def test_persona_cleanup_runtime_for_crawl_removes_only_runtime_copy(initialized_archive):
@@ -102,15 +102,15 @@ def test_persona_cleanup_runtime_for_crawl_removes_only_runtime_copy(initialized
            'runtime_removed': not runtime_root.exists(),
            'template_still_exists': (template_dir / 'Default' / 'Preferences').exists(),
        }))
-        """
+        """,
    )

    stdout, stderr, code = run_python_cwd(script, cwd=initialized_archive, timeout=60)
    assert code == 0, stderr

    payload = json.loads(stdout.strip().splitlines()[-1])
-    assert payload['runtime_removed'] is True
-    assert payload['template_still_exists'] is True
+    assert payload["runtime_removed"] is True
+    assert payload["template_still_exists"] is True


 def test_crawl_resolve_persona_raises_for_missing_persona_id(initialized_archive):
@@ -135,15 +135,15 @@ def test_crawl_resolve_persona_raises_for_missing_persona_id(initialized_archive
            print(json.dumps({'raised': True, 'message': str(err)}))
        else:
            raise SystemExit('resolve_persona unexpectedly succeeded')
-        """
+        """,
    )

    stdout, stderr, code = run_python_cwd(script, cwd=initialized_archive, timeout=60)
    assert code == 0, stderr

    payload = json.loads(stdout.strip().splitlines()[-1])
-    assert payload['raised'] is True
-    assert 'references missing Persona' in payload['message']
+    assert payload["raised"] is True
+    assert "references missing Persona" in payload["message"]


 def test_get_config_raises_for_missing_persona_id(initialized_archive):
@@ -169,12 +169,12 @@ def test_get_config_raises_for_missing_persona_id(initialized_archive):
            print(json.dumps({'raised': True, 'message': str(err)}))
        else:
            raise SystemExit('get_config unexpectedly succeeded')
-        """
+        """,
    )

    stdout, stderr, code = run_python_cwd(script, cwd=initialized_archive, timeout=60)
    assert code == 0, stderr

    payload = json.loads(stdout.strip().splitlines()[-1])
-    assert payload['raised'] is True
-    assert 'references missing Persona' in payload['message']
+    assert payload["raised"] is True
+    assert "references missing Persona" in payload["message"]
--- a/archivebox/tests/test_process_runtime_paths.py
+++ b/archivebox/tests/test_process_runtime_paths.py
@@ -3,7 +3,7 @@ import unittest
 from pathlib import Path


-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "archivebox.settings")


 from archivebox.machine.models import Process
@@ -13,26 +13,25 @@ class TestProcessRuntimePaths(unittest.TestCase):
    def test_hook_processes_use_isolated_runtime_dir(self):
        process = Process(
            process_type=Process.TypeChoices.HOOK,
-            pwd='/tmp/archive/example/chrome',
-            cmd=['node', '/plugins/chrome/on_Snapshot__11_chrome_wait.js', '--url=https://example.com'],
+            pwd="/tmp/archive/example/chrome",
+            cmd=["node", "/plugins/chrome/on_Snapshot__11_chrome_wait.js", "--url=https://example.com"],
        )

-        expected_dir = Path('/tmp/archive/example/chrome/.hooks/on_Snapshot__11_chrome_wait.js')
+        expected_dir = Path("/tmp/archive/example/chrome/.hooks/on_Snapshot__11_chrome_wait.js")
        self.assertEqual(process.runtime_dir, expected_dir)
-        self.assertEqual(process.stdout_file, expected_dir / 'stdout.log')
-        self.assertEqual(process.stderr_file, expected_dir / 'stderr.log')
-        self.assertEqual(process.pid_file, expected_dir / 'process.pid')
+        self.assertEqual(process.stdout_file, expected_dir / "stdout.log")
+        self.assertEqual(process.stderr_file, expected_dir / "stderr.log")
+        self.assertEqual(process.pid_file, expected_dir / "process.pid")

    def test_non_hook_processes_keep_runtime_files_in_pwd(self):
        process = Process(
            process_type=Process.TypeChoices.WORKER,
-            pwd='/tmp/archive/example',
-            cmd=['archivebox', 'run', '--snapshot-id', '123'],
+            pwd="/tmp/archive/example",
+            cmd=["archivebox", "run", "--snapshot-id", "123"],
        )

-        expected_dir = Path('/tmp/archive/example')
+        expected_dir = Path("/tmp/archive/example")
        self.assertEqual(process.runtime_dir, expected_dir)
-        self.assertEqual(process.stdout_file, expected_dir / 'stdout.log')
-        self.assertEqual(process.stderr_file, expected_dir / 'stderr.log')
-        self.assertEqual(process.pid_file, expected_dir / 'process.pid')
-
+        self.assertEqual(process.stdout_file, expected_dir / "stdout.log")
+        self.assertEqual(process.stderr_file, expected_dir / "stderr.log")
+        self.assertEqual(process.pid_file, expected_dir / "process.pid")
--- a/archivebox/tests/test_recursive_crawl.py
+++ b/archivebox/tests/test_recursive_crawl.py
@@ -11,7 +11,6 @@ from pathlib import Path
 import pytest


-
 def wait_for_db_condition(timeout, condition, interval=0.5):
    deadline = time.time() + timeout
    while time.time() < deadline:
@@ -45,9 +44,7 @@ def run_add_until(args, env, condition, timeout=120):
        env=env,
    )

-    assert wait_for_db_condition(timeout=timeout, condition=condition), (
-        f"Timed out waiting for condition while running: {' '.join(args)}"
-    )
+    assert wait_for_db_condition(timeout=timeout, condition=condition), f"Timed out waiting for condition while running: {' '.join(args)}"
    return stop_process(proc)


@@ -60,26 +57,28 @@ def test_background_hooks_dont_block_parser_extractors(tmp_path, process, recurs

    # Enable only parser extractors and background hooks for this test
    env = os.environ.copy()
-    env.update({
-        # Disable most extractors
-        "SAVE_WGET": "false",
-        "SAVE_SINGLEFILE": "false",
-        "SAVE_READABILITY": "false",
-        "SAVE_MERCURY": "false",
-        "SAVE_HTMLTOTEXT": "false",
-        "SAVE_PDF": "false",
-        "SAVE_SCREENSHOT": "false",
-        "SAVE_DOM": "false",
-        "SAVE_HEADERS": "false",
-        "SAVE_GIT": "false",
-        "SAVE_YTDLP": "false",
-        "SAVE_ARCHIVEDOTORG": "false",
-        "SAVE_TITLE": "false",
-        "SAVE_FAVICON": "true",
-    })
+    env.update(
+        {
+            # Disable most extractors
+            "SAVE_WGET": "false",
+            "SAVE_SINGLEFILE": "false",
+            "SAVE_READABILITY": "false",
+            "SAVE_MERCURY": "false",
+            "SAVE_HTMLTOTEXT": "false",
+            "SAVE_PDF": "false",
+            "SAVE_SCREENSHOT": "false",
+            "SAVE_DOM": "false",
+            "SAVE_HEADERS": "false",
+            "SAVE_GIT": "false",
+            "SAVE_YTDLP": "false",
+            "SAVE_ARCHIVEDOTORG": "false",
+            "SAVE_TITLE": "false",
+            "SAVE_FAVICON": "true",
+        },
+    )

    proc = subprocess.Popen(
-        ['archivebox', 'add', '--depth=1', '--plugins=favicon,parse_html_urls', recursive_test_site['root_url']],
+        ["archivebox", "add", "--depth=1", "--plugins=favicon,parse_html_urls", recursive_test_site["root_url"]],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
@@ -88,9 +87,12 @@ def test_background_hooks_dont_block_parser_extractors(tmp_path, process, recurs

    assert wait_for_db_condition(
        timeout=120,
-        condition=lambda c: c.execute(
-            "SELECT COUNT(*) FROM core_archiveresult WHERE plugin LIKE 'parse_%_urls' AND status IN ('started', 'succeeded', 'failed')"
-        ).fetchone()[0] > 0,
+        condition=lambda c: (
+            c.execute(
+                "SELECT COUNT(*) FROM core_archiveresult WHERE plugin LIKE 'parse_%_urls' AND status IN ('started', 'succeeded', 'failed')",
+            ).fetchone()[0]
+            > 0
+        ),
    ), "Parser extractors never progressed beyond queued status"
    stdout, stderr = stop_process(proc)

@@ -99,18 +101,18 @@ def test_background_hooks_dont_block_parser_extractors(tmp_path, process, recurs
    if stdout:
        print(f"\n=== STDOUT (last 2000 chars) ===\n{stdout[-2000:]}\n=== END STDOUT ===\n")

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    snapshots = c.execute("SELECT url, depth, status FROM core_snapshot").fetchall()
    bg_hooks = c.execute(
-        "SELECT plugin, status FROM core_archiveresult WHERE plugin IN ('favicon', 'consolelog', 'ssl', 'responses', 'redirects', 'staticfile') ORDER BY plugin"
+        "SELECT plugin, status FROM core_archiveresult WHERE plugin IN ('favicon', 'consolelog', 'ssl', 'responses', 'redirects', 'staticfile') ORDER BY plugin",
    ).fetchall()
    parser_extractors = c.execute(
-        "SELECT plugin, status FROM core_archiveresult WHERE plugin LIKE 'parse_%_urls' ORDER BY plugin"
+        "SELECT plugin, status FROM core_archiveresult WHERE plugin LIKE 'parse_%_urls' ORDER BY plugin",
    ).fetchall()
    all_extractors = c.execute(
-        "SELECT plugin, status FROM core_archiveresult ORDER BY plugin"
+        "SELECT plugin, status FROM core_archiveresult ORDER BY plugin",
    ).fetchall()

    conn.close()
@@ -122,14 +124,13 @@ def test_background_hooks_dont_block_parser_extractors(tmp_path, process, recurs
    )

    assert len(all_extractors) > 0, (
-        f"Should have extractors created for snapshot. "
-        f"If this fails, Snapshot.run() may not have started. "
-        f"Got: {all_extractors}"
+        f"Should have extractors created for snapshot. If this fails, Snapshot.run() may not have started. Got: {all_extractors}"
    )

    parser_statuses = [status for _, status in parser_extractors]
-    assert 'started' in parser_statuses or 'succeeded' in parser_statuses or 'failed' in parser_statuses, \
+    assert "started" in parser_statuses or "succeeded" in parser_statuses or "failed" in parser_statuses, (
        f"Parser extractors should have run, got statuses: {parser_statuses}. Background hooks: {bg_hooks}"
+    )


 def test_parser_extractors_emit_snapshot_jsonl(tmp_path, process, recursive_test_site):
@@ -137,26 +138,28 @@ def test_parser_extractors_emit_snapshot_jsonl(tmp_path, process, recursive_test
    os.chdir(tmp_path)

    env = os.environ.copy()
-    env.update({
-        "SAVE_WGET": "false",
-        "SAVE_SINGLEFILE": "false",
-        "SAVE_READABILITY": "false",
-        "SAVE_MERCURY": "false",
-        "SAVE_HTMLTOTEXT": "false",
-        "SAVE_PDF": "false",
-        "SAVE_SCREENSHOT": "false",
-        "SAVE_DOM": "false",
-        "SAVE_HEADERS": "false",
-        "SAVE_GIT": "false",
-        "SAVE_YTDLP": "false",
-        "SAVE_ARCHIVEDOTORG": "false",
-        "SAVE_TITLE": "false",
-        "SAVE_FAVICON": "false",
-        "USE_CHROME": "false",
-    })
+    env.update(
+        {
+            "SAVE_WGET": "false",
+            "SAVE_SINGLEFILE": "false",
+            "SAVE_READABILITY": "false",
+            "SAVE_MERCURY": "false",
+            "SAVE_HTMLTOTEXT": "false",
+            "SAVE_PDF": "false",
+            "SAVE_SCREENSHOT": "false",
+            "SAVE_DOM": "false",
+            "SAVE_HEADERS": "false",
+            "SAVE_GIT": "false",
+            "SAVE_YTDLP": "false",
+            "SAVE_ARCHIVEDOTORG": "false",
+            "SAVE_TITLE": "false",
+            "SAVE_FAVICON": "false",
+            "USE_CHROME": "false",
+        },
+    )

    result = subprocess.run(
-        ['archivebox', 'add', '--depth=0', '--plugins=wget,parse_html_urls', recursive_test_site['root_url']],
+        ["archivebox", "add", "--depth=0", "--plugins=wget,parse_html_urls", recursive_test_site["root_url"]],
        capture_output=True,
        text=True,
        env=env,
@@ -164,11 +167,11 @@ def test_parser_extractors_emit_snapshot_jsonl(tmp_path, process, recursive_test
    )
    assert result.returncode == 0, result.stderr

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    parse_html = c.execute(
-        "SELECT id, status, output_str FROM core_archiveresult WHERE plugin LIKE '%parse_html_urls' ORDER BY id LIMIT 1"
+        "SELECT id, status, output_str FROM core_archiveresult WHERE plugin LIKE '%parse_html_urls' ORDER BY id LIMIT 1",
    ).fetchone()

    conn.close()
@@ -177,11 +180,10 @@ def test_parser_extractors_emit_snapshot_jsonl(tmp_path, process, recursive_test
        status = parse_html[1]
        output = parse_html[2] or ""

-        assert status in ['started', 'succeeded', 'failed'], \
-            f"60_parse_html_urls should have run, got status: {status}"
+        assert status in ["started", "succeeded", "failed"], f"60_parse_html_urls should have run, got status: {status}"

-        if status == 'succeeded' and output:
-            assert 'parsed' in output.lower(), "Parser summary should report parsed URLs"
+        if status == "succeeded" and output:
+            assert "parsed" in output.lower(), "Parser summary should report parsed URLs"

    urls_jsonl_files = list(Path("users/system/snapshots").rglob("parse_html_urls/**/urls.jsonl"))
    assert urls_jsonl_files, "parse_html_urls should write urls.jsonl output"
@@ -192,8 +194,7 @@ def test_parser_extractors_emit_snapshot_jsonl(tmp_path, process, recursive_test
            records.append(json.loads(line))

    assert records, "urls.jsonl should contain parsed Snapshot records"
-    assert all(record.get("type") == "Snapshot" for record in records), \
-        f"Expected Snapshot JSONL records, got: {records}"
+    assert all(record.get("type") == "Snapshot" for record in records), f"Expected Snapshot JSONL records, got: {records}"


 def test_recursive_crawl_creates_child_snapshots(tmp_path, process, recursive_test_site):
@@ -201,27 +202,29 @@ def test_recursive_crawl_creates_child_snapshots(tmp_path, process, recursive_te
    os.chdir(tmp_path)

    env = os.environ.copy()
-    env.update({
-        "URL_ALLOWLIST": r"127\.0\.0\.1[:/].*",
-        "SAVE_READABILITY": "false",
-        "SAVE_SINGLEFILE": "false",
-        "SAVE_MERCURY": "false",
-        "SAVE_SCREENSHOT": "false",
-        "SAVE_PDF": "false",
-        "SAVE_HEADERS": "false",
-        "SAVE_ARCHIVEDOTORG": "false",
-        "SAVE_GIT": "false",
-        "SAVE_YTDLP": "false",
-        "SAVE_TITLE": "false",
-    })
+    env.update(
+        {
+            "URL_ALLOWLIST": r"127\.0\.0\.1[:/].*",
+            "SAVE_READABILITY": "false",
+            "SAVE_SINGLEFILE": "false",
+            "SAVE_MERCURY": "false",
+            "SAVE_SCREENSHOT": "false",
+            "SAVE_PDF": "false",
+            "SAVE_HEADERS": "false",
+            "SAVE_ARCHIVEDOTORG": "false",
+            "SAVE_GIT": "false",
+            "SAVE_YTDLP": "false",
+            "SAVE_TITLE": "false",
+        },
+    )

    stdout, stderr = run_add_until(
-        ['archivebox', 'add', '--depth=1', '--plugins=wget,parse_html_urls', recursive_test_site['root_url']],
+        ["archivebox", "add", "--depth=1", "--plugins=wget,parse_html_urls", recursive_test_site["root_url"]],
        env=env,
        timeout=120,
        condition=lambda c: (
            c.execute("SELECT COUNT(*) FROM core_snapshot WHERE depth = 0").fetchone()[0] >= 1
-            and c.execute("SELECT COUNT(*) FROM core_snapshot WHERE depth = 1").fetchone()[0] >= len(recursive_test_site['child_urls'])
+            and c.execute("SELECT COUNT(*) FROM core_snapshot WHERE depth = 1").fetchone()[0] >= len(recursive_test_site["child_urls"])
        ),
    )

@@ -230,26 +233,26 @@ def test_recursive_crawl_creates_child_snapshots(tmp_path, process, recursive_te
    if stdout:
        print(f"\n=== STDOUT (last 2000 chars) ===\n{stdout[-2000:]}\n=== END STDOUT ===\n")

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    all_snapshots = c.execute("SELECT url, depth FROM core_snapshot").fetchall()
    root_snapshot = c.execute(
-        "SELECT id, url, depth, parent_snapshot_id FROM core_snapshot WHERE depth = 0 ORDER BY created_at LIMIT 1"
+        "SELECT id, url, depth, parent_snapshot_id FROM core_snapshot WHERE depth = 0 ORDER BY created_at LIMIT 1",
    ).fetchone()
    child_snapshots = c.execute(
-        "SELECT id, url, depth, parent_snapshot_id FROM core_snapshot WHERE depth = 1"
+        "SELECT id, url, depth, parent_snapshot_id FROM core_snapshot WHERE depth = 1",
    ).fetchall()
    crawl = c.execute(
-        "SELECT id, max_depth FROM crawls_crawl ORDER BY created_at DESC LIMIT 1"
+        "SELECT id, max_depth FROM crawls_crawl ORDER BY created_at DESC LIMIT 1",
    ).fetchone()
    parser_status = c.execute(
        "SELECT plugin, status FROM core_archiveresult WHERE snapshot_id = ? AND plugin LIKE 'parse_%_urls'",
-        (root_snapshot[0] if root_snapshot else '',)
+        (root_snapshot[0] if root_snapshot else "",),
    ).fetchall()
    started_extractors = c.execute(
        "SELECT plugin, status FROM core_archiveresult WHERE snapshot_id = ? AND status = 'started'",
-        (root_snapshot[0] if root_snapshot else '',)
+        (root_snapshot[0] if root_snapshot else "",),
    ).fetchall()

    conn.close()
@@ -260,13 +263,13 @@ def test_recursive_crawl_creates_child_snapshots(tmp_path, process, recursive_te
    assert crawl is not None, "Crawl should be created"
    assert crawl[1] == 1, f"Crawl max_depth should be 1, got {crawl[1]}"

-    assert len(child_snapshots) > 0, \
+    assert len(child_snapshots) > 0, (
        f"Child snapshots should be created from monadical.com links. Parser status: {parser_status}. Started extractors blocking: {started_extractors}"
+    )

    for child_id, child_url, child_depth, parent_id in child_snapshots:
        assert child_depth == 1, f"Child snapshot should have depth=1, got {child_depth}"
-        assert parent_id == root_id, \
-            f"Child snapshot {child_url} should have parent_snapshot_id={root_id}, got {parent_id}"
+        assert parent_id == root_id, f"Child snapshot {child_url} should have parent_snapshot_id={root_id}, got {parent_id}"


 def test_recursive_crawl_respects_depth_limit(tmp_path, process, disable_extractors_dict, recursive_test_site):
@@ -277,45 +280,45 @@ def test_recursive_crawl_respects_depth_limit(tmp_path, process, disable_extract
    env["URL_ALLOWLIST"] = r"127\.0\.0\.1[:/].*"

    stdout, stderr = run_add_until(
-        ['archivebox', 'add', '--depth=1', '--plugins=wget,parse_html_urls', recursive_test_site['root_url']],
+        ["archivebox", "add", "--depth=1", "--plugins=wget,parse_html_urls", recursive_test_site["root_url"]],
        env=env,
        timeout=120,
        condition=lambda c: (
            c.execute("SELECT COUNT(*) FROM core_snapshot WHERE depth = 0").fetchone()[0] >= 1
-            and c.execute("SELECT COUNT(*) FROM core_snapshot WHERE depth = 1").fetchone()[0] >= len(recursive_test_site['child_urls'])
+            and c.execute("SELECT COUNT(*) FROM core_snapshot WHERE depth = 1").fetchone()[0] >= len(recursive_test_site["child_urls"])
            and c.execute(
                "SELECT COUNT(DISTINCT ar.snapshot_id) "
                "FROM core_archiveresult ar "
                "JOIN core_snapshot s ON s.id = ar.snapshot_id "
                "WHERE s.depth = 1 "
                "AND ar.plugin LIKE 'parse_%_urls' "
-                "AND ar.status IN ('started', 'succeeded', 'failed')"
-            ).fetchone()[0] >= len(recursive_test_site['child_urls'])
+                "AND ar.status IN ('started', 'succeeded', 'failed')",
+            ).fetchone()[0]
+            >= len(recursive_test_site["child_urls"])
        ),
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    max_depth_found = c.execute(
-        "SELECT MAX(depth) FROM core_snapshot"
+        "SELECT MAX(depth) FROM core_snapshot",
    ).fetchone()[0]
    depth_counts = c.execute(
-        "SELECT depth, COUNT(*) FROM core_snapshot GROUP BY depth ORDER BY depth"
+        "SELECT depth, COUNT(*) FROM core_snapshot GROUP BY depth ORDER BY depth",
    ).fetchall()

    conn.close()

    assert max_depth_found is not None, "Should have at least one snapshot"
-    assert max_depth_found <= 1, \
-        f"Max depth should not exceed 1, got {max_depth_found}. Depth distribution: {depth_counts}"
+    assert max_depth_found <= 1, f"Max depth should not exceed 1, got {max_depth_found}. Depth distribution: {depth_counts}"


 def test_crawl_snapshot_has_parent_snapshot_field(tmp_path, process, disable_extractors_dict):
    """Test that Snapshot model has parent_snapshot field."""
    os.chdir(tmp_path)

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    # Check schema for parent_snapshot_id column
@@ -324,15 +327,14 @@ def test_crawl_snapshot_has_parent_snapshot_field(tmp_path, process, disable_ext

    column_names = [col[1] for col in schema]

-    assert 'parent_snapshot_id' in column_names, \
-        f"Snapshot table should have parent_snapshot_id column. Columns: {column_names}"
+    assert "parent_snapshot_id" in column_names, f"Snapshot table should have parent_snapshot_id column. Columns: {column_names}"


 def test_snapshot_depth_field_exists(tmp_path, process, disable_extractors_dict):
    """Test that Snapshot model has depth field."""
    os.chdir(tmp_path)

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    # Check schema for depth column
@@ -341,8 +343,7 @@ def test_snapshot_depth_field_exists(tmp_path, process, disable_extractors_dict)

    column_names = [col[1] for col in schema]

-    assert 'depth' in column_names, \
-        f"Snapshot table should have depth column. Columns: {column_names}"
+    assert "depth" in column_names, f"Snapshot table should have depth column. Columns: {column_names}"


 def test_root_snapshot_has_depth_zero(tmp_path, process, disable_extractors_dict, recursive_test_site):
@@ -353,21 +354,24 @@ def test_root_snapshot_has_depth_zero(tmp_path, process, disable_extractors_dict
    env["URL_ALLOWLIST"] = r"127\.0\.0\.1[:/].*"

    stdout, stderr = run_add_until(
-        ['archivebox', 'add', '--depth=1', '--plugins=wget,parse_html_urls', recursive_test_site['root_url']],
+        ["archivebox", "add", "--depth=1", "--plugins=wget,parse_html_urls", recursive_test_site["root_url"]],
        env=env,
        timeout=120,
-        condition=lambda c: c.execute(
-            "SELECT COUNT(*) FROM core_snapshot WHERE url = ?",
-            (recursive_test_site['root_url'],),
-        ).fetchone()[0] >= 1,
+        condition=lambda c: (
+            c.execute(
+                "SELECT COUNT(*) FROM core_snapshot WHERE url = ?",
+                (recursive_test_site["root_url"],),
+            ).fetchone()[0]
+            >= 1
+        ),
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    snapshot = c.execute(
        "SELECT id, depth FROM core_snapshot WHERE url = ? ORDER BY created_at LIMIT 1",
-        (recursive_test_site['root_url'],)
+        (recursive_test_site["root_url"],),
    ).fetchone()

    conn.close()
@@ -381,42 +385,47 @@ def test_archiveresult_worker_queue_filters_by_foreground_extractors(tmp_path, p
    os.chdir(tmp_path)

    env = os.environ.copy()
-    env.update({
-        "SAVE_WGET": "true",
-        "SAVE_SINGLEFILE": "false",
-        "SAVE_PDF": "false",
-        "SAVE_SCREENSHOT": "false",
-        "SAVE_FAVICON": "true",
-    })
-
-    stdout, stderr = run_add_until(
-        ['archivebox', 'add', '--plugins=favicon,wget,parse_html_urls', recursive_test_site['root_url']],
-        env=env,
-        timeout=120,
-        condition=lambda c: c.execute(
-            "SELECT COUNT(*) FROM core_archiveresult WHERE plugin LIKE 'parse_%_urls' AND status IN ('started', 'succeeded', 'failed')"
-        ).fetchone()[0] > 0,
+    env.update(
+        {
+            "SAVE_WGET": "true",
+            "SAVE_SINGLEFILE": "false",
+            "SAVE_PDF": "false",
+            "SAVE_SCREENSHOT": "false",
+            "SAVE_FAVICON": "true",
+        },
    )

-    conn = sqlite3.connect('index.sqlite3')
+    stdout, stderr = run_add_until(
+        ["archivebox", "add", "--plugins=favicon,wget,parse_html_urls", recursive_test_site["root_url"]],
+        env=env,
+        timeout=120,
+        condition=lambda c: (
+            c.execute(
+                "SELECT COUNT(*) FROM core_archiveresult WHERE plugin LIKE 'parse_%_urls' AND status IN ('started', 'succeeded', 'failed')",
+            ).fetchone()[0]
+            > 0
+        ),
+    )
+
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    bg_results = c.execute(
-        "SELECT plugin, status FROM core_archiveresult WHERE plugin IN ('favicon', 'consolelog', 'ssl', 'responses', 'redirects', 'staticfile') AND status IN ('started', 'succeeded', 'failed')"
+        "SELECT plugin, status FROM core_archiveresult WHERE plugin IN ('favicon', 'consolelog', 'ssl', 'responses', 'redirects', 'staticfile') AND status IN ('started', 'succeeded', 'failed')",
    ).fetchall()
    parser_status = c.execute(
-        "SELECT plugin, status FROM core_archiveresult WHERE plugin LIKE 'parse_%_urls'"
+        "SELECT plugin, status FROM core_archiveresult WHERE plugin LIKE 'parse_%_urls'",
    ).fetchall()

    conn.close()

    if len(bg_results) > 0:
        parser_statuses = [status for _, status in parser_status]
-        non_queued = [s for s in parser_statuses if s != 'queued']
-        assert len(non_queued) > 0 or len(parser_status) == 0, \
-            f"With {len(bg_results)} background hooks started, parser extractors should still run. " \
-            f"Got statuses: {parser_statuses}"
+        non_queued = [s for s in parser_statuses if s != "queued"]
+        assert len(non_queued) > 0 or len(parser_status) == 0, (
+            f"With {len(bg_results)} background hooks started, parser extractors should still run. Got statuses: {parser_statuses}"
+        )


-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/archivebox/tests/test_runner.py
+++ b/archivebox/tests/test_runner.py
@@ -1,5 +1,8 @@
 import asyncio
+import json
 import subprocess
+import sys
+from pathlib import Path
 from types import SimpleNamespace

 import pytest
@@ -12,6 +15,15 @@ pytestmark = pytest.mark.django_db
 class _DummyBus:
    def __init__(self, name: str):
        self.name = name
+        self.registrations = []
+
+    def on(self, event_pattern, handler):
+        registration = SimpleNamespace(event_pattern=event_pattern, handler=handler)
+        self.registrations.append(registration)
+        return registration
+
+    def off(self, event_pattern, registration):
+        self.registrations = [existing for existing in self.registrations if existing is not registration]

    async def stop(self):
        return None
@@ -41,16 +53,16 @@ def test_run_snapshot_uses_isolated_bus_per_snapshot(monkeypatch):
    from archivebox.services import runner as runner_module

    crawl = Crawl.objects.create(
-        urls='https://blog.sweeting.me\nhttps://sweeting.me',
+        urls="https://blog.sweeting.me\nhttps://sweeting.me",
        created_by_id=get_or_create_system_user_pk(),
    )
    snapshot_a = Snapshot.objects.create(
-        url='https://blog.sweeting.me',
+        url="https://blog.sweeting.me",
        crawl=crawl,
        status=Snapshot.StatusChoices.QUEUED,
    )
    snapshot_b = Snapshot.objects.create(
-        url='https://sweeting.me',
+        url="https://sweeting.me",
        crawl=crawl,
        status=Snapshot.StatusChoices.QUEUED,
    )
@@ -62,64 +74,66 @@ def test_run_snapshot_uses_isolated_bus_per_snapshot(monkeypatch):
        created_buses.append(bus)
        return bus

-    monkeypatch.setattr(runner_module, 'create_bus', fake_create_bus)
-    monkeypatch.setattr(runner_module, 'discover_plugins', lambda: {})
-    monkeypatch.setattr(runner_module, 'ProcessService', _DummyService)
-    monkeypatch.setattr(runner_module, 'MachineService', _DummyService)
-    monkeypatch.setattr(runner_module, 'BinaryService', _DummyService)
-    monkeypatch.setattr(runner_module, 'TagService', _DummyService)
-    monkeypatch.setattr(runner_module, 'CrawlService', _DummyService)
-    monkeypatch.setattr(runner_module, 'SnapshotService', _DummyService)
-    monkeypatch.setattr(runner_module, 'ArchiveResultService', _DummyService)
-    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
+    monkeypatch.setattr(runner_module, "create_bus", fake_create_bus)
+    monkeypatch.setattr(runner_module, "discover_plugins", lambda: {})
+    monkeypatch.setattr(runner_module, "ProcessService", _DummyService)
+    monkeypatch.setattr(runner_module, "MachineService", _DummyService)
+    monkeypatch.setattr(runner_module, "BinaryService", _DummyService)
+    monkeypatch.setattr(runner_module, "TagService", _DummyService)
+    monkeypatch.setattr(runner_module, "CrawlService", _DummyService)
+    monkeypatch.setattr(runner_module, "SnapshotService", _DummyService)
+    monkeypatch.setattr(runner_module, "ArchiveResultService", _DummyService)
+    monkeypatch.setattr(runner_module, "setup_abx_services", lambda *args, **kwargs: _DummyAbxServices())

    download_calls = []

    async def fake_download(*, url, bus, config_overrides, snapshot, **kwargs):
        download_calls.append(
            {
-                'url': url,
-                'bus': bus,
-                'snapshot_id': config_overrides['SNAPSHOT_ID'],
-                'source_url': config_overrides['SOURCE_URL'],
-                'abx_snapshot_id': snapshot.id,
-            }
+                "url": url,
+                "bus": bus,
+                "snapshot_id": config_overrides["SNAPSHOT_ID"],
+                "source_url": config_overrides["SOURCE_URL"],
+                "abx_snapshot_id": snapshot.id,
+            },
        )
        await asyncio.sleep(0)
        return []

-    monkeypatch.setattr(runner_module, 'download', fake_download)
+    monkeypatch.setattr(runner_module, "download", fake_download)

    crawl_runner = runner_module.CrawlRunner(crawl)
    snapshot_data = {
        str(snapshot_a.id): {
-            'id': str(snapshot_a.id),
-            'url': snapshot_a.url,
-            'title': snapshot_a.title,
-            'timestamp': snapshot_a.timestamp,
-            'bookmarked_at': snapshot_a.bookmarked_at.isoformat() if snapshot_a.bookmarked_at else "",
-            'created_at': snapshot_a.created_at.isoformat() if snapshot_a.created_at else "",
-            'tags': snapshot_a.tags_str(),
-            'depth': snapshot_a.depth,
-            'parent_snapshot_id': str(snapshot_a.parent_snapshot_id) if snapshot_a.parent_snapshot_id else None,
-            'output_dir': str(snapshot_a.output_dir),
-            'config': crawl_runner._snapshot_config(snapshot_a),
+            "id": str(snapshot_a.id),
+            "url": snapshot_a.url,
+            "status": snapshot_a.status,
+            "title": snapshot_a.title,
+            "timestamp": snapshot_a.timestamp,
+            "bookmarked_at": snapshot_a.bookmarked_at.isoformat() if snapshot_a.bookmarked_at else "",
+            "created_at": snapshot_a.created_at.isoformat() if snapshot_a.created_at else "",
+            "tags": snapshot_a.tags_str(),
+            "depth": snapshot_a.depth,
+            "parent_snapshot_id": str(snapshot_a.parent_snapshot_id) if snapshot_a.parent_snapshot_id else None,
+            "output_dir": str(snapshot_a.output_dir),
+            "config": crawl_runner._snapshot_config(snapshot_a),
        },
        str(snapshot_b.id): {
-            'id': str(snapshot_b.id),
-            'url': snapshot_b.url,
-            'title': snapshot_b.title,
-            'timestamp': snapshot_b.timestamp,
-            'bookmarked_at': snapshot_b.bookmarked_at.isoformat() if snapshot_b.bookmarked_at else "",
-            'created_at': snapshot_b.created_at.isoformat() if snapshot_b.created_at else "",
-            'tags': snapshot_b.tags_str(),
-            'depth': snapshot_b.depth,
-            'parent_snapshot_id': str(snapshot_b.parent_snapshot_id) if snapshot_b.parent_snapshot_id else None,
-            'output_dir': str(snapshot_b.output_dir),
-            'config': crawl_runner._snapshot_config(snapshot_b),
+            "id": str(snapshot_b.id),
+            "url": snapshot_b.url,
+            "status": snapshot_b.status,
+            "title": snapshot_b.title,
+            "timestamp": snapshot_b.timestamp,
+            "bookmarked_at": snapshot_b.bookmarked_at.isoformat() if snapshot_b.bookmarked_at else "",
+            "created_at": snapshot_b.created_at.isoformat() if snapshot_b.created_at else "",
+            "tags": snapshot_b.tags_str(),
+            "depth": snapshot_b.depth,
+            "parent_snapshot_id": str(snapshot_b.parent_snapshot_id) if snapshot_b.parent_snapshot_id else None,
+            "output_dir": str(snapshot_b.output_dir),
+            "config": crawl_runner._snapshot_config(snapshot_b),
        },
    }
-    monkeypatch.setattr(crawl_runner, '_load_snapshot_run_data', lambda snapshot_id: snapshot_data[snapshot_id])
+    monkeypatch.setattr(crawl_runner, "_load_snapshot_run_data", lambda snapshot_id: snapshot_data[snapshot_id])

    async def run_both():
        await asyncio.gather(
@@ -130,9 +144,9 @@ def test_run_snapshot_uses_isolated_bus_per_snapshot(monkeypatch):
    asyncio.run(run_both())

    assert len(download_calls) == 2
-    assert {call['snapshot_id'] for call in download_calls} == {str(snapshot_a.id), str(snapshot_b.id)}
-    assert {call['source_url'] for call in download_calls} == {snapshot_a.url, snapshot_b.url}
-    assert len({id(call['bus']) for call in download_calls}) == 2
+    assert {call["snapshot_id"] for call in download_calls} == {str(snapshot_a.id), str(snapshot_b.id)}
+    assert {call["source_url"] for call in download_calls} == {snapshot_a.url, snapshot_b.url}
+    assert len({id(call["bus"]) for call in download_calls}) == 2
    assert len(created_buses) == 3  # 1 crawl bus + 2 isolated snapshot buses


@@ -146,38 +160,40 @@ def test_ensure_background_runner_starts_when_none_running(monkeypatch):
        def __init__(self, args, **kwargs):
            popen_calls.append((args, kwargs))

-    monkeypatch.setattr(machine_models.Process, 'cleanup_stale_running', classmethod(lambda cls, machine=None: 0))
-    monkeypatch.setattr(machine_models.Machine, 'current', classmethod(lambda cls: SimpleNamespace(id='machine-1')))
+    monkeypatch.setattr(machine_models.Process, "cleanup_stale_running", classmethod(lambda cls, machine=None: 0))
+    monkeypatch.setattr(machine_models.Process, "cleanup_orphaned_workers", classmethod(lambda cls: 0))
+    monkeypatch.setattr(machine_models.Machine, "current", classmethod(lambda cls: SimpleNamespace(id="machine-1")))
    monkeypatch.setattr(
        machine_models.Process.objects,
-        'filter',
+        "filter",
        lambda **kwargs: SimpleNamespace(exists=lambda: False),
    )
-    monkeypatch.setattr(runner_module.subprocess, 'Popen', DummyPopen)
+    monkeypatch.setattr(runner_module.subprocess, "Popen", DummyPopen)

    started = runner_module.ensure_background_runner(allow_under_pytest=True)

    assert started is True
    assert len(popen_calls) == 1
-    assert popen_calls[0][0] == [runner_module.sys.executable, '-m', 'archivebox', 'run', '--daemon']
-    assert popen_calls[0][1]['stdin'] is subprocess.DEVNULL
+    assert popen_calls[0][0] == [runner_module.sys.executable, "-m", "archivebox", "run", "--daemon"]
+    assert popen_calls[0][1]["stdin"] is subprocess.DEVNULL


 def test_ensure_background_runner_skips_when_orchestrator_running(monkeypatch):
    import archivebox.machine.models as machine_models
    from archivebox.services import runner as runner_module

-    monkeypatch.setattr(machine_models.Process, 'cleanup_stale_running', classmethod(lambda cls, machine=None: 0))
-    monkeypatch.setattr(machine_models.Machine, 'current', classmethod(lambda cls: SimpleNamespace(id='machine-1')))
+    monkeypatch.setattr(machine_models.Process, "cleanup_stale_running", classmethod(lambda cls, machine=None: 0))
+    monkeypatch.setattr(machine_models.Process, "cleanup_orphaned_workers", classmethod(lambda cls: 0))
+    monkeypatch.setattr(machine_models.Machine, "current", classmethod(lambda cls: SimpleNamespace(id="machine-1")))
    monkeypatch.setattr(
        machine_models.Process.objects,
-        'filter',
+        "filter",
        lambda **kwargs: SimpleNamespace(exists=lambda: True),
    )
    monkeypatch.setattr(
        runner_module.subprocess,
-        'Popen',
-        lambda *args, **kwargs: (_ for _ in ()).throw(AssertionError('runner should not be spawned')),
+        "Popen",
+        lambda *args, **kwargs: (_ for _ in ()).throw(AssertionError("runner should not be spawned")),
    )

    started = runner_module.ensure_background_runner(allow_under_pytest=True)
@@ -191,20 +207,20 @@ def test_runner_prepare_refreshes_network_interface_and_attaches_current_process
    from archivebox.services import runner as runner_module

    crawl = Crawl.objects.create(
-        urls='https://example.com',
+        urls="https://example.com",
        created_by_id=get_or_create_system_user_pk(),
    )

    class _Iface:
-        id = 'iface-1'
-        machine = SimpleNamespace(id='machine-1')
-        machine_id = 'machine-1'
+        id = "iface-1"
+        machine = SimpleNamespace(id="machine-1")
+        machine_id = "machine-1"

    saved_updates = []

    class _Proc:
        iface_id = None
-        machine_id = 'machine-1'
+        machine_id = "machine-1"
        iface = None
        machine = None

@@ -213,23 +229,23 @@ def test_runner_prepare_refreshes_network_interface_and_attaches_current_process

    proc = _Proc()

-    monkeypatch.setattr(runner_module, 'discover_plugins', lambda: {})
-    monkeypatch.setattr(runner_module, 'create_bus', lambda **kwargs: _DummyBus(kwargs['name']))
-    monkeypatch.setattr(runner_module, 'ProcessService', _DummyService)
-    monkeypatch.setattr(runner_module, 'MachineService', _DummyService)
-    monkeypatch.setattr(runner_module, 'BinaryService', _DummyService)
-    monkeypatch.setattr(runner_module, 'TagService', _DummyService)
-    monkeypatch.setattr(runner_module, 'CrawlService', _DummyService)
-    monkeypatch.setattr(runner_module, 'SnapshotService', _DummyService)
-    monkeypatch.setattr(runner_module, 'ArchiveResultService', _DummyService)
+    monkeypatch.setattr(runner_module, "discover_plugins", lambda: {})
+    monkeypatch.setattr(runner_module, "create_bus", lambda **kwargs: _DummyBus(kwargs["name"]))
+    monkeypatch.setattr(runner_module, "ProcessService", _DummyService)
+    monkeypatch.setattr(runner_module, "MachineService", _DummyService)
+    monkeypatch.setattr(runner_module, "BinaryService", _DummyService)
+    monkeypatch.setattr(runner_module, "TagService", _DummyService)
+    monkeypatch.setattr(runner_module, "CrawlService", _DummyService)
+    monkeypatch.setattr(runner_module, "SnapshotService", _DummyService)
+    monkeypatch.setattr(runner_module, "ArchiveResultService", _DummyService)

    from archivebox.machine.models import NetworkInterface, Process
    from archivebox.config import configset as configset_module

    refresh_calls = []
-    monkeypatch.setattr(NetworkInterface, 'current', classmethod(lambda cls, refresh=False: refresh_calls.append(refresh) or _Iface()))
-    monkeypatch.setattr(Process, 'current', classmethod(lambda cls: proc))
-    monkeypatch.setattr(configset_module, 'get_config', lambda **kwargs: {})
+    monkeypatch.setattr(NetworkInterface, "current", classmethod(lambda cls, refresh=False: refresh_calls.append(refresh) or _Iface()))
+    monkeypatch.setattr(Process, "current", classmethod(lambda cls: proc))
+    monkeypatch.setattr(configset_module, "get_config", lambda **kwargs: {})

    crawl_runner = runner_module.CrawlRunner(crawl)
    crawl_runner._prepare()
@@ -237,7 +253,182 @@ def test_runner_prepare_refreshes_network_interface_and_attaches_current_process
    assert refresh_calls == [True]
    assert proc.iface is not None
    assert proc.machine == proc.iface.machine
-    assert saved_updates == [('iface', 'machine', 'modified_at')]
+    assert saved_updates == [("iface", "machine", "modified_at")]
+
+
+def test_installed_binary_config_overrides_include_valid_installed_binaries(monkeypatch):
+    from archivebox.machine.models import Binary, Machine
+    from archivebox.services import runner as runner_module
+    from abx_dl.models import Plugin
+
+    machine = Machine.objects.create(
+        guid="test-guid-runner-overrides",
+        hostname="runner-host",
+        hw_in_docker=False,
+        hw_in_vm=False,
+        hw_manufacturer="Test",
+        hw_product="Test Product",
+        hw_uuid="test-hw-runner-overrides",
+        os_arch="arm64",
+        os_family="darwin",
+        os_platform="macOS",
+        os_release="14.0",
+        os_kernel="Darwin",
+        stats={},
+        config={},
+    )
+    mercury_binary = Binary.objects.create(
+        machine=machine,
+        name="postlight-parser",
+        abspath=sys.executable,
+        version="2.0.0",
+        binprovider="pip",
+        binproviders="env,pip",
+        status=Binary.StatusChoices.INSTALLED,
+    )
+    wget_binary = Binary.objects.create(
+        machine=machine,
+        name="wget",
+        abspath="/tmp/not-an-executable",
+        version="1.0.0",
+        binprovider="env",
+        binproviders="env",
+        status=Binary.StatusChoices.INSTALLED,
+    )
+
+    monkeypatch.setattr(Machine, "current", classmethod(lambda cls: machine))
+    monkeypatch.setattr(Path, "is_file", lambda self: str(self) in {sys.executable, mercury_binary.abspath, wget_binary.abspath})
+    monkeypatch.setattr(
+        runner_module.os,
+        "access",
+        lambda path, mode: str(path) == sys.executable,
+    )
+
+    overrides = runner_module._installed_binary_config_overrides(
+        {
+            "mercury": Plugin(
+                name="mercury",
+                path=Path("."),
+                hooks=[],
+                config_schema={"MERCURY_BINARY": {"type": "string", "default": "postlight-parser"}},
+            ),
+        },
+    )
+
+    assert overrides["MERCURY_BINARY"] == sys.executable
+    assert overrides["POSTLIGHT_PARSER_BINARY"] == sys.executable
+    assert "WGET_BINARY" not in overrides
+
+
+def test_run_snapshot_skips_descendant_when_max_size_already_reached(monkeypatch):
+    import asgiref.sync
+
+    from archivebox.base_models.models import get_or_create_system_user_pk
+    from archivebox.crawls.models import Crawl
+    from archivebox.services import runner as runner_module
+
+    crawl = Crawl.objects.create(
+        urls="https://example.com",
+        created_by_id=get_or_create_system_user_pk(),
+        max_size=16,
+    )
+
+    monkeypatch.setattr(runner_module, "discover_plugins", lambda: {})
+    monkeypatch.setattr(runner_module, "create_bus", lambda **kwargs: _DummyBus(kwargs["name"]))
+    monkeypatch.setattr(runner_module, "ProcessService", _DummyService)
+    monkeypatch.setattr(runner_module, "MachineService", _DummyService)
+    monkeypatch.setattr(runner_module, "BinaryService", _DummyService)
+    monkeypatch.setattr(runner_module, "TagService", _DummyService)
+    monkeypatch.setattr(runner_module, "CrawlService", _DummyService)
+    monkeypatch.setattr(runner_module, "SnapshotService", _DummyService)
+    monkeypatch.setattr(runner_module, "ArchiveResultService", _DummyService)
+    monkeypatch.setattr(runner_module, "_limit_stop_reason", lambda config: "max_size")
+    monkeypatch.setattr(
+        asgiref.sync,
+        "sync_to_async",
+        lambda func, thread_sensitive=True: lambda *args, **kwargs: _call_sync(func, *args, **kwargs),
+    )
+    monkeypatch.setattr(
+        runner_module,
+        "download",
+        lambda *args, **kwargs: (_ for _ in ()).throw(AssertionError("snapshot download should have been skipped")),
+    )
+
+    crawl_runner = runner_module.CrawlRunner(crawl)
+    cancelled: list[str] = []
+    crawl_runner._load_snapshot_run_data = lambda snapshot_id: {
+        "id": snapshot_id,
+        "url": "https://example.com/child",
+        "title": "",
+        "timestamp": "",
+        "bookmarked_at": "",
+        "created_at": "",
+        "tags": "",
+        "depth": 1,
+        "status": "queued",
+        "parent_snapshot_id": None,
+        "output_dir": "/tmp/child",
+        "config": {"CRAWL_DIR": "/tmp/crawl", "MAX_SIZE": 16},
+    }
+    crawl_runner._cancel_snapshot_due_to_limit = lambda snapshot_id: cancelled.append(snapshot_id)
+
+    asyncio.run(crawl_runner._run_snapshot("child-1"))
+
+    assert cancelled == ["child-1"]
+
+
+def test_seal_snapshot_cancels_queued_descendants_after_max_size():
+    from archivebox.base_models.models import get_or_create_system_user_pk
+    from archivebox.crawls.models import Crawl
+    from archivebox.core.models import Snapshot
+    from archivebox.services.snapshot_service import SnapshotService
+    from abx_dl.orchestrator import create_bus
+
+    crawl = Crawl.objects.create(
+        urls="https://example.com",
+        created_by_id=get_or_create_system_user_pk(),
+        max_size=16,
+    )
+    root = Snapshot.objects.create(
+        url="https://example.com",
+        crawl=crawl,
+        status=Snapshot.StatusChoices.STARTED,
+    )
+    child = Snapshot.objects.create(
+        url="https://example.com/child",
+        crawl=crawl,
+        depth=1,
+        parent_snapshot_id=root.id,
+        status=Snapshot.StatusChoices.QUEUED,
+    )
+
+    state_dir = Path(crawl.output_dir) / ".abx-dl"
+    state_dir.mkdir(parents=True, exist_ok=True)
+    (state_dir / "limits.json").write_text(
+        json.dumps(
+            {
+                "admitted_snapshot_ids": [str(root.id), str(child.id)],
+                "counted_process_ids": ["proc-1"],
+                "total_size": 32,
+                "stop_reason": "max_size",
+            },
+        ),
+        encoding="utf-8",
+    )
+
+    bus = create_bus(name="test_snapshot_limit_cancel")
+    service = SnapshotService(bus, crawl_id=str(crawl.id), schedule_snapshot=lambda snapshot_id: None)
+    try:
+        sealed_id = service._seal_snapshot(str(root.id))
+    finally:
+        asyncio.run(bus.stop())
+
+    root.refresh_from_db()
+    child.refresh_from_db()
+    assert sealed_id == str(root.id)
+    assert root.status == Snapshot.StatusChoices.SEALED
+    assert child.status == Snapshot.StatusChoices.SEALED
+    assert child.retry_at is None


 def test_create_crawl_api_queues_crawl_without_spawning_runner(monkeypatch):
@@ -245,28 +436,28 @@ def test_create_crawl_api_queues_crawl_without_spawning_runner(monkeypatch):
    from archivebox.api.v1_crawls import CrawlCreateSchema, create_crawl

    user = get_user_model().objects.create_superuser(
-        username='runner-api-admin',
-        email='runner-api-admin@example.com',
-        password='testpassword',
+        username="runner-api-admin",
+        email="runner-api-admin@example.com",
+        password="testpassword",
    )
-    request = RequestFactory().post('/api/v1/crawls')
+    request = RequestFactory().post("/api/v1/crawls")
    request.user = user

    crawl = create_crawl(
        request,
        CrawlCreateSchema(
-            urls=['https://example.com'],
+            urls=["https://example.com"],
            max_depth=0,
            tags=[],
-            tags_str='',
-            label='',
-            notes='',
+            tags_str="",
+            label="",
+            notes="",
            config={},
        ),
    )

    assert str(crawl.id)
-    assert crawl.status == 'queued'
+    assert crawl.status == "queued"
    assert crawl.retry_at is not None


@@ -278,36 +469,36 @@ def test_crawl_runner_does_not_seal_unfinished_crawl(monkeypatch):
    from archivebox.services import runner as runner_module

    crawl = Crawl.objects.create(
-        urls='https://example.com',
+        urls="https://example.com",
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    snapshot = Snapshot.objects.create(
-        url='https://example.com',
+        url="https://example.com",
        crawl=crawl,
        status=Snapshot.StatusChoices.STARTED,
    )

-    monkeypatch.setattr(runner_module, '_attach_bus_trace', lambda bus: None)
-    monkeypatch.setattr(runner_module, '_stop_bus_trace', lambda bus: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
+    monkeypatch.setattr(runner_module, "_attach_bus_trace", lambda bus: None)
+    monkeypatch.setattr(runner_module, "_stop_bus_trace", lambda bus: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module, "setup_abx_services", lambda *args, **kwargs: _DummyAbxServices())
    monkeypatch.setenv("DJANGO_ALLOW_ASYNC_UNSAFE", "true")
    monkeypatch.setattr(
        asgiref.sync,
-        'sync_to_async',
-        lambda func, thread_sensitive=True: (lambda *args, **kwargs: _call_sync(func, *args, **kwargs)),
+        "sync_to_async",
+        lambda func, thread_sensitive=True: lambda *args, **kwargs: _call_sync(func, *args, **kwargs),
    )
-    monkeypatch.setattr(Crawl.objects, 'get', lambda id: crawl)
-    monkeypatch.setattr(crawl, 'is_finished', lambda: False)
-    monkeypatch.setattr(crawl, 'save', lambda *args, **kwargs: None)
-    monkeypatch.setattr(runner_module.CrawlRunner, '_prepare', lambda self: None)
-    monkeypatch.setattr(runner_module.CrawlRunner, '_create_live_ui', lambda self: None)
-    monkeypatch.setattr(runner_module.CrawlRunner, '_initial_snapshot_ids', lambda self: [str(snapshot.id)])
-    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_setup', lambda self, snapshot_id: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module.CrawlRunner, 'enqueue_snapshot', lambda self, snapshot_id: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module.CrawlRunner, '_wait_for_snapshot_tasks', lambda self: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_cleanup', lambda self, snapshot_id: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module.CrawlRunner, '_cleanup_persona', lambda self: None)
+    monkeypatch.setattr(Crawl.objects, "get", lambda id: crawl)
+    monkeypatch.setattr(crawl, "is_finished", lambda: False)
+    monkeypatch.setattr(crawl, "save", lambda *args, **kwargs: None)
+    monkeypatch.setattr(runner_module.CrawlRunner, "_prepare", lambda self: None)
+    monkeypatch.setattr(runner_module.CrawlRunner, "_create_live_ui", lambda self: None)
+    monkeypatch.setattr(runner_module.CrawlRunner, "_initial_snapshot_ids", lambda self: [str(snapshot.id)])
+    monkeypatch.setattr(runner_module.CrawlRunner, "_run_crawl_setup", lambda self, snapshot_id: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module.CrawlRunner, "enqueue_snapshot", lambda self, snapshot_id: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module.CrawlRunner, "_wait_for_snapshot_tasks", lambda self: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module.CrawlRunner, "_run_crawl_cleanup", lambda self, snapshot_id: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module.CrawlRunner, "_cleanup_persona", lambda self: None)

    asyncio.run(runner_module.CrawlRunner(crawl, snapshot_ids=[str(snapshot.id)]).run())

@@ -323,39 +514,39 @@ def test_crawl_runner_finalizes_with_sync_to_async_for_is_finished(monkeypatch):
    from archivebox.services import runner as runner_module

    crawl = Crawl.objects.create(
-        urls='https://example.com',
+        urls="https://example.com",
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    snapshot = Snapshot.objects.create(
-        url='https://example.com',
+        url="https://example.com",
        crawl=crawl,
        status=Snapshot.StatusChoices.STARTED,
    )

-    monkeypatch.setattr(runner_module, 'create_bus', lambda *args, **kwargs: _DummyBus('runner'))
-    monkeypatch.setattr(runner_module, 'discover_plugins', lambda: {})
-    monkeypatch.setattr(runner_module, 'ProcessService', _DummyService)
-    monkeypatch.setattr(runner_module, 'MachineService', _DummyService)
-    monkeypatch.setattr(runner_module, 'BinaryService', _DummyService)
-    monkeypatch.setattr(runner_module, 'TagService', _DummyService)
-    monkeypatch.setattr(runner_module, 'CrawlService', _DummyService)
-    monkeypatch.setattr(runner_module, 'SnapshotService', _DummyService)
-    monkeypatch.setattr(runner_module, 'ArchiveResultService', _DummyService)
-    monkeypatch.setattr(runner_module, '_attach_bus_trace', lambda bus: None)
-    monkeypatch.setattr(runner_module, '_stop_bus_trace', lambda bus: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
-    monkeypatch.setattr(Crawl.objects, 'get', lambda id: crawl)
-    monkeypatch.setattr(crawl, 'save', lambda *args, **kwargs: None)
-    monkeypatch.setattr(crawl, 'cleanup', lambda: None)
-    monkeypatch.setattr(runner_module.CrawlRunner, '_prepare', lambda self: None)
-    monkeypatch.setattr(runner_module.CrawlRunner, '_create_live_ui', lambda self: None)
-    monkeypatch.setattr(runner_module.CrawlRunner, '_initial_snapshot_ids', lambda self: [str(snapshot.id)])
-    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_setup', lambda self, snapshot_id: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module.CrawlRunner, 'enqueue_snapshot', lambda self, snapshot_id: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module.CrawlRunner, '_wait_for_snapshot_tasks', lambda self: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_cleanup', lambda self, snapshot_id: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module.CrawlRunner, '_cleanup_persona', lambda self: None)
+    monkeypatch.setattr(runner_module, "create_bus", lambda *args, **kwargs: _DummyBus("runner"))
+    monkeypatch.setattr(runner_module, "discover_plugins", lambda: {})
+    monkeypatch.setattr(runner_module, "ProcessService", _DummyService)
+    monkeypatch.setattr(runner_module, "MachineService", _DummyService)
+    monkeypatch.setattr(runner_module, "BinaryService", _DummyService)
+    monkeypatch.setattr(runner_module, "TagService", _DummyService)
+    monkeypatch.setattr(runner_module, "CrawlService", _DummyService)
+    monkeypatch.setattr(runner_module, "SnapshotService", _DummyService)
+    monkeypatch.setattr(runner_module, "ArchiveResultService", _DummyService)
+    monkeypatch.setattr(runner_module, "_attach_bus_trace", lambda bus: None)
+    monkeypatch.setattr(runner_module, "_stop_bus_trace", lambda bus: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module, "setup_abx_services", lambda *args, **kwargs: _DummyAbxServices())
+    monkeypatch.setattr(Crawl.objects, "get", lambda id: crawl)
+    monkeypatch.setattr(crawl, "save", lambda *args, **kwargs: None)
+    monkeypatch.setattr(crawl, "cleanup", lambda: None)
+    monkeypatch.setattr(runner_module.CrawlRunner, "_prepare", lambda self: None)
+    monkeypatch.setattr(runner_module.CrawlRunner, "_create_live_ui", lambda self: None)
+    monkeypatch.setattr(runner_module.CrawlRunner, "_initial_snapshot_ids", lambda self: [str(snapshot.id)])
+    monkeypatch.setattr(runner_module.CrawlRunner, "_run_crawl_setup", lambda self, snapshot_id: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module.CrawlRunner, "enqueue_snapshot", lambda self, snapshot_id: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module.CrawlRunner, "_wait_for_snapshot_tasks", lambda self: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module.CrawlRunner, "_run_crawl_cleanup", lambda self, snapshot_id: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module.CrawlRunner, "_cleanup_persona", lambda self: None)

    sync_to_async_wrapped: list[str] = []
    sync_to_async_active = False
@@ -363,28 +554,29 @@ def test_crawl_runner_finalizes_with_sync_to_async_for_is_finished(monkeypatch):
    def fake_sync_to_async(func, thread_sensitive=True):
        async def wrapper(*args, **kwargs):
            nonlocal sync_to_async_active
-            sync_to_async_wrapped.append(getattr(func, '__name__', repr(func)))
+            sync_to_async_wrapped.append(getattr(func, "__name__", repr(func)))
            previous = sync_to_async_active
            sync_to_async_active = True
            try:
                return func(*args, **kwargs)
            finally:
                sync_to_async_active = previous
+
        return wrapper

    def guarded_is_finished():
        assert sync_to_async_active is True
        return False

-    monkeypatch.setattr(asgiref.sync, 'sync_to_async', fake_sync_to_async)
-    monkeypatch.setattr(crawl, 'is_finished', guarded_is_finished)
+    monkeypatch.setattr(asgiref.sync, "sync_to_async", fake_sync_to_async)
+    monkeypatch.setattr(crawl, "is_finished", guarded_is_finished)

    asyncio.run(runner_module.CrawlRunner(crawl, snapshot_ids=[str(snapshot.id)]).run())

    crawl.refresh_from_db()
    assert crawl.status == Crawl.StatusChoices.STARTED
    assert crawl.retry_at is not None
-    assert 'guarded_is_finished' in sync_to_async_wrapped
+    assert "guarded_is_finished" in sync_to_async_wrapped


 def test_wait_for_snapshot_tasks_surfaces_already_failed_task():
@@ -393,16 +585,16 @@ def test_wait_for_snapshot_tasks_surfaces_already_failed_task():
    from archivebox.services import runner as runner_module

    crawl = Crawl.objects.create(
-        urls='https://example.com',
+        urls="https://example.com",
        created_by_id=get_or_create_system_user_pk(),
    )
    crawl_runner = runner_module.CrawlRunner(crawl)

    async def run_test():
        task = asyncio.get_running_loop().create_future()
-        task.set_exception(RuntimeError('snapshot failed'))
-        crawl_runner.snapshot_tasks['snap-1'] = task
-        with pytest.raises(RuntimeError, match='snapshot failed'):
+        task.set_exception(RuntimeError("snapshot failed"))
+        crawl_runner.snapshot_tasks["snap-1"] = task
+        with pytest.raises(RuntimeError, match="snapshot failed"):
            await crawl_runner._wait_for_snapshot_tasks()

    asyncio.run(run_test())
@@ -414,7 +606,7 @@ def test_wait_for_snapshot_tasks_returns_after_completed_tasks_are_pruned():
    from archivebox.services import runner as runner_module

    crawl = Crawl.objects.create(
-        urls='https://example.com',
+        urls="https://example.com",
        created_by_id=get_or_create_system_user_pk(),
    )
    crawl_runner = runner_module.CrawlRunner(crawl)
@@ -424,7 +616,7 @@ def test_wait_for_snapshot_tasks_returns_after_completed_tasks_are_pruned():

    async def run_test():
        task = asyncio.create_task(finish_snapshot())
-        crawl_runner.snapshot_tasks['snap-1'] = task
+        crawl_runner.snapshot_tasks["snap-1"] = task
        await asyncio.wait_for(crawl_runner._wait_for_snapshot_tasks(), timeout=0.5)
        assert crawl_runner.snapshot_tasks == {}

@@ -439,43 +631,47 @@ def test_crawl_runner_calls_crawl_cleanup_after_snapshot_phase(monkeypatch):
    from archivebox.services import runner as runner_module

    crawl = Crawl.objects.create(
-        urls='https://example.com',
+        urls="https://example.com",
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    snapshot = Snapshot.objects.create(
-        url='https://example.com',
+        url="https://example.com",
        crawl=crawl,
        status=Snapshot.StatusChoices.STARTED,
    )

-    monkeypatch.setattr(runner_module, '_attach_bus_trace', lambda bus: None)
-    monkeypatch.setattr(runner_module, '_stop_bus_trace', lambda bus: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
+    monkeypatch.setattr(runner_module, "_attach_bus_trace", lambda bus: None)
+    monkeypatch.setattr(runner_module, "_stop_bus_trace", lambda bus: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module, "setup_abx_services", lambda *args, **kwargs: _DummyAbxServices())
    monkeypatch.setenv("DJANGO_ALLOW_ASYNC_UNSAFE", "true")
    monkeypatch.setattr(
        asgiref.sync,
-        'sync_to_async',
-        lambda func, thread_sensitive=True: (lambda *args, **kwargs: _call_sync(func, *args, **kwargs)),
+        "sync_to_async",
+        lambda func, thread_sensitive=True: lambda *args, **kwargs: _call_sync(func, *args, **kwargs),
    )
-    monkeypatch.setattr(Crawl.objects, 'get', lambda id: crawl)
-    monkeypatch.setattr(crawl, 'is_finished', lambda: False)
-    monkeypatch.setattr(crawl, 'save', lambda *args, **kwargs: None)
-    monkeypatch.setattr(runner_module.CrawlRunner, '_prepare', lambda self: None)
-    monkeypatch.setattr(runner_module.CrawlRunner, '_create_live_ui', lambda self: None)
-    monkeypatch.setattr(runner_module.CrawlRunner, '_initial_snapshot_ids', lambda self: [str(snapshot.id)])
-    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_setup', lambda self, snapshot_id: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module.CrawlRunner, 'enqueue_snapshot', lambda self, snapshot_id: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module.CrawlRunner, '_wait_for_snapshot_tasks', lambda self: asyncio.sleep(0))
-    monkeypatch.setattr(runner_module.CrawlRunner, '_cleanup_persona', lambda self: None)
+    monkeypatch.setattr(Crawl.objects, "get", lambda id: crawl)
+    monkeypatch.setattr(crawl, "is_finished", lambda: False)
+    monkeypatch.setattr(crawl, "save", lambda *args, **kwargs: None)
+    monkeypatch.setattr(runner_module.CrawlRunner, "_prepare", lambda self: None)
+    monkeypatch.setattr(runner_module.CrawlRunner, "_create_live_ui", lambda self: None)
+    monkeypatch.setattr(runner_module.CrawlRunner, "_initial_snapshot_ids", lambda self: [str(snapshot.id)])
+    monkeypatch.setattr(runner_module.CrawlRunner, "_run_crawl_setup", lambda self, snapshot_id: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module.CrawlRunner, "enqueue_snapshot", lambda self, snapshot_id: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module.CrawlRunner, "_wait_for_snapshot_tasks", lambda self: asyncio.sleep(0))
+    monkeypatch.setattr(runner_module.CrawlRunner, "_cleanup_persona", lambda self: None)

    cleanup_calls = []
-    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_cleanup', lambda self, snapshot_id: cleanup_calls.append('abx_cleanup') or asyncio.sleep(0))
-    monkeypatch.setattr(crawl, 'cleanup', lambda: cleanup_calls.append('crawl_cleanup'))
+    monkeypatch.setattr(
+        runner_module.CrawlRunner,
+        "_run_crawl_cleanup",
+        lambda self, snapshot_id: cleanup_calls.append("abx_cleanup") or asyncio.sleep(0),
+    )
+    monkeypatch.setattr(crawl, "cleanup", lambda: cleanup_calls.append("crawl_cleanup"))

    asyncio.run(runner_module.CrawlRunner(crawl, snapshot_ids=[str(snapshot.id)]).run())

-    assert cleanup_calls == ['crawl_cleanup', 'abx_cleanup']
+    assert cleanup_calls == ["crawl_cleanup", "abx_cleanup"]


 def test_abx_process_service_background_monitor_finishes_after_process_exit(monkeypatch, tmp_path):
@@ -497,7 +693,7 @@ def test_abx_process_service_background_monitor_finishes_after_process_exit(monk
            return ["daemon output\n"]

    service._emit_event = fake_emit_event
-    monkeypatch.setattr(service, '_stream_stdout', fake_stream_stdout)
+    monkeypatch.setattr(service, "_stream_stdout", fake_stream_stdout)

    class FakeAsyncProcess:
        def __init__(self):
@@ -509,32 +705,32 @@ def test_abx_process_service_background_monitor_finishes_after_process_exit(monk
            self.returncode = 0
            return 0

-    plugin_output_dir = tmp_path / 'chrome'
+    plugin_output_dir = tmp_path / "chrome"
    plugin_output_dir.mkdir()
-    stdout_file = plugin_output_dir / 'on_Crawl__90_chrome_launch.daemon.bg.stdout.log'
-    stderr_file = plugin_output_dir / 'on_Crawl__90_chrome_launch.daemon.bg.stderr.log'
-    stderr_file.write_text('')
-    pid_file = plugin_output_dir / 'on_Crawl__90_chrome_launch.daemon.bg.pid'
-    pid_file.write_text('12345')
+    stdout_file = plugin_output_dir / "on_Crawl__90_chrome_launch.daemon.bg.stdout.log"
+    stderr_file = plugin_output_dir / "on_Crawl__90_chrome_launch.daemon.bg.stderr.log"
+    stderr_file.write_text("")
+    pid_file = plugin_output_dir / "on_Crawl__90_chrome_launch.daemon.bg.pid"
+    pid_file.write_text("12345")

    proc = AbxProcess(
-        cmd=['hook'],
+        cmd=["hook"],
        pwd=str(plugin_output_dir),
        timeout=60,
        started_at=now_iso(),
-        plugin='chrome',
-        hook_name='on_Crawl__90_chrome_launch.daemon.bg',
+        plugin="chrome",
+        hook_name="on_Crawl__90_chrome_launch.daemon.bg",
    )
    process = FakeAsyncProcess()
    event = SimpleNamespace(
-        plugin_name='chrome',
-        hook_name='on_Crawl__90_chrome_launch.daemon.bg',
-        hook_path='hook',
-        hook_args=['--url=https://example.org/'],
+        plugin_name="chrome",
+        hook_name="on_Crawl__90_chrome_launch.daemon.bg",
+        hook_path="hook",
+        hook_args=["--url=https://example.org/"],
        env={},
        output_dir=str(plugin_output_dir),
        timeout=60,
-        snapshot_id='snap-1',
+        snapshot_id="snap-1",
        is_background=True,
    )

@@ -566,28 +762,29 @@ def test_run_pending_crawls_runs_due_snapshot_in_place(monkeypatch):
    from archivebox.services import runner as runner_module

    crawl = Crawl.objects.create(
-        urls='https://example.com',
+        urls="https://example.com",
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.SEALED,
    )
    snapshot = Snapshot.objects.create(
-        url='https://example.com',
+        url="https://example.com",
        crawl=crawl,
        status=Snapshot.StatusChoices.QUEUED,
        retry_at=runner_module.timezone.now(),
    )

-    monkeypatch.setattr(type(snapshot), 'claim_processing_lock', lambda self, lock_seconds=60: True)
-    monkeypatch.setattr(type(crawl), 'claim_processing_lock', lambda self, lock_seconds=60: True)
+    monkeypatch.setattr(type(snapshot), "claim_processing_lock", lambda self, lock_seconds=60: True)
+    monkeypatch.setattr(type(crawl), "claim_processing_lock", lambda self, lock_seconds=60: True)

    run_calls: list[tuple[str, list[str] | None, bool]] = []
+
    def fake_run_crawl(crawl_id, snapshot_ids=None, selected_plugins=None, process_discovered_snapshots_inline=True):
        run_calls.append((crawl_id, snapshot_ids, process_discovered_snapshots_inline))
        snapshot.status = Snapshot.StatusChoices.SEALED
        snapshot.retry_at = None
-        snapshot.save(update_fields=['status', 'retry_at', 'modified_at'])
+        snapshot.save(update_fields=["status", "retry_at", "modified_at"])

-    monkeypatch.setattr(runner_module, 'run_crawl', fake_run_crawl)
+    monkeypatch.setattr(runner_module, "run_crawl", fake_run_crawl)

    result = runner_module.run_pending_crawls(daemon=False)

@@ -602,26 +799,26 @@ def test_run_pending_crawls_prioritizes_new_queued_crawl_before_snapshot_backlog
    from archivebox.services import runner as runner_module

    older_crawl = Crawl.objects.create(
-        urls='https://older.example.com',
+        urls="https://older.example.com",
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    older_snapshot = Snapshot.objects.create(
-        url='https://older.example.com',
+        url="https://older.example.com",
        crawl=older_crawl,
        status=Snapshot.StatusChoices.QUEUED,
        retry_at=runner_module.timezone.now(),
    )
    newer_crawl = Crawl.objects.create(
-        urls='https://newer.example.com',
+        urls="https://newer.example.com",
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.QUEUED,
        retry_at=runner_module.timezone.now(),
    )

-    monkeypatch.setattr(type(older_snapshot), 'claim_processing_lock', lambda self, lock_seconds=60: True)
-    monkeypatch.setattr(type(older_crawl), 'claim_processing_lock', lambda self, lock_seconds=60: True)
-    monkeypatch.setattr(type(newer_crawl), 'claim_processing_lock', lambda self, lock_seconds=60: True)
+    monkeypatch.setattr(type(older_snapshot), "claim_processing_lock", lambda self, lock_seconds=60: True)
+    monkeypatch.setattr(type(older_crawl), "claim_processing_lock", lambda self, lock_seconds=60: True)
+    monkeypatch.setattr(type(newer_crawl), "claim_processing_lock", lambda self, lock_seconds=60: True)

    run_calls: list[tuple[str, list[str] | None, bool]] = []

@@ -632,7 +829,7 @@ def test_run_pending_crawls_prioritizes_new_queued_crawl_before_snapshot_backlog
        run_calls.append((crawl_id, snapshot_ids, process_discovered_snapshots_inline))
        raise _StopScheduling

-    monkeypatch.setattr(runner_module, 'run_crawl', fake_run_crawl)
+    monkeypatch.setattr(runner_module, "run_crawl", fake_run_crawl)

    with pytest.raises(_StopScheduling):
        runner_module.run_pending_crawls(daemon=False)
--- a/archivebox/tests/test_savepagenow.py
+++ b/archivebox/tests/test_savepagenow.py
@@ -9,10 +9,18 @@ from pathlib import Path
 from archivebox.tests.conftest import create_test_url


-ADMIN_HOST = 'admin.archivebox.localhost:8000'
+ADMIN_HOST = "admin.archivebox.localhost:8000"


-def _run_savepagenow_script(initialized_archive: Path, request_url: str, expected_url: str, *, login: bool, public_add_view: bool, host: str):
+def _run_savepagenow_script(
+    initialized_archive: Path,
+    request_url: str,
+    expected_url: str,
+    *,
+    login: bool,
+    public_add_view: bool,
+    host: str,
+):
    script = textwrap.dedent(
        f"""
        import os
@@ -52,34 +60,34 @@ def _run_savepagenow_script(initialized_archive: Path, request_url: str, expecte
        assert resp2.status_code == 302, resp2.status_code
        assert Snapshot.objects.filter(url={expected_url!r}).count() == 1
        assert resp2['Location'] == f"/{{snapshot.url_path}}"
-        """
+        """,
    )

    env = {
        **os.environ,
-        'DATA_DIR': str(initialized_archive),
-        'USE_COLOR': 'False',
-        'SHOW_PROGRESS': 'False',
-        'PUBLIC_ADD_VIEW': 'True' if public_add_view else 'False',
-        'SAVE_ARCHIVEDOTORG': 'False',
-        'SAVE_TITLE': 'False',
-        'SAVE_FAVICON': 'False',
-        'SAVE_WGET': 'False',
-        'SAVE_WARC': 'False',
-        'SAVE_PDF': 'False',
-        'SAVE_SCREENSHOT': 'False',
-        'SAVE_DOM': 'False',
-        'SAVE_SINGLEFILE': 'False',
-        'SAVE_READABILITY': 'False',
-        'SAVE_MERCURY': 'False',
-        'SAVE_GIT': 'False',
-        'SAVE_YTDLP': 'False',
-        'SAVE_HEADERS': 'False',
-        'SAVE_HTMLTOTEXT': 'False',
+        "DATA_DIR": str(initialized_archive),
+        "USE_COLOR": "False",
+        "SHOW_PROGRESS": "False",
+        "PUBLIC_ADD_VIEW": "True" if public_add_view else "False",
+        "SAVE_ARCHIVEDOTORG": "False",
+        "SAVE_TITLE": "False",
+        "SAVE_FAVICON": "False",
+        "SAVE_WGET": "False",
+        "SAVE_WARC": "False",
+        "SAVE_PDF": "False",
+        "SAVE_SCREENSHOT": "False",
+        "SAVE_DOM": "False",
+        "SAVE_SINGLEFILE": "False",
+        "SAVE_READABILITY": "False",
+        "SAVE_MERCURY": "False",
+        "SAVE_GIT": "False",
+        "SAVE_YTDLP": "False",
+        "SAVE_HEADERS": "False",
+        "SAVE_HTMLTOTEXT": "False",
    }

    return subprocess.run(
-        [sys.executable, '-c', script],
+        [sys.executable, "-c", script],
        cwd=initialized_archive,
        env=env,
        text=True,
@@ -105,36 +113,104 @@ def _run_savepagenow_not_found_script(initialized_archive: Path, request_url: st
        target_url = {request_url!r}

        resp = client.get('/web/' + target_url, HTTP_HOST='web.archivebox.localhost:8000')
-        assert resp.status_code == 404, resp.status_code
+        assert resp.status_code == 302, resp.status_code
+        assert resp['Location'] == f'http://{ADMIN_HOST}/web/' + target_url
        assert Snapshot.objects.count() == 0
-        """
+        """,
    )

    env = {
        **os.environ,
-        'DATA_DIR': str(initialized_archive),
-        'USE_COLOR': 'False',
-        'SHOW_PROGRESS': 'False',
-        'PUBLIC_ADD_VIEW': 'False',
-        'SAVE_ARCHIVEDOTORG': 'False',
-        'SAVE_TITLE': 'False',
-        'SAVE_FAVICON': 'False',
-        'SAVE_WGET': 'False',
-        'SAVE_WARC': 'False',
-        'SAVE_PDF': 'False',
-        'SAVE_SCREENSHOT': 'False',
-        'SAVE_DOM': 'False',
-        'SAVE_SINGLEFILE': 'False',
-        'SAVE_READABILITY': 'False',
-        'SAVE_MERCURY': 'False',
-        'SAVE_GIT': 'False',
-        'SAVE_YTDLP': 'False',
-        'SAVE_HEADERS': 'False',
-        'SAVE_HTMLTOTEXT': 'False',
+        "DATA_DIR": str(initialized_archive),
+        "USE_COLOR": "False",
+        "SHOW_PROGRESS": "False",
+        "PUBLIC_ADD_VIEW": "False",
+        "SAVE_ARCHIVEDOTORG": "False",
+        "SAVE_TITLE": "False",
+        "SAVE_FAVICON": "False",
+        "SAVE_WGET": "False",
+        "SAVE_WARC": "False",
+        "SAVE_PDF": "False",
+        "SAVE_SCREENSHOT": "False",
+        "SAVE_DOM": "False",
+        "SAVE_SINGLEFILE": "False",
+        "SAVE_READABILITY": "False",
+        "SAVE_MERCURY": "False",
+        "SAVE_GIT": "False",
+        "SAVE_YTDLP": "False",
+        "SAVE_HEADERS": "False",
+        "SAVE_HTMLTOTEXT": "False",
    }

    return subprocess.run(
-        [sys.executable, '-c', script],
+        [sys.executable, "-c", script],
+        cwd=initialized_archive,
+        env=env,
+        text=True,
+        capture_output=True,
+        timeout=60,
+    )
+
+
+def _run_savepagenow_via_web_host_redirect_script(initialized_archive: Path, request_url: str, expected_url: str):
+    script = textwrap.dedent(
+        f"""
+        import os
+
+        os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.core.settings')
+
+        from archivebox.config.django import setup_django
+        setup_django()
+
+        from django.test import Client
+        from django.contrib.auth import get_user_model
+        from archivebox.core.models import Snapshot
+
+        client = Client()
+        user = get_user_model().objects.create_user(username='tester', password='pw')
+        client.force_login(user)
+
+        target_url = {request_url!r}
+
+        resp = client.get('/web/' + target_url, HTTP_HOST='web.archivebox.localhost:8000')
+        assert resp.status_code == 302, resp.status_code
+        assert resp['Location'] == f'http://{ADMIN_HOST}/web/' + target_url
+
+        resp2 = client.get('/web/' + target_url, HTTP_HOST={ADMIN_HOST!r})
+        assert resp2.status_code == 302, resp2.status_code
+
+        snapshot = Snapshot.objects.filter(url={expected_url!r}).order_by('-created_at').first()
+        assert snapshot is not None
+        assert resp2['Location'] == f"/{{snapshot.url_path}}"
+        assert Snapshot.objects.filter(url={expected_url!r}).count() == 1
+        """,
+    )
+
+    env = {
+        **os.environ,
+        "DATA_DIR": str(initialized_archive),
+        "USE_COLOR": "False",
+        "SHOW_PROGRESS": "False",
+        "PUBLIC_ADD_VIEW": "False",
+        "SAVE_ARCHIVEDOTORG": "False",
+        "SAVE_TITLE": "False",
+        "SAVE_FAVICON": "False",
+        "SAVE_WGET": "False",
+        "SAVE_WARC": "False",
+        "SAVE_PDF": "False",
+        "SAVE_SCREENSHOT": "False",
+        "SAVE_DOM": "False",
+        "SAVE_SINGLEFILE": "False",
+        "SAVE_READABILITY": "False",
+        "SAVE_MERCURY": "False",
+        "SAVE_GIT": "False",
+        "SAVE_YTDLP": "False",
+        "SAVE_HEADERS": "False",
+        "SAVE_HTMLTOTEXT": "False",
+    }
+
+    return subprocess.run(
+        [sys.executable, "-c", script],
        cwd=initialized_archive,
        env=env,
        text=True,
@@ -168,34 +244,34 @@ def _run_savepagenow_existing_snapshot_script(initialized_archive: Path, request
        resp = client.get('/web/' + target_url, HTTP_HOST='web.archivebox.localhost:8000')
        assert resp.status_code == 302, resp.status_code
        assert resp['Location'] == f"/{{snapshot.url_path}}"
-        """
+        """,
    )

    env = {
        **os.environ,
-        'DATA_DIR': str(initialized_archive),
-        'USE_COLOR': 'False',
-        'SHOW_PROGRESS': 'False',
-        'PUBLIC_ADD_VIEW': 'False',
-        'SAVE_ARCHIVEDOTORG': 'False',
-        'SAVE_TITLE': 'False',
-        'SAVE_FAVICON': 'False',
-        'SAVE_WGET': 'False',
-        'SAVE_WARC': 'False',
-        'SAVE_PDF': 'False',
-        'SAVE_SCREENSHOT': 'False',
-        'SAVE_DOM': 'False',
-        'SAVE_SINGLEFILE': 'False',
-        'SAVE_READABILITY': 'False',
-        'SAVE_MERCURY': 'False',
-        'SAVE_GIT': 'False',
-        'SAVE_YTDLP': 'False',
-        'SAVE_HEADERS': 'False',
-        'SAVE_HTMLTOTEXT': 'False',
+        "DATA_DIR": str(initialized_archive),
+        "USE_COLOR": "False",
+        "SHOW_PROGRESS": "False",
+        "PUBLIC_ADD_VIEW": "False",
+        "SAVE_ARCHIVEDOTORG": "False",
+        "SAVE_TITLE": "False",
+        "SAVE_FAVICON": "False",
+        "SAVE_WGET": "False",
+        "SAVE_WARC": "False",
+        "SAVE_PDF": "False",
+        "SAVE_SCREENSHOT": "False",
+        "SAVE_DOM": "False",
+        "SAVE_SINGLEFILE": "False",
+        "SAVE_READABILITY": "False",
+        "SAVE_MERCURY": "False",
+        "SAVE_GIT": "False",
+        "SAVE_YTDLP": "False",
+        "SAVE_HEADERS": "False",
+        "SAVE_HTMLTOTEXT": "False",
    }

    return subprocess.run(
-        [sys.executable, '-c', script],
+        [sys.executable, "-c", script],
        cwd=initialized_archive,
        env=env,
        text=True,
@@ -206,47 +282,49 @@ def _run_savepagenow_existing_snapshot_script(initialized_archive: Path, request

 def test_web_add_creates_and_reuses_snapshot_logged_in(initialized_archive):
    """/web/https://... should work for authenticated users even when public add is off."""
-    url = create_test_url(domain='example.com', path='savepagenow-auth')
-    request_url = url.replace('https://', '')
+    url = create_test_url(domain="example.com", path="savepagenow-auth")
+    request_url = url.replace("https://", "")
    result = _run_savepagenow_script(initialized_archive, request_url, url, login=True, public_add_view=False, host=ADMIN_HOST)
-    assert result.returncode == 0, (
-        "SavePageNow shortcut (logged-in) test failed.\n"
-        f"stdout:\n{result.stdout}\n"
-        f"stderr:\n{result.stderr}"
-    )
+    assert result.returncode == 0, f"SavePageNow shortcut (logged-in) test failed.\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}"


 def test_web_add_creates_and_reuses_snapshot_public(initialized_archive):
    """/web/https://... should work when PUBLIC_ADD_VIEW is enabled without login."""
-    url = create_test_url(domain='example.com', path='savepagenow-public')
-    request_url = url.replace('https://', '')
-    result = _run_savepagenow_script(initialized_archive, request_url, url, login=False, public_add_view=True, host='web.archivebox.localhost:8000')
-    assert result.returncode == 0, (
-        "SavePageNow shortcut (public add) test failed.\n"
-        f"stdout:\n{result.stdout}\n"
-        f"stderr:\n{result.stderr}"
+    url = create_test_url(domain="example.com", path="savepagenow-public")
+    request_url = url
+    result = _run_savepagenow_script(
+        initialized_archive,
+        request_url,
+        url,
+        login=False,
+        public_add_view=True,
+        host="web.archivebox.localhost:8000",
    )
+    assert result.returncode == 0, f"SavePageNow shortcut (public add) test failed.\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}"


 def test_web_add_requires_login_when_public_off(initialized_archive):
-    """/web/https://... should 404 for new URLs when PUBLIC_ADD_VIEW is false and not logged in."""
-    url = create_test_url(domain='example.com', path='savepagenow-404')
-    request_url = url.replace('https://', '')
+    """/web/https://... should bounce to admin when PUBLIC_ADD_VIEW is false and not logged in."""
+    url = create_test_url(domain="example.com", path="savepagenow-404")
+    request_url = url
    result = _run_savepagenow_not_found_script(initialized_archive, request_url)
+    assert result.returncode == 0, f"SavePageNow shortcut (no public add) test failed.\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}"
+
+
+def test_web_add_redirects_to_admin_and_creates_when_logged_in(initialized_archive):
+    """/web/https://... on web host should redirect to admin host and create when the user is logged in there."""
+    url = create_test_url(domain="example.com", path="savepagenow-web-admin")
+    result = _run_savepagenow_via_web_host_redirect_script(initialized_archive, url, url)
    assert result.returncode == 0, (
-        "SavePageNow shortcut (no public add) test failed.\n"
-        f"stdout:\n{result.stdout}\n"
-        f"stderr:\n{result.stderr}"
+        f"SavePageNow shortcut (web->admin redirect) test failed.\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}"
    )


 def test_web_add_redirects_existing_snapshot_when_public_off(initialized_archive):
    """/web/https://... should redirect to existing snapshot even when public add is off and not logged in."""
-    url = create_test_url(domain='example.com', path='savepagenow-existing')
-    request_url = url.replace('https://', '')
+    url = create_test_url(domain="example.com", path="savepagenow-existing")
+    request_url = url.replace("https://", "")
    result = _run_savepagenow_existing_snapshot_script(initialized_archive, request_url, url)
    assert result.returncode == 0, (
-        "SavePageNow shortcut (existing snapshot) test failed.\n"
-        f"stdout:\n{result.stdout}\n"
-        f"stderr:\n{result.stderr}"
+        f"SavePageNow shortcut (existing snapshot) test failed.\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}"
    )
--- a/archivebox/tests/test_schedule.py
+++ b/archivebox/tests/test_schedule.py
@@ -8,7 +8,6 @@ import subprocess
 import pytest


-
 def _fetchone(tmp_path, query):
    conn = sqlite3.connect(tmp_path / "index.sqlite3")
    try:
@@ -21,7 +20,7 @@ def test_schedule_creates_enabled_db_schedule(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'schedule', '--every=daily', '--depth=1', 'https://example.com/feed.xml'],
+        ["archivebox", "schedule", "--every=daily", "--depth=1", "https://example.com/feed.xml"],
        capture_output=True,
        text=True,
    )
@@ -37,50 +36,50 @@ def test_schedule_creates_enabled_db_schedule(tmp_path, process):
        "SELECT urls, status, max_depth FROM crawls_crawl ORDER BY created_at DESC LIMIT 1",
    )

-    assert schedule_row == ('daily', 1, 'Scheduled import: https://example.com/feed.xml')
-    assert crawl_row == ('https://example.com/feed.xml', 'sealed', 1)
+    assert schedule_row == ("daily", 1, "Scheduled import: https://example.com/feed.xml")
+    assert crawl_row == ("https://example.com/feed.xml", "sealed", 1)


 def test_schedule_show_lists_enabled_schedules(tmp_path, process):
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'schedule', '--every=weekly', 'https://example.com/feed.xml'],
+        ["archivebox", "schedule", "--every=weekly", "https://example.com/feed.xml"],
        capture_output=True,
        text=True,
        check=True,
    )

    result = subprocess.run(
-        ['archivebox', 'schedule', '--show'],
+        ["archivebox", "schedule", "--show"],
        capture_output=True,
        text=True,
    )

    assert result.returncode == 0
-    assert 'Active scheduled crawls' in result.stdout
-    assert 'https://example.com/feed.xml' in result.stdout
-    assert 'weekly' in result.stdout
+    assert "Active scheduled crawls" in result.stdout
+    assert "https://example.com/feed.xml" in result.stdout
+    assert "weekly" in result.stdout


 def test_schedule_clear_disables_existing_schedules(tmp_path, process):
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'schedule', '--every=daily', 'https://example.com/feed.xml'],
+        ["archivebox", "schedule", "--every=daily", "https://example.com/feed.xml"],
        capture_output=True,
        text=True,
        check=True,
    )

    result = subprocess.run(
-        ['archivebox', 'schedule', '--clear'],
+        ["archivebox", "schedule", "--clear"],
        capture_output=True,
        text=True,
    )

    assert result.returncode == 0
-    assert 'Disabled 1 scheduled crawl' in result.stdout
+    assert "Disabled 1 scheduled crawl" in result.stdout

    disabled_count = _fetchone(
        tmp_path,
@@ -99,13 +98,13 @@ def test_schedule_every_requires_valid_period(tmp_path, process):
    os.chdir(tmp_path)

    result = subprocess.run(
-        ['archivebox', 'schedule', '--every=invalid_period', 'https://example.com/feed.xml'],
+        ["archivebox", "schedule", "--every=invalid_period", "https://example.com/feed.xml"],
        capture_output=True,
        text=True,
    )

    assert result.returncode != 0
-    assert 'Invalid schedule' in result.stderr or 'Invalid schedule' in result.stdout
+    assert "Invalid schedule" in result.stderr or "Invalid schedule" in result.stdout


 class TestScheduleCLI:
@@ -113,17 +112,17 @@ class TestScheduleCLI:
        os.chdir(tmp_path)

        result = subprocess.run(
-            ['archivebox', 'schedule', '--help'],
+            ["archivebox", "schedule", "--help"],
            capture_output=True,
            text=True,
        )

        assert result.returncode == 0
-        assert '--every' in result.stdout
-        assert '--show' in result.stdout
-        assert '--clear' in result.stdout
-        assert '--run-all' in result.stdout
+        assert "--every" in result.stdout
+        assert "--show" in result.stdout
+        assert "--clear" in result.stdout
+        assert "--run-all" in result.stdout


-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/archivebox/tests/test_schedule_e2e.py
+++ b/archivebox/tests/test_schedule_e2e.py
@@ -21,7 +21,7 @@ REPO_ROOT = Path(__file__).resolve().parents[2]

 def init_archive(cwd: Path) -> None:
    result = subprocess.run(
-        [sys.executable, '-m', 'archivebox', 'init', '--quick'],
+        [sys.executable, "-m", "archivebox", "init", "--quick"],
        cwd=cwd,
        capture_output=True,
        text=True,
@@ -32,46 +32,48 @@ def init_archive(cwd: Path) -> None:

 def build_test_env(port: int, **extra: str) -> dict[str, str]:
    env = os.environ.copy()
-    env.pop('DATA_DIR', None)
-    env.update({
-        'LISTEN_HOST': f'archivebox.localhost:{port}',
-        'ALLOWED_HOSTS': '*',
-        'CSRF_TRUSTED_ORIGINS': f'http://admin.archivebox.localhost:{port}',
-        'PUBLIC_ADD_VIEW': 'True',
-        'USE_COLOR': 'False',
-        'SHOW_PROGRESS': 'False',
-        'TIMEOUT': '20',
-        'URL_ALLOWLIST': r'127\.0\.0\.1[:/].*',
-        'SAVE_ARCHIVEDOTORG': 'False',
-        'SAVE_TITLE': 'False',
-        'SAVE_FAVICON': 'False',
-        'SAVE_WARC': 'False',
-        'SAVE_PDF': 'False',
-        'SAVE_SCREENSHOT': 'False',
-        'SAVE_DOM': 'False',
-        'SAVE_SINGLEFILE': 'False',
-        'SAVE_READABILITY': 'False',
-        'SAVE_MERCURY': 'False',
-        'SAVE_GIT': 'False',
-        'SAVE_YTDLP': 'False',
-        'SAVE_HEADERS': 'False',
-        'SAVE_HTMLTOTEXT': 'False',
-        'SAVE_WGET': 'True',
-        'USE_CHROME': 'False',
-    })
+    env.pop("DATA_DIR", None)
+    env.update(
+        {
+            "LISTEN_HOST": f"archivebox.localhost:{port}",
+            "ALLOWED_HOSTS": "*",
+            "CSRF_TRUSTED_ORIGINS": f"http://admin.archivebox.localhost:{port}",
+            "PUBLIC_ADD_VIEW": "True",
+            "USE_COLOR": "False",
+            "SHOW_PROGRESS": "False",
+            "TIMEOUT": "20",
+            "URL_ALLOWLIST": r"127\.0\.0\.1[:/].*",
+            "SAVE_ARCHIVEDOTORG": "False",
+            "SAVE_TITLE": "False",
+            "SAVE_FAVICON": "False",
+            "SAVE_WARC": "False",
+            "SAVE_PDF": "False",
+            "SAVE_SCREENSHOT": "False",
+            "SAVE_DOM": "False",
+            "SAVE_SINGLEFILE": "False",
+            "SAVE_READABILITY": "False",
+            "SAVE_MERCURY": "False",
+            "SAVE_GIT": "False",
+            "SAVE_YTDLP": "False",
+            "SAVE_HEADERS": "False",
+            "SAVE_HTMLTOTEXT": "False",
+            "SAVE_WGET": "True",
+            "USE_CHROME": "False",
+        },
+    )
    env.update(extra)
    return env


 def get_free_port() -> int:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
-        sock.bind(('127.0.0.1', 0))
+        sock.bind(("127.0.0.1", 0))
        return sock.getsockname()[1]


 def start_server(cwd: Path, env: dict[str, str], port: int) -> None:
    result = subprocess.run(
-        [sys.executable, '-m', 'archivebox', 'server', '--daemonize', f'127.0.0.1:{port}'],
+        [sys.executable, "-m", "archivebox", "server", "--daemonize", f"127.0.0.1:{port}"],
        cwd=cwd,
        capture_output=True,
        text=True,
@@ -91,19 +93,19 @@ def stop_server(cwd: Path) -> None:
        from archivebox.workers.supervisord_util import stop_existing_supervisord_process
        stop_existing_supervisord_process()
        print('stopped')
-        """
+        """,
    )
    run_python_cwd(script, cwd=cwd, timeout=30)


-def wait_for_http(port: int, host: str, path: str = '/', timeout: int = 30) -> requests.Response:
+def wait_for_http(port: int, host: str, path: str = "/", timeout: int = 30) -> requests.Response:
    deadline = time.time() + timeout
    last_exc = None
    while time.time() < deadline:
        try:
            response = requests.get(
-                f'http://127.0.0.1:{port}{path}',
-                headers={'Host': host},
+                f"http://127.0.0.1:{port}{path}",
+                headers={"Host": host},
                timeout=2,
                allow_redirects=False,
            )
@@ -112,11 +114,11 @@ def wait_for_http(port: int, host: str, path: str = '/', timeout: int = 30) -> r
        except requests.RequestException as exc:
            last_exc = exc
        time.sleep(0.5)
-    raise AssertionError(f'Timed out waiting for HTTP on {host}: {last_exc}')
+    raise AssertionError(f"Timed out waiting for HTTP on {host}: {last_exc}")


 def make_latest_schedule_due(cwd: Path) -> None:
-    conn = sqlite3.connect(cwd / 'index.sqlite3')
+    conn = sqlite3.connect(cwd / "index.sqlite3")
    try:
        conn.execute(
            """
@@ -129,7 +131,7 @@ def make_latest_schedule_due(cwd: Path) -> None:
                ORDER BY created_at DESC
                LIMIT 1
            )
-            """
+            """,
        )
        conn.commit()
    finally:
@@ -182,7 +184,7 @@ def get_snapshot_file_text(cwd: Path, url: str) -> str:

        assert candidates, f'no captured html/txt files found in {{snapshot_dir}}'
        print(candidates[0].read_text(errors='ignore'))
-        """
+        """,
    )
    stdout, stderr, code = run_python_cwd(script, cwd=cwd, timeout=60)
    assert code == 0, stderr
@@ -198,11 +200,11 @@ def wait_for_snapshot_capture(cwd: Path, url: str, timeout: int = 180) -> str:
        except AssertionError as err:
            last_error = err
            time.sleep(2)
-    raise AssertionError(f'timed out waiting for captured content for {url}: {last_error}')
+    raise AssertionError(f"timed out waiting for captured content for {url}: {last_error}")


 def get_counts(cwd: Path, scheduled_url: str, one_shot_url: str) -> tuple[int, int, int]:
-    conn = sqlite3.connect(cwd / 'index.sqlite3')
+    conn = sqlite3.connect(cwd / "index.sqlite3")
    try:
        scheduled_snapshots = conn.execute(
            "SELECT COUNT(*) FROM core_snapshot WHERE url = ?",
@@ -259,7 +261,7 @@ def create_admin_and_token(cwd: Path) -> str:
            expires=timezone.now() + timedelta(days=1),
        )
        print(token.token)
-        """
+        """,
    )
    stdout, stderr, code = run_python_cwd(script, cwd=cwd, timeout=60)
    assert code == 0, stderr
@@ -275,7 +277,7 @@ def test_server_processes_due_cli_schedule_and_saves_real_content(tmp_path, recu
    env = build_test_env(port)

    schedule_result = subprocess.run(
-        [sys.executable, '-m', 'archivebox', 'schedule', '--every=daily', '--depth=0', recursive_test_site['root_url']],
+        [sys.executable, "-m", "archivebox", "schedule", "--every=daily", "--depth=0", recursive_test_site["root_url"]],
        cwd=tmp_path,
        capture_output=True,
        text=True,
@@ -283,16 +285,16 @@ def test_server_processes_due_cli_schedule_and_saves_real_content(tmp_path, recu
        timeout=60,
    )
    assert schedule_result.returncode == 0, schedule_result.stderr
-    assert 'Created scheduled crawl' in schedule_result.stdout
+    assert "Created scheduled crawl" in schedule_result.stdout

    make_latest_schedule_due(tmp_path)

    try:
        start_server(tmp_path, env=env, port=port)
-        wait_for_http(port, host=f'web.archivebox.localhost:{port}')
-        captured_text = wait_for_snapshot_capture(tmp_path, recursive_test_site['root_url'], timeout=180)
-        assert 'Root' in captured_text
-        assert 'About' in captured_text
+        wait_for_http(port, host=f"web.archivebox.localhost:{port}")
+        captured_text = wait_for_snapshot_capture(tmp_path, recursive_test_site["root_url"], timeout=180)
+        assert "Root" in captured_text
+        assert "About" in captured_text
    finally:
        stop_server(tmp_path)

@@ -304,11 +306,11 @@ def test_archivebox_add_remains_one_shot_even_when_schedule_is_due(tmp_path, rec

    port = get_free_port()
    env = build_test_env(port)
-    scheduled_url = recursive_test_site['root_url']
-    one_shot_url = recursive_test_site['child_urls'][0]
+    scheduled_url = recursive_test_site["root_url"]
+    one_shot_url = recursive_test_site["child_urls"][0]

    schedule_result = subprocess.run(
-        [sys.executable, '-m', 'archivebox', 'schedule', '--every=daily', '--depth=0', scheduled_url],
+        [sys.executable, "-m", "archivebox", "schedule", "--every=daily", "--depth=0", scheduled_url],
        cwd=tmp_path,
        capture_output=True,
        text=True,
@@ -320,7 +322,7 @@ def test_archivebox_add_remains_one_shot_even_when_schedule_is_due(tmp_path, rec
    make_latest_schedule_due(tmp_path)

    add_result = subprocess.run(
-        [sys.executable, '-m', 'archivebox', 'add', '--depth=0', '--plugins=wget', one_shot_url],
+        [sys.executable, "-m", "archivebox", "add", "--depth=0", "--plugins=wget", one_shot_url],
        cwd=tmp_path,
        capture_output=True,
        text=True,
@@ -329,7 +331,7 @@ def test_archivebox_add_remains_one_shot_even_when_schedule_is_due(tmp_path, rec
    )
    assert add_result.returncode == 0, add_result.stderr
    captured_text = wait_for_snapshot_capture(tmp_path, one_shot_url, timeout=120)
-    assert 'Deep About' in captured_text or 'About' in captured_text
+    assert "Deep About" in captured_text or "About" in captured_text

    scheduled_snapshots, one_shot_snapshots, scheduled_crawls = get_counts(tmp_path, scheduled_url, one_shot_url)
    assert one_shot_snapshots >= 1
@@ -348,27 +350,27 @@ def test_schedule_rest_api_works_over_running_server(tmp_path, recursive_test_si

    try:
        start_server(tmp_path, env=env, port=port)
-        wait_for_http(port, host=f'api.archivebox.localhost:{port}', path='/api/v1/docs')
+        wait_for_http(port, host=f"api.archivebox.localhost:{port}", path="/api/v1/docs")

        response = requests.post(
-            f'http://127.0.0.1:{port}/api/v1/cli/schedule',
+            f"http://127.0.0.1:{port}/api/v1/cli/schedule",
            headers={
-                'Host': f'api.archivebox.localhost:{port}',
-                'X-ArchiveBox-API-Key': api_token,
+                "Host": f"api.archivebox.localhost:{port}",
+                "X-ArchiveBox-API-Key": api_token,
            },
            json={
-                'every': 'daily',
-                'import_path': recursive_test_site['root_url'],
-                'quiet': True,
+                "every": "daily",
+                "import_path": recursive_test_site["root_url"],
+                "quiet": True,
            },
            timeout=10,
        )

        assert response.status_code == 200, response.text
        payload = response.json()
-        assert payload['success'] is True
-        assert payload['result_format'] == 'json'
-        assert len(payload['result']['created_schedule_ids']) == 1
+        assert payload["success"] is True
+        assert payload["result_format"] == "json"
+        assert len(payload["result"]["created_schedule_ids"]) == 1
    finally:
        stop_server(tmp_path)

@@ -379,21 +381,21 @@ def test_schedule_web_ui_post_works_over_running_server(tmp_path, recursive_test
    init_archive(tmp_path)

    port = get_free_port()
-    env = build_test_env(port, PUBLIC_ADD_VIEW='True')
+    env = build_test_env(port, PUBLIC_ADD_VIEW="True")

    try:
        start_server(tmp_path, env=env, port=port)
-        wait_for_http(port, host=f'web.archivebox.localhost:{port}', path='/add/')
+        wait_for_http(port, host=f"web.archivebox.localhost:{port}", path="/add/")

        response = requests.post(
-            f'http://127.0.0.1:{port}/add/',
-            headers={'Host': f'web.archivebox.localhost:{port}'},
+            f"http://127.0.0.1:{port}/add/",
+            headers={"Host": f"web.archivebox.localhost:{port}"},
            data={
-                'url': recursive_test_site['root_url'],
-                'depth': '0',
-                'schedule': 'daily',
-                'tag': 'web-ui',
-                'notes': 'created from web ui',
+                "url": recursive_test_site["root_url"],
+                "depth": "0",
+                "schedule": "daily",
+                "tag": "web-ui",
+                "notes": "created from web ui",
            },
            timeout=10,
            allow_redirects=False,
@@ -401,7 +403,7 @@ def test_schedule_web_ui_post_works_over_running_server(tmp_path, recursive_test

        assert response.status_code in (302, 303), response.text

-        conn = sqlite3.connect(tmp_path / 'index.sqlite3')
+        conn = sqlite3.connect(tmp_path / "index.sqlite3")
        try:
            row = conn.execute(
                """
@@ -410,11 +412,11 @@ def test_schedule_web_ui_post_works_over_running_server(tmp_path, recursive_test
                JOIN crawls_crawl c ON c.schedule_id = cs.id
                ORDER BY cs.created_at DESC
                LIMIT 1
-                """
+                """,
            ).fetchone()
        finally:
            conn.close()

-        assert row == ('daily', recursive_test_site['root_url'], 'web-ui')
+        assert row == ("daily", recursive_test_site["root_url"], "web-ui")
    finally:
        stop_server(tmp_path)
--- a/archivebox/tests/test_server_security_browser.py
+++ b/archivebox/tests/test_server_security_browser.py
@@ -103,7 +103,10 @@ async function main() {
    timeout: 15000,
  });

-  await new Promise((resolve) => setTimeout(resolve, 1500));
+  await page.waitForFunction(
+    () => window.__dangerousScriptRan !== true || window.__probeResults !== undefined,
+    {timeout: 15000},
+  );

  const pageState = await page.evaluate(() => ({
    href: location.href,
@@ -297,7 +300,7 @@ def _seed_archive(data_dir: Path) -> dict[str, object]:
            "password": "testpassword",
            "snapshots": snapshots,
        }))
-        """
+        """,
    )
    stdout, stderr, returncode = run_python_cwd(script, cwd=data_dir, timeout=120)
    assert returncode == 0, stderr
@@ -310,10 +313,17 @@ def _get_free_port() -> int:
        return sock.getsockname()[1]


-def _wait_for_http(port: int, host: str, timeout: float = 30.0) -> None:
+def _wait_for_http(
+    port: int,
+    host: str,
+    timeout: float = 30.0,
+    process: subprocess.Popen[str] | None = None,
+) -> None:
    deadline = time.time() + timeout
    last_error = "server did not answer"
    while time.time() < deadline:
+        if process is not None and process.poll() is not None:
+            raise AssertionError(f"Server exited before becoming ready with code {process.returncode}")
        try:
            response = requests.get(
                f"http://127.0.0.1:{port}/",
@@ -358,7 +368,7 @@ def _start_server(data_dir: Path, *, mode: str, port: int) -> subprocess.Popen[s
            "SAVE_HEADERS": "False",
            "SAVE_HTMLTOTEXT": "False",
            "USE_CHROME": "False",
-        }
+        },
    )
    process = subprocess.Popen(
        [sys.executable, "-m", "archivebox", "server", "--debug", "--nothreading", f"127.0.0.1:{port}"],
@@ -369,7 +379,11 @@ def _start_server(data_dir: Path, *, mode: str, port: int) -> subprocess.Popen[s
        text=True,
        start_new_session=True,
    )
-    _wait_for_http(port, f"archivebox.localhost:{port}")
+    try:
+        _wait_for_http(port, f"archivebox.localhost:{port}", process=process)
+    except AssertionError as exc:
+        server_log = _stop_server(process)
+        raise AssertionError(f"{exc}\n\nSERVER LOG:\n{server_log}") from exc
    return process


@@ -414,7 +428,7 @@ def _build_probe_config(mode: str, port: int, fixture: dict[str, object], runtim
            "victim": victim_url,
            "admin": f"{admin_origin}/admin/",
            "api": f"{admin_origin}/api/v1/docs",
-        }
+        },
    )

    return {
@@ -427,7 +441,13 @@ def _build_probe_config(mode: str, port: int, fixture: dict[str, object], runtim
    }


-def _run_browser_probe(data_dir: Path, runtime: dict[str, Path], mode: str, fixture: dict[str, object], tmp_path: Path) -> dict[str, object]:
+def _run_browser_probe(
+    data_dir: Path,
+    runtime: dict[str, Path],
+    mode: str,
+    fixture: dict[str, object],
+    tmp_path: Path,
+) -> dict[str, object]:
    port = _get_free_port()
    process = _start_server(data_dir, mode=mode, port=port)
    probe_path = tmp_path / "server_security_probe.js"
@@ -517,7 +537,13 @@ def _run_browser_probe(data_dir: Path, runtime: dict[str, Path], mode: str, fixt
        ),
    ],
 )
-def test_server_security_modes_in_chrome(initialized_archive: Path, browser_runtime, tmp_path: Path, mode: str, expected: dict[str, object]) -> None:
+def test_server_security_modes_in_chrome(
+    initialized_archive: Path,
+    browser_runtime,
+    tmp_path: Path,
+    mode: str,
+    expected: dict[str, object],
+) -> None:
    fixture = _seed_archive(initialized_archive)
    result = _run_browser_probe(initialized_archive, browser_runtime, mode, fixture, tmp_path)

--- a/archivebox/tests/test_snapshot.py
+++ b/archivebox/tests/test_snapshot.py
@@ -12,32 +12,31 @@ import uuid
 import pytest


-
 def test_snapshot_creates_snapshot_with_correct_url(tmp_path, process, disable_extractors_dict):
    """Test that snapshot stores the exact URL in the database."""
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'snapshot', 'create', 'https://example.com'],
+        ["archivebox", "snapshot", "create", "https://example.com"],
        capture_output=True,
-        env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)},
+        env={**disable_extractors_dict, "DATA_DIR": str(tmp_path)},
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    snapshot_row = c.execute(
        "SELECT id, created_at, url, crawl_id FROM core_snapshot WHERE url = ?",
-        ('https://example.com',)
+        ("https://example.com",),
    ).fetchone()
    assert snapshot_row is not None
    crawl_row = c.execute(
        "SELECT id, created_at, urls, created_by_id FROM crawls_crawl WHERE id = ?",
-        (snapshot_row[3],)
+        (snapshot_row[3],),
    ).fetchone()
    assert crawl_row is not None
    user_row = c.execute(
        "SELECT username FROM auth_user WHERE id = ?",
-        (crawl_row[3],)
+        (crawl_row[3],),
    ).fetchone()
    assert user_row is not None
    conn.close()
@@ -45,15 +44,12 @@ def test_snapshot_creates_snapshot_with_correct_url(tmp_path, process, disable_e
    snapshot_id_raw, snapshot_created_at, snapshot_url, crawl_id = snapshot_row
    snapshot_id = str(uuid.UUID(snapshot_id_raw))
    username = user_row[0]
-    snapshot_date_str = datetime.fromisoformat(snapshot_created_at).strftime('%Y%m%d')
-    domain = urlparse(snapshot_url).hostname or 'unknown'
+    snapshot_date_str = datetime.fromisoformat(snapshot_created_at).strftime("%Y%m%d")
+    domain = urlparse(snapshot_url).hostname or "unknown"

    # Verify crawl symlink exists and is relative
-    target_path = tmp_path / 'users' / username / 'snapshots' / snapshot_date_str / domain / snapshot_id
-    symlinks = [
-        p for p in tmp_path.rglob(str(snapshot_id))
-        if p.is_symlink()
-    ]
+    target_path = tmp_path / "users" / username / "snapshots" / snapshot_date_str / domain / snapshot_id
+    symlinks = [p for p in tmp_path.rglob(str(snapshot_id)) if p.is_symlink()]
    assert symlinks, "Snapshot symlink should exist under crawl dir"
    link_path = symlinks[0]

@@ -68,21 +64,25 @@ def test_snapshot_multiple_urls_creates_multiple_records(tmp_path, process, disa
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'snapshot', 'create',
-         'https://example.com',
-         'https://iana.org'],
+        [
+            "archivebox",
+            "snapshot",
+            "create",
+            "https://example.com",
+            "https://iana.org",
+        ],
        capture_output=True,
-        env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)},
+        env={**disable_extractors_dict, "DATA_DIR": str(tmp_path)},
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    urls = c.execute("SELECT url FROM core_snapshot ORDER BY url").fetchall()
    conn.close()

    urls = [u[0] for u in urls]
-    assert 'https://example.com' in urls
-    assert 'https://iana.org' in urls
+    assert "https://example.com" in urls
+    assert "https://iana.org" in urls
    assert len(urls) >= 2


@@ -91,31 +91,41 @@ def test_snapshot_tag_creates_tag_and_links_to_snapshot(tmp_path, process, disab
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'snapshot', 'create', '--tag=mytesttag',
-         'https://example.com'],
+        [
+            "archivebox",
+            "snapshot",
+            "create",
+            "--tag=mytesttag",
+            "https://example.com",
+        ],
        capture_output=True,
-        env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)},
+        env={**disable_extractors_dict, "DATA_DIR": str(tmp_path)},
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    # Verify tag was created
-    tag = c.execute("SELECT id, name FROM core_tag WHERE name = ?", ('mytesttag',)).fetchone()
+    tag = c.execute("SELECT id, name FROM core_tag WHERE name = ?", ("mytesttag",)).fetchone()
    assert tag is not None, "Tag 'mytesttag' should exist in core_tag"
    tag_id = tag[0]

    # Verify snapshot exists
-    snapshot = c.execute("SELECT id FROM core_snapshot WHERE url = ?",
-                        ('https://example.com',)).fetchone()
+    snapshot = c.execute(
+        "SELECT id FROM core_snapshot WHERE url = ?",
+        ("https://example.com",),
+    ).fetchone()
    assert snapshot is not None
    snapshot_id = snapshot[0]

    # Verify tag is linked to snapshot via join table
-    link = c.execute("""
+    link = c.execute(
+        """
        SELECT * FROM core_snapshot_tags
        WHERE snapshot_id = ? AND tag_id = ?
-    """, (snapshot_id, tag_id)).fetchone()
+    """,
+        (snapshot_id, tag_id),
+    ).fetchone()
    conn.close()

    assert link is not None, "Tag should be linked to snapshot via core_snapshot_tags"
@@ -127,23 +137,23 @@ def test_snapshot_jsonl_output_has_correct_structure(tmp_path, process, disable_

    # Pass URL as argument instead of stdin for more reliable behavior
    result = subprocess.run(
-        ['archivebox', 'snapshot', 'create', 'https://example.com'],
+        ["archivebox", "snapshot", "create", "https://example.com"],
        capture_output=True,
        text=True,
-        env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)},
+        env={**disable_extractors_dict, "DATA_DIR": str(tmp_path)},
    )

    # Parse JSONL output lines
    records = Process.parse_records_from_text(result.stdout)
-    snapshot_records = [r for r in records if r.get('type') == 'Snapshot']
+    snapshot_records = [r for r in records if r.get("type") == "Snapshot"]

    assert len(snapshot_records) >= 1, "Should output at least one Snapshot JSONL record"

    record = snapshot_records[0]
-    assert record.get('type') == 'Snapshot'
-    assert 'id' in record, "Snapshot record should have 'id' field"
-    assert 'url' in record, "Snapshot record should have 'url' field"
-    assert record['url'] == 'https://example.com'
+    assert record.get("type") == "Snapshot"
+    assert "id" in record, "Snapshot record should have 'id' field"
+    assert "url" in record, "Snapshot record should have 'url' field"
+    assert record["url"] == "https://example.com"


 def test_snapshot_with_tag_stores_tag_name(tmp_path, process, disable_extractors_dict):
@@ -152,22 +162,24 @@ def test_snapshot_with_tag_stores_tag_name(tmp_path, process, disable_extractors

    # Use command line args instead of stdin
    subprocess.run(
-        ['archivebox', 'snapshot', 'create', '--tag=customtag', 'https://example.com'],
+        ["archivebox", "snapshot", "create", "--tag=customtag", "https://example.com"],
        capture_output=True,
        text=True,
-        env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)},
+        env={**disable_extractors_dict, "DATA_DIR": str(tmp_path)},
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()

    # Verify tag was created with correct name
-    tag = c.execute("SELECT name FROM core_tag WHERE name = ?",
-                   ('customtag',)).fetchone()
+    tag = c.execute(
+        "SELECT name FROM core_tag WHERE name = ?",
+        ("customtag",),
+    ).fetchone()
    conn.close()

    assert tag is not None
-    assert tag[0] == 'customtag'
+    assert tag[0] == "customtag"


 def test_snapshot_with_depth_sets_snapshot_depth(tmp_path, process, disable_extractors_dict):
@@ -175,13 +187,18 @@ def test_snapshot_with_depth_sets_snapshot_depth(tmp_path, process, disable_extr
    os.chdir(tmp_path)

    subprocess.run(
-        ['archivebox', 'snapshot', 'create', '--depth=1',
-         'https://example.com'],
+        [
+            "archivebox",
+            "snapshot",
+            "create",
+            "--depth=1",
+            "https://example.com",
+        ],
        capture_output=True,
-        env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)},
+        env={**disable_extractors_dict, "DATA_DIR": str(tmp_path)},
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    snapshot = c.execute("SELECT depth FROM core_snapshot ORDER BY created_at DESC LIMIT 1").fetchone()
    conn.close()
@@ -196,24 +213,26 @@ def test_snapshot_allows_duplicate_urls_across_crawls(tmp_path, process, disable

    # Add same URL twice
    subprocess.run(
-        ['archivebox', 'snapshot', 'create', 'https://example.com'],
+        ["archivebox", "snapshot", "create", "https://example.com"],
        capture_output=True,
-        env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)},
+        env={**disable_extractors_dict, "DATA_DIR": str(tmp_path)},
    )
    subprocess.run(
-        ['archivebox', 'snapshot', 'create', 'https://example.com'],
+        ["archivebox", "snapshot", "create", "https://example.com"],
        capture_output=True,
-        env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)},
+        env={**disable_extractors_dict, "DATA_DIR": str(tmp_path)},
    )

-    conn = sqlite3.connect('index.sqlite3')
+    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
-    count = c.execute("SELECT COUNT(*) FROM core_snapshot WHERE url = ?",
-                     ('https://example.com',)).fetchone()[0]
+    count = c.execute(
+        "SELECT COUNT(*) FROM core_snapshot WHERE url = ?",
+        ("https://example.com",),
+    ).fetchone()[0]
    conn.close()

    assert count == 2, "Same URL should create separate snapshots across different crawls"


-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/archivebox/tests/test_tag_admin.py
+++ b/archivebox/tests/test_tag_admin.py
@@ -13,15 +13,15 @@ pytestmark = pytest.mark.django_db


 User = get_user_model()
-ADMIN_HOST = 'admin.archivebox.localhost:8000'
+ADMIN_HOST = "admin.archivebox.localhost:8000"


@pytest.fixture
 def admin_user(db):
    return cast(UserManager, User.objects).create_superuser(
-        username='tagadmin',
-        email='tagadmin@test.com',
-        password='testpassword',
+        username="tagadmin",
+        email="tagadmin@test.com",
+        password="testpassword",
    )


@@ -39,7 +39,7 @@ def crawl(admin_user):
    from archivebox.crawls.models import Crawl

    return Crawl.objects.create(
-        urls='https://example.com',
+        urls="https://example.com",
        created_by=admin_user,
    )

@@ -48,15 +48,15 @@ def crawl(admin_user):
 def tagged_data(crawl, admin_user):
    from archivebox.core.models import Snapshot, Tag

-    tag = Tag.objects.create(name='Alpha Research', created_by=admin_user)
+    tag = Tag.objects.create(name="Alpha Research", created_by=admin_user)
    first = Snapshot.objects.create(
-        url='https://example.com/one',
-        title='Example One',
+        url="https://example.com/one",
+        title="Example One",
        crawl=crawl,
    )
    second = Snapshot.objects.create(
-        url='https://example.com/two',
-        title='Example Two',
+        url="https://example.com/two",
+        title="Example Two",
        crawl=crawl,
    )
    first.tags.add(tag)
@@ -65,27 +65,26 @@ def tagged_data(crawl, admin_user):


 def test_tag_admin_changelist_renders_custom_ui(client, admin_user, tagged_data):
-    client.login(username='tagadmin', password='testpassword')
+    client.login(username="tagadmin", password="testpassword")

-    response = client.get(reverse('admin:core_tag_changelist'), HTTP_HOST=ADMIN_HOST)
+    response = client.get(reverse("admin:core_tag_changelist"), HTTP_HOST=ADMIN_HOST)

    assert response.status_code == 200
    assert b'id="tag-live-search"' in response.content
    assert b'id="tag-sort-select"' in response.content
    assert b'id="tag-created-by-select"' in response.content
    assert b'id="tag-year-select"' in response.content
-    assert b'id="tag-has-snapshots-select"' in response.content
-    assert b'Alpha Research' in response.content
+    assert b"Alpha Research" in response.content
    assert b'class="tag-card"' in response.content


 def test_tag_admin_add_view_renders_similar_tag_reference(client, admin_user):
-    client.login(username='tagadmin', password='testpassword')
+    client.login(username="tagadmin", password="testpassword")

-    response = client.get(reverse('admin:core_tag_add'), HTTP_HOST=ADMIN_HOST)
+    response = client.get(reverse("admin:core_tag_add"), HTTP_HOST=ADMIN_HOST)

    assert response.status_code == 200
-    assert b'Similar Tags' in response.content
+    assert b"Similar Tags" in response.content
    assert b'data-tag-name-input="1"' in response.content


@@ -93,40 +92,40 @@ def test_tag_search_api_returns_card_payload(client, api_token, tagged_data):
    tag, snapshots = tagged_data

    response = client.get(
-        reverse('api-1:search_tags'),
-        {'q': 'Alpha', 'api_key': api_token},
+        reverse("api-1:search_tags"),
+        {"q": "Alpha", "api_key": api_token},
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200
    payload = response.json()
-    assert payload['sort'] == 'created_desc'
-    assert payload['created_by'] == ''
-    assert payload['year'] == ''
-    assert payload['has_snapshots'] == 'all'
-    assert payload['tags'][0]['id'] == tag.id
-    assert payload['tags'][0]['name'] == 'Alpha Research'
-    assert payload['tags'][0]['num_snapshots'] == 2
-    assert payload['tags'][0]['snapshots'][0]['title'] in {'Example One', 'Example Two'}
-    assert payload['tags'][0]['export_jsonl_url'].endswith(f'/api/v1/core/tag/{tag.id}/snapshots.jsonl')
-    assert payload['tags'][0]['filter_url'].endswith(f'/admin/core/snapshot/?tags__id__exact={tag.id}')
-    assert {snapshot['url'] for snapshot in payload['tags'][0]['snapshots']} == {snap.url for snap in snapshots}
+    assert payload["sort"] == "created_desc"
+    assert payload["created_by"] == ""
+    assert payload["year"] == ""
+    assert payload["has_snapshots"] == "all"
+    assert payload["tags"][0]["id"] == tag.id
+    assert payload["tags"][0]["name"] == "Alpha Research"
+    assert payload["tags"][0]["num_snapshots"] == 2
+    assert payload["tags"][0]["snapshots"][0]["title"] in {"Example One", "Example Two"}
+    assert payload["tags"][0]["export_jsonl_url"].endswith(f"/api/v1/core/tag/{tag.id}/snapshots.jsonl")
+    assert payload["tags"][0]["filter_url"].endswith(f"/admin/core/snapshot/?tags__id__exact={tag.id}")
+    assert {snapshot["url"] for snapshot in payload["tags"][0]["snapshots"]} == {snap.url for snap in snapshots}


 def test_tag_search_api_respects_sort_and_filters(client, api_token, admin_user, crawl, tagged_data):
    from archivebox.core.models import Snapshot, Tag

    other_user = cast(UserManager, User.objects).create_user(
-        username='tagother',
-        email='tagother@test.com',
-        password='unused',
+        username="tagother",
+        email="tagother@test.com",
+        password="unused",
    )
    tag_with_snapshots = tagged_data[0]
-    empty_tag = Tag.objects.create(name='Zulu Empty', created_by=other_user)
-    alpha_tag = Tag.objects.create(name='Alpha Empty', created_by=other_user)
+    empty_tag = Tag.objects.create(name="Zulu Empty", created_by=other_user)
+    alpha_tag = Tag.objects.create(name="Alpha Empty", created_by=other_user)
    Snapshot.objects.create(
-        url='https://example.com/three',
-        title='Example Three',
+        url="https://example.com/three",
+        title="Example Three",
        crawl=crawl,
    ).tags.add(alpha_tag)

@@ -135,24 +134,24 @@ def test_tag_search_api_respects_sort_and_filters(client, api_token, admin_user,
    Tag.objects.filter(pk=tag_with_snapshots.pk).update(created_at=timezone.make_aware(datetime(2026, 1, 1, 12, 0, 0)))

    response = client.get(
-        reverse('api-1:search_tags'),
+        reverse("api-1:search_tags"),
        {
-            'sort': 'name_desc',
-            'created_by': str(other_user.pk),
-            'year': '2024',
-            'has_snapshots': 'no',
-            'api_key': api_token,
+            "sort": "name_desc",
+            "created_by": str(other_user.pk),
+            "year": "2024",
+            "has_snapshots": "no",
+            "api_key": api_token,
        },
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200
    payload = response.json()
-    assert payload['sort'] == 'name_desc'
-    assert payload['created_by'] == str(other_user.pk)
-    assert payload['year'] == '2024'
-    assert payload['has_snapshots'] == 'no'
-    assert [tag['name'] for tag in payload['tags']] == ['Zulu Empty']
+    assert payload["sort"] == "name_desc"
+    assert payload["created_by"] == str(other_user.pk)
+    assert payload["year"] == "2024"
+    assert payload["has_snapshots"] == "no"
+    assert [tag["name"] for tag in payload["tags"]] == ["Zulu Empty"]


 def test_tag_rename_api_updates_slug(client, api_token, tagged_data):
@@ -160,30 +159,30 @@ def test_tag_rename_api_updates_slug(client, api_token, tagged_data):

    response = client.post(
        f"{reverse('api-1:rename_tag', args=[tag.id])}?api_key={api_token}",
-        data=json.dumps({'name': 'Alpha Archive'}),
-        content_type='application/json',
+        data=json.dumps({"name": "Alpha Archive"}),
+        content_type="application/json",
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200

    tag.refresh_from_db()
-    assert tag.name == 'Alpha Archive'
-    assert tag.slug == 'alpha-archive'
+    assert tag.name == "Alpha Archive"
+    assert tag.slug == "alpha-archive"


 def test_tag_snapshots_export_returns_jsonl(client, api_token, tagged_data):
    tag, _ = tagged_data

    response = client.get(
-        reverse('api-1:tag_snapshots_export', args=[tag.id]),
-        {'api_key': api_token},
+        reverse("api-1:tag_snapshots_export", args=[tag.id]),
+        {"api_key": api_token},
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200
-    assert response['Content-Type'].startswith('application/x-ndjson')
-    assert f'tag-{tag.slug}-snapshots.jsonl' in response['Content-Disposition']
+    assert response["Content-Type"].startswith("application/x-ndjson")
+    assert f"tag-{tag.slug}-snapshots.jsonl" in response["Content-Disposition"]
    body = response.content.decode()
    assert '"type": "Snapshot"' in body
    assert '"tags": "Alpha Research"' in body
@@ -193,13 +192,13 @@ def test_tag_urls_export_returns_plain_text_urls(client, api_token, tagged_data)
    tag, snapshots = tagged_data

    response = client.get(
-        reverse('api-1:tag_urls_export', args=[tag.id]),
-        {'api_key': api_token},
+        reverse("api-1:tag_urls_export", args=[tag.id]),
+        {"api_key": api_token},
        HTTP_HOST=ADMIN_HOST,
    )

    assert response.status_code == 200
-    assert response['Content-Type'].startswith('text/plain')
-    assert f'tag-{tag.slug}-urls.txt' in response['Content-Disposition']
+    assert response["Content-Type"].startswith("text/plain")
+    assert f"tag-{tag.slug}-urls.txt" in response["Content-Disposition"]
    exported_urls = set(filter(None, response.content.decode().splitlines()))
    assert exported_urls == {snapshot.url for snapshot in snapshots}
--- a/archivebox/tests/test_title.py
+++ b/archivebox/tests/test_title.py
@@ -6,11 +6,12 @@ from .fixtures import disable_extractors_dict, process

 FIXTURES = (disable_extractors_dict, process)

+
 def test_title_is_extracted(tmp_path, process, disable_extractors_dict):
    """Test that title is extracted from the page."""
    disable_extractors_dict.update({"SAVE_TITLE": "true"})
    add_process = subprocess.run(
-        ['archivebox', 'add', '--plugins=title', 'https://example.com'],
+        ["archivebox", "add", "--plugins=title", "https://example.com"],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
@@ -28,6 +29,7 @@ def test_title_is_extracted(tmp_path, process, disable_extractors_dict):
    assert snapshot[0] is not None
    assert "Example" in snapshot[0]

+
 def test_title_is_htmlencoded_in_index_html(tmp_path, process, disable_extractors_dict):
    """
    https://github.com/ArchiveBox/ArchiveBox/issues/330
@@ -36,7 +38,7 @@ def test_title_is_htmlencoded_in_index_html(tmp_path, process, disable_extractor
    """
    disable_extractors_dict.update({"SAVE_TITLE": "true"})
    add_process = subprocess.run(
-        ['archivebox', 'add', '--plugins=title', 'https://example.com'],
+        ["archivebox", "add", "--plugins=title", "https://example.com"],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
--- a/archivebox/tests/test_update.py
+++ b/archivebox/tests/test_update.py
@@ -1,28 +1,37 @@
 import json
 import sqlite3
 import subprocess
+from datetime import datetime, timedelta
+
+import pytest
+from django.utils import timezone

 from .fixtures import disable_extractors_dict, process

 FIXTURES = (disable_extractors_dict, process)

+
 def test_update_imports_orphaned_snapshots(tmp_path, process, disable_extractors_dict):
    """Test that archivebox update imports real legacy archive directories."""
-    legacy_timestamp = '1710000000'
-    legacy_dir = tmp_path / 'archive' / legacy_timestamp
+    legacy_timestamp = "1710000000"
+    legacy_dir = tmp_path / "archive" / legacy_timestamp
    legacy_dir.mkdir(parents=True, exist_ok=True)
-    (legacy_dir / 'singlefile.html').write_text('<html>example</html>')
-    (legacy_dir / 'index.json').write_text(json.dumps({
-        'url': 'https://example.com',
-        'timestamp': legacy_timestamp,
-        'title': 'Example Domain',
-        'fs_version': '0.8.0',
-        'archive_results': [],
-    }))
+    (legacy_dir / "singlefile.html").write_text("<html>example</html>")
+    (legacy_dir / "index.json").write_text(
+        json.dumps(
+            {
+                "url": "https://example.com",
+                "timestamp": legacy_timestamp,
+                "title": "Example Domain",
+                "fs_version": "0.8.0",
+                "archive_results": [],
+            },
+        ),
+    )

    # Run update without filters - should import and migrate the legacy directory.
    update_process = subprocess.run(
-        ['archivebox', 'update'],
+        ["archivebox", "update"],
        capture_output=True,
        text=True,
        env=disable_extractors_dict,
@@ -36,10 +45,151 @@ def test_update_imports_orphaned_snapshots(tmp_path, process, disable_extractors
    conn.commit()
    conn.close()

-    assert row == ('https://example.com', '0.9.0')
+    assert row == ("https://example.com", "0.9.0")
    assert legacy_dir.is_symlink()

    migrated_dir = legacy_dir.resolve()
    assert migrated_dir.exists()
-    assert (migrated_dir / 'index.jsonl').exists()
-    assert (migrated_dir / 'singlefile.html').exists()
+    assert (migrated_dir / "index.jsonl").exists()
+    assert (migrated_dir / "singlefile.html").exists()
+
+
+@pytest.mark.django_db  # reindex_snapshots should reset a stored search result and re-run its plugin
+def test_reindex_snapshots_resets_existing_search_results_and_reruns_requested_plugins(monkeypatch):
+    from archivebox.base_models.models import get_or_create_system_user_pk
+    from archivebox.cli.archivebox_update import reindex_snapshots
+    from archivebox.core.models import ArchiveResult, Snapshot
+    from archivebox.crawls.models import Crawl
+    import archivebox.cli.archivebox_extract as extract_mod
+
+    crawl = Crawl.objects.create(
+        urls="https://example.com",
+        created_by_id=get_or_create_system_user_pk(),
+    )
+    snapshot = Snapshot.objects.create(
+        url="https://example.com",
+        crawl=crawl,
+        status=Snapshot.StatusChoices.SEALED,
+    )
+    result = ArchiveResult.objects.create(  # pre-existing succeeded result whose outputs are checked below
+        snapshot=snapshot,
+        plugin="search_backend_sqlite",
+        hook_name="on_Snapshot__90_index_sqlite.py",
+        status=ArchiveResult.StatusChoices.SUCCEEDED,
+        output_str="old index hit",
+        output_json={"indexed": True},
+        output_files={"search.sqlite3": {"size": 123}},
+        output_size=123,
+    )
+
+    captured: dict[str, object] = {}  # keyword arguments recorded by fake_run_plugins
+
+    def fake_run_plugins(*, args, records, wait, emit_results, plugins=""):
+        captured["args"] = args
+        captured["records"] = records
+        captured["wait"] = wait
+        captured["emit_results"] = emit_results
+        captured["plugins"] = plugins
+        return 0
+
+    monkeypatch.setattr(extract_mod, "run_plugins", fake_run_plugins)  # swap in the recording stub
+
+    stats = reindex_snapshots(
+        Snapshot.objects.filter(id=snapshot.id),
+        search_plugins=["search_backend_sqlite"],
+        batch_size=10,
+    )
+
+    result.refresh_from_db()  # reload the row so the assertions see what reindex_snapshots wrote
+
+    assert stats["processed"] == 1
+    assert stats["queued"] == 1
+    assert stats["reindexed"] == 1
+    assert result.status == ArchiveResult.StatusChoices.QUEUED
+    assert result.output_str == ""
+    assert result.output_json is None
+    assert result.output_files == {}
+    assert captured == {
+        "args": (),
+        "records": [{"type": "ArchiveResult", "snapshot_id": str(snapshot.id), "plugin": "search_backend_sqlite"}],
+        "wait": True,
+        "emit_results": False,
+        "plugins": "",
+    }
+
+
+@pytest.mark.django_db  # resume= should keep the resume snapshot and older ones, dropping newer ones
+def test_build_filtered_snapshots_queryset_respects_resume_cutoff():
+    from archivebox.base_models.models import get_or_create_system_user_pk
+    from archivebox.cli.archivebox_update import _build_filtered_snapshots_queryset
+    from archivebox.core.models import Snapshot
+    from archivebox.crawls.models import Crawl
+
+    crawl = Crawl.objects.create(
+        urls="https://example.com\nhttps://example.org\nhttps://example.net",
+        created_by_id=get_or_create_system_user_pk(),
+    )
+    base = timezone.make_aware(datetime(2026, 3, 23, 12, 0, 0))  # bookmarked_at of the newest snapshot
+    older = Snapshot.objects.create(
+        url="https://example.net",
+        crawl=crawl,
+        bookmarked_at=base - timedelta(hours=2),
+    )
+    middle = Snapshot.objects.create(
+        url="https://example.org",
+        crawl=crawl,
+        bookmarked_at=base - timedelta(hours=1),
+    )
+    newer = Snapshot.objects.create(
+        url="https://example.com",
+        crawl=crawl,
+        bookmarked_at=base,
+    )
+
+    snapshots = list(
+        _build_filtered_snapshots_queryset(
+            filter_patterns=(),
+            filter_type="exact",
+            before=None,
+            after=None,
+            resume=middle.timestamp,  # cutoff taken from the middle snapshot
+        ).values_list("id", flat=True),
+    )
+
+    assert str(newer.id) not in {str(snapshot_id) for snapshot_id in snapshots}  # also implied by the next line
+    assert set(map(str, snapshots)) == {str(middle.id), str(older.id)}
+
+
+@pytest.mark.django_db  # a null title in index.json should leave the stored snapshot title unchanged
+def test_reconcile_with_index_json_tolerates_null_title(tmp_path):
+    from archivebox.base_models.models import get_or_create_system_user_pk
+    from archivebox.core.models import Snapshot
+    from archivebox.crawls.models import Crawl
+
+    crawl = Crawl.objects.create(
+        urls="https://example.com",
+        created_by_id=get_or_create_system_user_pk(),
+    )
+    snapshot = Snapshot.objects.create(
+        url="https://example.com",
+        crawl=crawl,
+        title="Example Domain",
+        status=Snapshot.StatusChoices.SEALED,
+    )
+    output_dir = snapshot.output_dir
+    output_dir.mkdir(parents=True, exist_ok=True)
+    (output_dir / "index.json").write_text(
+        json.dumps(
+            {
+                "url": snapshot.url,
+                "timestamp": snapshot.timestamp,
+                "title": None,  # null title, unlike the "Example Domain" stored on the snapshot
+                "archive_results": [],
+            },
+        ),
+    )
+
+    snapshot.reconcile_with_index_json()
+    snapshot.refresh_from_db()  # re-read from the DB so the assert checks the saved title
+
+    assert snapshot.title == "Example Domain"
--- a/archivebox/tests/test_urls.py
+++ b/archivebox/tests/test_urls.py
@@ -49,19 +49,22 @@ def _build_script(body: str) -> str:
    from django.contrib.auth import get_user_model

    from archivebox.core.models import Snapshot, ArchiveResult
-    from archivebox.crawls.models import Crawl
    from archivebox.config.common import SERVER_CONFIG
    from archivebox.core.host_utils import (
        get_admin_host,
+        get_admin_base_url,
        get_api_host,
        get_web_host,
+        get_web_base_url,
        get_public_host,
+        get_snapshot_subdomain,
        get_snapshot_host,
        get_original_host,
        get_listen_subdomain,
        split_host_port,
        host_matches,
        is_snapshot_subdomain,
+        build_admin_url,
        build_snapshot_url,
    )

@@ -82,45 +85,12 @@ def _build_script(body: str) -> str:

    def get_snapshot():
        snapshot = Snapshot.objects.order_by("-created_at").first()
-        if snapshot is None:
-            admin = ensure_admin_user()
-            crawl = Crawl.objects.create(
-                urls="https://example.com",
-                created_by=admin,
-            )
-            snapshot = Snapshot.objects.create(
-                url="https://example.com",
-                title="Example Domain",
-                crawl=crawl,
-                status=Snapshot.StatusChoices.SEALED,
-            )
-            snapshot_dir = Path(snapshot.output_dir)
-            snapshot_dir.mkdir(parents=True, exist_ok=True)
-            (snapshot_dir / "index.json").write_text('{"url": "https://example.com"}', encoding="utf-8")
-            (snapshot_dir / "favicon.ico").write_bytes(b"ico")
-            screenshot_dir = snapshot_dir / "screenshot"
-            screenshot_dir.mkdir(parents=True, exist_ok=True)
-            (screenshot_dir / "screenshot.png").write_bytes(b"png")
-            responses_root = snapshot_dir / "responses" / snapshot.domain
-            responses_root.mkdir(parents=True, exist_ok=True)
-            (responses_root / "index.html").write_text(
-                "<!doctype html><html><body><h1>Example Domain</h1></body></html>",
-                encoding="utf-8",
-            )
-            ArchiveResult.objects.get_or_create(
-                snapshot=snapshot,
-                plugin="screenshot",
-                defaults={"status": "succeeded", "output_size": 1, "output_str": "."},
-            )
-            ArchiveResult.objects.get_or_create(
-                snapshot=snapshot,
-                plugin="responses",
-                defaults={"status": "succeeded", "output_size": 1, "output_str": "."},
-            )
+        assert snapshot is not None, "Expected real_archive_with_example to seed a snapshot"
        return snapshot

    def get_snapshot_files(snapshot):
        output_rel = None
+        reserved_snapshot_paths = {"index.html"}
        for output in snapshot.discover_outputs():
            candidate = output.get("path")
            if not candidate:
@@ -144,10 +114,22 @@ def _build_script(body: str) -> str:
            if not candidate.is_file():
                continue
            rel = candidate.relative_to(responses_root)
+            if str(rel) in reserved_snapshot_paths:
+                continue
            if not (Path(snapshot.output_dir) / rel).exists():
                response_file = candidate
                response_rel = str(rel)
                break
+        if response_file is None:
+            for candidate in responses_root.rglob("*"):
+                if not candidate.is_file():
+                    continue
+                rel = candidate.relative_to(responses_root)
+                if str(rel) in reserved_snapshot_paths:
+                    continue
+                response_file = candidate
+                response_rel = str(rel)
+                break
        if response_file is None:
            response_file = next(p for p in responses_root.rglob("*") if p.is_file())
            response_rel = str(response_file.relative_to(responses_root))
@@ -170,7 +152,7 @@ def _build_script(body: str) -> str:
            encoding="utf-8",
        )
        return "dangerous.html", "safe.json", "dangerous-response"
-    """
+    """,
    )
    return prelude + "\n" + textwrap.dedent(body)

@@ -179,13 +161,26 @@ class TestUrlRouting:
    data_dir: Path

    @pytest.fixture(autouse=True)
-    def _setup_data_dir(self, initialized_archive: Path) -> None:
-        self.data_dir = initialized_archive
+    def _setup_data_dir(self, real_archive_with_example: Path) -> None:
+        self.data_dir = real_archive_with_example

-    def _run(self, body: str, timeout: int = 120, mode: str | None = None) -> None:
+    def _run(
+        self,
+        body: str,
+        timeout: int = 120,
+        mode: str | None = None,
+        env_overrides: dict[str, str] | None = None,
+    ) -> None:
        script = _build_script(body)
-        env_overrides = {"SERVER_SECURITY_MODE": mode} if mode else None
-        result = _run_python(script, cwd=self.data_dir, timeout=timeout, env_overrides=env_overrides)
+        merged_env = dict(env_overrides or {})
+        if mode:
+            merged_env["SERVER_SECURITY_MODE"] = mode
+        result = _run_python(
+            script,
+            cwd=self.data_dir,
+            timeout=timeout,
+            env_overrides=merged_env or None,
+        )
        assert result.returncode == 0, result.stderr
        assert "OK" in result.stdout

@@ -200,6 +195,7 @@ class TestUrlRouting:
            admin_host = get_admin_host()
            api_host = get_api_host()
            public_host = get_public_host()
+            snapshot_subdomain = get_snapshot_subdomain(snapshot_id)
            snapshot_host = get_snapshot_host(snapshot_id)
            original_host = get_original_host(domain)
            base_host = SERVER_CONFIG.LISTEN_HOST
@@ -211,15 +207,17 @@ class TestUrlRouting:
            assert admin_host == "admin.archivebox.localhost:8000"
            assert api_host == "api.archivebox.localhost:8000"
            assert public_host == "public.archivebox.localhost:8000"
-            assert snapshot_host == f"{snapshot_id}.archivebox.localhost:8000"
+            assert snapshot_subdomain == f"snap-{snapshot_id[-12:].lower()}"
+            assert snapshot_host == f"{snapshot_subdomain}.archivebox.localhost:8000"
            assert original_host == f"{domain}.archivebox.localhost:8000"
            assert get_listen_subdomain(web_host) == "web"
            assert get_listen_subdomain(admin_host) == "admin"
            assert get_listen_subdomain(api_host) == "api"
-            assert get_listen_subdomain(snapshot_host) == snapshot_id
+            assert get_listen_subdomain(snapshot_host) == snapshot_subdomain
            assert get_listen_subdomain(original_host) == domain
            assert get_listen_subdomain(base_host) == ""
            assert host_matches(web_host, get_web_host())
+            assert is_snapshot_subdomain(snapshot_subdomain)
            assert is_snapshot_subdomain(snapshot_id)

            client = Client()
@@ -236,37 +234,77 @@ class TestUrlRouting:
            assert resp["Location"].startswith("/api/")

            print("OK")
-            """
+            """,
        )

    def test_web_admin_routing(self) -> None:
        self._run(
            """
            ensure_admin_user()
+            snapshot = get_snapshot()
            client = Client()
            web_host = get_web_host()
+            public_host = get_public_host()
            admin_host = get_admin_host()
+            snapshot_host = get_snapshot_host(str(snapshot.id))
+            original_host = get_original_host(snapshot.domain)

            resp = client.get("/admin/login/", HTTP_HOST=web_host)
            assert resp.status_code in (301, 302)
            assert admin_host in resp["Location"]

+            resp = client.get("/admin/login/?next=/admin/", HTTP_HOST=public_host)
+            assert resp.status_code in (301, 302)
+            assert resp["Location"] == f"http://{admin_host}/admin/login/?next=/admin/"
+
+            resp = client.get("/admin/login/?next=/admin/", HTTP_HOST=snapshot_host)
+            assert resp.status_code in (301, 302)
+            assert resp["Location"] == f"http://{admin_host}/admin/login/?next=/admin/"
+
+            resp = client.get("/admin/login/?next=/admin/", HTTP_HOST=original_host)
+            assert resp.status_code in (301, 302)
+            assert resp["Location"] == f"http://{admin_host}/admin/login/?next=/admin/"
+
            resp = client.get("/admin/login/", HTTP_HOST=admin_host)
            assert resp.status_code == 200

+            resp = client.get(f"/{snapshot.url_path}", HTTP_HOST=admin_host)
+            assert resp.status_code in (301, 302)
+            assert resp["Location"] == f"http://{snapshot_host}"
+
+            resp = client.get(f"/{snapshot.url_path}/index.html", HTTP_HOST=admin_host)
+            assert resp.status_code in (301, 302)
+            assert resp["Location"] == f"http://{snapshot_host}"
+
+            resp = client.get("/static/jquery.min.js", HTTP_HOST=snapshot_host)
+            assert resp.status_code == 200
+            assert "javascript" in (resp.headers.get("Content-Type") or "")
+
+            resp = client.get("/static/jquery.min.js", HTTP_HOST=original_host)
+            assert resp.status_code == 200
+            assert "javascript" in (resp.headers.get("Content-Type") or "")
+
            print("OK")
-            """
+            """,
        )

    def test_snapshot_routing_and_hosts(self) -> None:
        self._run(
            """
+            import io
+            import zipfile
+
            snapshot = get_snapshot()
            output_rel, response_file, response_rel, response_output_path = get_snapshot_files(snapshot)
            snapshot_id = str(snapshot.id)
+            snapshot_subdomain = get_snapshot_subdomain(snapshot_id)
            snapshot_host = get_snapshot_host(snapshot_id)
            original_host = get_original_host(snapshot.domain)
            web_host = get_web_host()
+            host_only, port = split_host_port(SERVER_CONFIG.LISTEN_HOST)
+            legacy_snapshot_host = f"{snapshot_id}.{host_only}"
+            if port:
+                legacy_snapshot_host = f"{legacy_snapshot_host}:{port}"

            client = Client()

@@ -289,6 +327,11 @@ class TestUrlRouting:
            assert resp.status_code in (301, 302)
            assert snapshot_host in resp["Location"]

+            resp = client.get("/", HTTP_HOST=legacy_snapshot_host)
+            assert resp.status_code in (301, 302)
+            assert resp["Location"].startswith(f"http://{snapshot_host}")
+            assert snapshot_subdomain in resp["Location"]
+
            resp = client.get(f"/{output_rel}", HTTP_HOST=snapshot_host)
            assert resp.status_code == 200
            assert response_body(resp) == Path(snapshot.output_dir, output_rel).read_bytes()
@@ -296,7 +339,10 @@ class TestUrlRouting:
            resp = client.get(f"/{response_rel}", HTTP_HOST=snapshot_host)
            assert resp.status_code == 200
            snapshot_body = response_body(resp)
-            if response_output_path.exists():
+            if response_rel == "index.html":
+                assert f"http://{snapshot_host}/".encode() in snapshot_body
+                assert b"See all files..." in snapshot_body
+            elif response_output_path.exists():
                assert snapshot_body == response_output_path.read_bytes()
            else:
                assert snapshot_body == response_file.read_bytes()
@@ -319,8 +365,149 @@ class TestUrlRouting:
            files_html = response_body(resp).decode("utf-8", "ignore")
            assert output_rel.split("/", 1)[0] in files_html

+            resp = client.get("/?files=1&download=zip", HTTP_HOST=snapshot_host)
+            assert resp.status_code == 200
+            assert resp["Content-Type"] == "application/zip"
+            assert ".zip" in resp["Content-Disposition"]
+            assert resp.streaming
+            with zipfile.ZipFile(io.BytesIO(response_body(resp))) as zip_file:
+                assert any(name.endswith(f"/{output_rel}") for name in zip_file.namelist())
+
+            output_dir = next((output.get("path", "").split("/", 1)[0] for output in snapshot.discover_outputs() if "/" in (output.get("path") or "")), None)
+            assert output_dir is not None
+            resp = client.get(f"/{output_dir}/", HTTP_HOST=snapshot_host)
+            assert resp.status_code == 200
+            dir_html = response_body(resp).decode("utf-8", "ignore")
+            assert f"Index of {output_dir}/" in dir_html
+
            print("OK")
+            """,
+        )
+
+    def test_safe_subdomains_original_domain_host_uses_latest_matching_response(self) -> None:
+        self._run(
            """
+            from datetime import timedelta
+            import shutil
+            from django.utils import timezone
+            from archivebox.crawls.models import Crawl
+
+            snapshot = get_snapshot()
+            original_host = get_original_host(snapshot.domain)
+            client = Client()
+
+            assert SERVER_CONFIG.SERVER_SECURITY_MODE == "safe-subdomains-fullreplay"
+
+            now = timezone.now()
+            created_by_id = snapshot.crawl.created_by_id
+            created_snapshots = []
+            created_crawls = []
+
+            def make_snapshot(url):
+                crawl = Crawl.objects.create(urls=url, created_by_id=created_by_id)
+                created_crawls.append(crawl)
+                snap = Snapshot.objects.create(url=url, crawl=crawl, status=Snapshot.StatusChoices.STARTED)
+                created_snapshots.append(snap)
+                return snap
+
+            try:
+                fixtures = (
+                    (make_snapshot("https://example.com"), now + timedelta(minutes=1), "old root"),
+                    (make_snapshot("https://example.com"), now + timedelta(minutes=2), "new root"),
+                    (make_snapshot("https://example.com/about.html"), now + timedelta(minutes=3), "old about"),
+                    (make_snapshot("https://example.com/about.html"), now + timedelta(minutes=4), "new about"),
+                )
+
+                for snap, stamp, content in fixtures:
+                    snap.created_at = stamp
+                    snap.bookmarked_at = stamp
+                    snap.downloaded_at = stamp
+                    snap.save(update_fields=["created_at", "bookmarked_at", "downloaded_at", "modified_at"])
+                    responses_root = Path(snap.output_dir) / "responses" / snap.domain
+                    responses_root.mkdir(parents=True, exist_ok=True)
+                    rel_path = "about.html" if snap.url.endswith("/about.html") else "index.html"
+                    (responses_root / rel_path).write_text(content, encoding="utf-8")
+
+                resp = client.get("/", HTTP_HOST=original_host)
+                assert resp.status_code == 200
+                root_html = response_body(resp).decode("utf-8", "ignore")
+                assert "new root" in root_html
+                assert "old root" not in root_html
+
+                resp = client.get("/about.html", HTTP_HOST=original_host)
+                assert resp.status_code == 200
+                about_html = response_body(resp).decode("utf-8", "ignore")
+                assert "new about" in about_html
+                assert "old about" not in about_html
+            finally:
+                for snap in created_snapshots:
+                    shutil.rmtree(snap.output_dir, ignore_errors=True)
+                for crawl in created_crawls:
+                    crawl.delete()
+
+            print("OK")
+            """,
+        )
+
+    def test_safe_subdomains_original_domain_host_falls_back_to_latest_snapshot_live_page(self) -> None:
+        self._run(
+            """
+            import shutil
+            from django.utils import timezone
+            from archivebox.crawls.models import Crawl
+
+            snapshot = get_snapshot()
+            fallback_domain = "fallback-original-host.example"
+            original_host = get_original_host(fallback_domain)
+            client = Client()
+
+            assert SERVER_CONFIG.SERVER_SECURITY_MODE == "safe-subdomains-fullreplay"
+
+            crawl = Crawl.objects.create(urls=f"https://{fallback_domain}", created_by_id=snapshot.crawl.created_by_id)
+            latest_snapshot = Snapshot.objects.create(
+                url=f"https://{fallback_domain}",
+                crawl=crawl,
+                status=Snapshot.StatusChoices.STARTED,
+            )
+
+            stamp = timezone.now()
+            latest_snapshot.created_at = stamp
+            latest_snapshot.bookmarked_at = stamp
+            latest_snapshot.downloaded_at = stamp
+            latest_snapshot.save(update_fields=["created_at", "bookmarked_at", "downloaded_at", "modified_at"])
+
+            try:
+                shutil.rmtree(Path(latest_snapshot.output_dir) / "responses", ignore_errors=True)
+
+                resp = client.get("/", HTTP_HOST=original_host)
+                assert resp.status_code == 200
+                html = response_body(resp).decode("utf-8", "ignore")
+                assert latest_snapshot.url in html
+                assert f"http://{get_snapshot_host(str(latest_snapshot.id))}/" in html
+            finally:
+                shutil.rmtree(latest_snapshot.output_dir, ignore_errors=True)
+                crawl.delete()
+
+            print("OK")
+            """,
+        )
+
+    def test_safe_subdomains_original_domain_host_redirects_to_save_page_now_when_missing_and_authenticated(self) -> None:
+        self._run(
+            """
+            ensure_admin_user()
+            client = Client()
+            client.login(username="testadmin", password="testpassword")
+
+            missing_domain = "missing-original-host.example"
+            original_host = get_original_host(missing_domain)
+            resp = client.get("/", HTTP_HOST=original_host)
+
+            assert resp.status_code in (301, 302)
+            assert resp["Location"] == f"http://{get_web_host()}/web/https://{missing_domain}"
+
+            print("OK")
+            """,
        )

    def test_safe_subdomains_fullreplay_leaves_risky_replay_unrestricted(self) -> None:
@@ -346,7 +533,7 @@ class TestUrlRouting:
            assert resp.headers.get("Content-Security-Policy") is None

            print("OK")
-            """
+            """,
        )

    def test_safe_onedomain_nojsreplay_routes_and_neuters_risky_documents(self) -> None:
@@ -396,6 +583,9 @@ class TestUrlRouting:
            assert resp.headers.get("Content-Security-Policy") is None
            assert resp.headers.get("X-Content-Type-Options") == "nosniff"

+            resp = client.get("/snapshot/{}/singlefile/".format(snapshot_id), HTTP_HOST=base_host)
+            assert resp.status_code == 404
+
            resp = client.get(f"/snapshot/{snapshot_id}/{sniffed_rel}", HTTP_HOST=base_host)
            assert resp.status_code == 200
            csp = resp.headers.get("Content-Security-Policy") or ""
@@ -486,6 +676,33 @@ class TestUrlRouting:
            mode="danger-onedomain-fullreplay",
        )

+    def test_onedomain_base_url_overrides_are_preserved_for_external_links(self) -> None:
+        self._run(
+            """
+            snapshot = get_snapshot()
+            snapshot_id = str(snapshot.id)
+            base_host = SERVER_CONFIG.LISTEN_HOST
+
+            assert SERVER_CONFIG.SERVER_SECURITY_MODE == "safe-onedomain-nojsreplay"
+            assert get_admin_host() == base_host
+            assert get_web_host() == base_host
+
+            assert get_admin_base_url() == "https://admin.archivebox.example"
+            assert get_web_base_url() == "https://archivebox.example"
+            assert build_admin_url("/admin/login/") == "https://admin.archivebox.example/admin/login/"
+            assert build_snapshot_url(snapshot_id, "index.jsonl") == (
+                f"https://archivebox.example/snapshot/{snapshot_id}/index.jsonl"
+            )
+
+            print("OK")
+            """,
+            mode="safe-onedomain-nojsreplay",
+            env_overrides={
+                "ADMIN_BASE_URL": "https://admin.archivebox.example",
+                "ARCHIVE_BASE_URL": "https://archivebox.example",
+            },
+        )
+
    def test_template_and_admin_links(self) -> None:
        self._run(
            """
@@ -510,6 +727,25 @@ class TestUrlRouting:
            live_html = response_body(resp).decode("utf-8", "ignore")
            assert f"http://{snapshot_host}/" in live_html
            assert f"http://{public_host}/static/archive.png" in live_html
+            assert "?preview=1" in live_html
+            assert "function createMainFrame(previousFrame)" in live_html
+            assert "function activateCardPreview(card, link)" in live_html
+            assert "ensureMainFrame(true)" in live_html
+            assert "previousFrame.parentNode.replaceChild(frame, previousFrame)" in live_html
+            assert "previousFrame.src = 'about:blank'" in live_html
+            assert "event.stopImmediatePropagation()" in live_html
+            assert "const matchingLink = [...document.querySelectorAll('a[target=preview]')].find" in live_html
+            assert "jQuery(link).click()" not in live_html
+            assert "searchParams.delete('preview')" in live_html
+            assert "doc.body.style.flexDirection = 'column'" in live_html
+            assert "doc.body.style.alignItems = 'center'" in live_html
+            assert "img.style.margin = '0 auto'" in live_html
+            assert "window.location.hash = getPreviewHashValue(link)" in live_html
+            assert "const selectedPreviewHash = decodeURIComponent(window.location.hash.slice(1)).toLowerCase()" in live_html
+            assert "pointer-events: none;" in live_html
+            assert "pointer-events: auto;" in live_html
+            assert 'class="thumbnail-click-overlay"' in live_html
+            assert "window.location.hash = getPreviewTypeFromPath(link)" not in live_html
            assert ">WARC<" not in live_html
            assert ">Media<" not in live_html
            assert ">Git<" not in live_html
@@ -517,6 +753,25 @@ class TestUrlRouting:
            static_html = Path(snapshot.output_dir, "index.html").read_text(encoding="utf-8", errors="ignore")
            assert f"http://{snapshot_host}/" in static_html
            assert f"http://{public_host}/static/archive.png" in static_html
+            assert "?preview=1" in static_html
+            assert "function createMainFrame(previousFrame)" in static_html
+            assert "function activateCardPreview(card, link)" in static_html
+            assert "ensureMainFrame(true)" in static_html
+            assert "previousFrame.parentNode.replaceChild(frame, previousFrame)" in static_html
+            assert "previousFrame.src = 'about:blank'" in static_html
+            assert "e.stopImmediatePropagation()" in static_html
+            assert "const matchingLink = [...document.querySelectorAll('a[target=preview]')].find" in static_html
+            assert "jQuery(link).click()" not in static_html
+            assert "searchParams.delete('preview')" in static_html
+            assert "doc.body.style.flexDirection = 'column'" in static_html
+            assert "doc.body.style.alignItems = 'center'" in static_html
+            assert "img.style.margin = '0 auto'" in static_html
+            assert "window.location.hash = getPreviewHashValue(link)" in static_html
+            assert "const selectedPreviewHash = decodeURIComponent(window.location.hash.slice(1)).toLowerCase()" in static_html
+            assert "pointer-events: none;" in static_html
+            assert "pointer-events: auto;" in static_html
+            assert 'class="thumbnail-click-overlay"' in static_html
+            assert "window.location.hash = getPreviewTypeFromPath(link)" not in static_html
            assert ">WARC<" not in static_html
            assert ">Media<" not in static_html
            assert ">Git<" not in static_html
@@ -536,7 +791,53 @@ class TestUrlRouting:
            assert f"http://{snapshot_host}/" in ar_html

            print("OK")
+            """,
+        )
+
+    def test_snapshot_pages_preview_filesystem_text_outputs(self) -> None:
+        self._run(
            """
+            snapshot = get_snapshot()
+            web_host = get_web_host()
+
+            consolelog_dir = Path(snapshot.output_dir) / "consolelog"
+            consolelog_dir.mkdir(parents=True, exist_ok=True)
+            (consolelog_dir / "console.jsonl").write_text(
+                '{"level":"log","text":"console preview works"}\\n'
+                '{"level":"warn","text":"second line"}\\n',
+                encoding="utf-8",
+            )
+
+            client = Client()
+            resp = client.get(f"/{snapshot.url_path}/index.html", HTTP_HOST=web_host)
+            assert resp.status_code == 200
+            live_html = response_body(resp).decode("utf-8", "ignore")
+            assert 'data-plugin="consolelog" data-compact="1"' in live_html
+            assert "console preview works" in live_html
+            snapshot_host = get_snapshot_host(str(snapshot.id))
+            resp = client.get("/consolelog/console.jsonl?preview=1", HTTP_HOST=snapshot_host)
+            assert resp.status_code == 200
+            assert resp["Content-Type"].startswith("text/html")
+            preview_html = response_body(resp).decode("utf-8", "ignore")
+            assert "archivebox-text-preview" in preview_html
+            assert "console preview works" in preview_html
+
+            screenshot_dir = Path(snapshot.output_dir) / "screenshot"
+            screenshot_dir.mkdir(parents=True, exist_ok=True)
+            (screenshot_dir / "screenshot.png").write_bytes(
+                bytes.fromhex(
+                    "89504e470d0a1a0a"
+                    "0000000d49484452000000010000000108060000001f15c489"
+                    "0000000d49444154789c63f8ffffff7f0009fb03fd2a86e38a"
+                    "0000000049454e44ae426082",
+                ),
+            )
+            resp = client.get("/screenshot/screenshot.png?preview=1", HTTP_HOST=snapshot_host)
+            assert resp.status_code == 200
+            assert resp["Content-Type"].startswith("text/html")
+
+            print("OK")
+            """,
        )

    def test_api_available_on_admin_and_api_hosts(self) -> None:
@@ -553,7 +854,7 @@ class TestUrlRouting:
            assert resp.status_code == 200

            print("OK")
-            """
+            """,
        )

    def test_api_auth_token_endpoint_available_on_admin_and_api_hosts(self) -> None:
@@ -587,7 +888,7 @@ class TestUrlRouting:
            assert data.get("token")

            print("OK")
-            """
+            """,
        )

    def test_api_post_with_token_on_admin_and_api_hosts(self) -> None:
@@ -631,5 +932,5 @@ class TestUrlRouting:
            assert data.get("tag_name") == "apitest-tag"

            print("OK")
-            """
+            """,
        )
--- a/archivebox/tests/test_util.py
+++ b/archivebox/tests/test_util.py
@@ -16,6 +16,7 @@ class _ExampleHandler(BaseHTTPRequestHandler):
    def log_message(self, format, *args):
        return

+
 def test_download_url_downloads_content():
    server = ThreadingHTTPServer(("127.0.0.1", 0), _ExampleHandler)
    thread = Thread(target=server.serve_forever, daemon=True)