fix lint

2026-04-06 07:47:53 +10:00 · 2026-03-15 18:45:29 -07:00
parent f97725d16f
commit 934e02695b
111 changed files with 919 additions and 461 deletions
--- a/archivebox/api/models.py
+++ b/archivebox/api/models.py
@@ -2,7 +2,6 @@ __package__ = 'archivebox.api'

 import secrets
 from archivebox.uuid_compat import uuid7
-from datetime import timedelta

 from django.conf import settings
 from django.db import models
--- a/archivebox/api/tests.py
+++ b/archivebox/api/tests.py
@@ -1,16 +1,17 @@
-import os
-import django
+import importlib
 from io import StringIO
 from types import SimpleNamespace

-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
-django.setup()
+from archivebox.config.django import setup_django

-from django.contrib.auth.models import User
-from django.test import TestCase
+setup_django()

-from archivebox.api.v1_cli import ScheduleCommandSchema, cli_schedule
-from archivebox.crawls.models import CrawlSchedule
+User = importlib.import_module('django.contrib.auth.models').User
+TestCase = importlib.import_module('django.test').TestCase
+api_v1_cli = importlib.import_module('archivebox.api.v1_cli')
+ScheduleCommandSchema = api_v1_cli.ScheduleCommandSchema
+cli_schedule = api_v1_cli.cli_schedule
+CrawlSchedule = importlib.import_module('archivebox.crawls.models').CrawlSchedule


 class CLIScheduleAPITests(TestCase):
--- a/archivebox/api/v1_auth.py
+++ b/archivebox/api/v1_auth.py
@@ -3,10 +3,7 @@ __package__ = 'archivebox.api'
 from typing import Optional

 from ninja import Router, Schema
-from django.utils import timezone
-from datetime import timedelta

-from archivebox.api.models import APIToken
 from archivebox.api.auth import auth_using_token, auth_using_password, get_or_create_api_token


--- a/archivebox/api/v1_crawls.py
+++ b/archivebox/api/v1_crawls.py
@@ -5,7 +5,6 @@ from typing import List, Optional
 from datetime import datetime
 from django.utils import timezone

-from django.db.models import Q
 from django.contrib.auth import get_user_model

 from ninja import Router, Schema
--- a/archivebox/base_models/admin.py
+++ b/archivebox/base_models/admin.py
@@ -6,7 +6,7 @@ import json

 from django import forms
 from django.contrib import admin
-from django.utils.html import format_html, mark_safe
+from django.utils.html import mark_safe
 from django_object_actions import DjangoObjectActions


--- a/archivebox/base_models/models.py
+++ b/archivebox/base_models/models.py
@@ -2,12 +2,9 @@

 __package__ = 'archivebox.base_models'

-from uuid import UUID
 from archivebox.uuid_compat import uuid7
-from typing import ClassVar
 from pathlib import Path

-from django.contrib import admin
 from django.db import models
 from django.db.models import F
 from django.utils import timezone
@@ -17,8 +14,6 @@ from django.conf import settings

 from django_stubs_ext.db.models import TypedModelMeta

-from archivebox import DATA_DIR
-from archivebox.misc.hashing import get_dir_info


 def get_or_create_system_user_pk(username='system'):
--- a/archivebox/cli/archivebox_add.py
+++ b/archivebox/cli/archivebox_add.py
@@ -57,6 +57,7 @@ def add(urls: str | list[str],
    from archivebox.core.models import Snapshot
    from archivebox.crawls.models import Crawl
    from archivebox.base_models.models import get_or_create_system_user_pk
+    from archivebox.personas.models import Persona
    from archivebox.workers.orchestrator import Orchestrator
    from archivebox.misc.logging_util import printable_filesize
    from archivebox.misc.system import get_dir_size
@@ -79,11 +80,15 @@ def add(urls: str | list[str],

    # Read URLs directly into crawl
    urls_content = sources_file.read_text()
+    persona_name = (persona or 'Default').strip() or 'Default'
+    persona_obj, _ = Persona.objects.get_or_create(name=persona_name)
+    persona_obj.ensure_dirs()

    crawl = Crawl.objects.create(
        urls=urls_content,
        max_depth=depth,
        tags_str=tag,
+        persona_id=persona_obj.id,
        label=f'{USER}@{HOSTNAME} $ {cmd_str} [{timestamp}]',
        created_by_id=created_by_id,
        config={
@@ -91,7 +96,7 @@ def add(urls: str | list[str],
            'INDEX_ONLY': index_only,
            'OVERWRITE': overwrite,
            'PLUGINS': plugins,
-            'DEFAULT_PERSONA': persona or 'Default',
+            'DEFAULT_PERSONA': persona_name,
            'PARSER': parser,
        }
    )
@@ -135,8 +140,7 @@ def add(urls: str | list[str],
        print('[yellow]\\[*] URLs queued. Orchestrator will process them (run `archivebox server` if not already running).[/yellow]')
    else:
        # Foreground mode: run full orchestrator until all work is done
-        print(f'[green]\\[*] Starting orchestrator to process crawl...[/green]')
-        from archivebox.workers.orchestrator import Orchestrator
+        print('[green]\\[*] Starting orchestrator to process crawl...[/green]')
        orchestrator = Orchestrator(exit_on_idle=True, crawl_id=str(crawl.id))
        orchestrator.runloop()  # Block until complete

--- a/archivebox/cli/archivebox_config.py
+++ b/archivebox/cli/archivebox_config.py
@@ -94,7 +94,7 @@ def config(*keys,

        # Display all plugin config in single [PLUGINS] section
        if plugin_keys:
-            print(f'[grey53]\\[PLUGINS][/grey53]')
+            print('[grey53]\\[PLUGINS][/grey53]')
            print(benedict(plugin_keys).to_toml(encoder=CustomTOMLEncoder()).strip().replace('\n\n', '\n'))
            print('[grey53]################################################################[/grey53]')

--- a/archivebox/cli/archivebox_extract.py
+++ b/archivebox/cli/archivebox_extract.py
@@ -31,7 +31,6 @@ __package__ = 'archivebox.cli'
 __command__ = 'archivebox extract'

 import sys
-from typing import Optional, List

 import rich_click as click

--- a/archivebox/cli/archivebox_install.py
+++ b/archivebox/cli/archivebox_install.py
@@ -3,8 +3,6 @@
 __package__ = 'archivebox.cli'

 import os
-import sys
-import shutil

 import rich_click as click
 from rich import print
--- a/archivebox/cli/archivebox_persona.py
+++ b/archivebox/cli/archivebox_persona.py
@@ -410,7 +410,6 @@ def create_personas(
    """
    from archivebox.misc.jsonl import write_record
    from archivebox.personas.models import Persona
-    from archivebox.config.constants import CONSTANTS

    is_tty = sys.stdout.isatty()
    name_list = list(names) if names else []
@@ -493,10 +492,10 @@ def create_personas(
                        'SingletonLock', 'SingletonSocket', 'SingletonCookie',
                    ),
                )
-                rprint(f'[green]Copied browser profile to persona[/green]', file=sys.stderr)
+                rprint('[green]Copied browser profile to persona[/green]', file=sys.stderr)

                # Extract cookies via CDP
-                rprint(f'[dim]Extracting cookies via CDP...[/dim]', file=sys.stderr)
+                rprint('[dim]Extracting cookies via CDP...[/dim]', file=sys.stderr)

                if extract_cookies_via_cdp(
                    persona_chrome_dir,
@@ -506,8 +505,8 @@ def create_personas(
                ):
                    rprint(f'[green]Extracted cookies to {cookies_file}[/green]', file=sys.stderr)
                else:
-                    rprint(f'[yellow]Could not extract cookies automatically.[/yellow]', file=sys.stderr)
-                    rprint(f'[dim]You can manually export cookies using a browser extension.[/dim]', file=sys.stderr)
+                    rprint('[yellow]Could not extract cookies automatically.[/yellow]', file=sys.stderr)
+                    rprint('[dim]You can manually export cookies using a browser extension.[/dim]', file=sys.stderr)

            except Exception as e:
                rprint(f'[red]Failed to copy browser profile: {e}[/red]', file=sys.stderr)
--- a/archivebox/cli/archivebox_pluginmap.py
+++ b/archivebox/cli/archivebox_pluginmap.py
@@ -3,7 +3,6 @@
 __package__ = 'archivebox.cli'

 from typing import Optional
-from pathlib import Path

 import rich_click as click

--- a/archivebox/cli/archivebox_search.py
+++ b/archivebox/cli/archivebox_search.py
@@ -4,7 +4,7 @@ __package__ = 'archivebox.cli'
 __command__ = 'archivebox search'

 from pathlib import Path
-from typing import Optional, List, Any
+from typing import Optional, List

 import rich_click as click
 from rich import print
@@ -71,7 +71,6 @@ def search(filter_patterns: list[str] | None=None,
           csv: str | None=None,
           with_headers: bool=False):
    """List, filter, and export information about archive entries"""
-    from archivebox.core.models import Snapshot

    if with_headers and not (json or html or csv):
        stderr('[X] --with-headers requires --json, --html or --csv\n', color='red')
--- a/archivebox/cli/archivebox_server.py
+++ b/archivebox/cli/archivebox_server.py
@@ -99,7 +99,6 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
            get_existing_supervisord_process,
            get_worker,
            start_server_workers,
-            tail_multiple_worker_logs,
            is_port_in_use,
        )
        from archivebox.workers.orchestrator import Orchestrator
@@ -108,14 +107,14 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
        if is_port_in_use(host, int(port)):
            print(f'[red][X] Error: Port {port} is already in use[/red]')
            print(f'    Another process (possibly daphne) is already listening on {host}:{port}')
-            print(f'    Stop the conflicting process or choose a different port')
+            print('    Stop the conflicting process or choose a different port')
            sys.exit(1)

        # Check if orchestrator is already running for this data directory
        if Orchestrator.is_running():
-            print(f'[red][X] Error: ArchiveBox orchestrator is already running for this data directory[/red]')
-            print(f'    Stop the existing orchestrator before starting a new server')
-            print(f'    To stop: pkill -f "archivebox manage orchestrator"')
+            print('[red][X] Error: ArchiveBox orchestrator is already running for this data directory[/red]')
+            print('    Stop the existing orchestrator before starting a new server')
+            print('    To stop: pkill -f "archivebox manage orchestrator"')
            sys.exit(1)

        # Check if supervisord is already running
@@ -129,7 +128,7 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
                print('[red][X] Error: ArchiveBox server is already running[/red]')
                print(f'    [green]√[/green] Web server (worker_daphne) is RUNNING on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
                if orchestrator_proc and orchestrator_proc.get('statename') == 'RUNNING':
-                    print(f'    [green]√[/green] Background worker (worker_orchestrator) is RUNNING')
+                    print('    [green]√[/green] Background worker (worker_orchestrator) is RUNNING')
                print()
                print('[yellow]To stop the existing server, run:[/yellow]')
                print('    pkill -f "archivebox server"')
--- a/archivebox/cli/archivebox_status.py
+++ b/archivebox/cli/archivebox_status.py
@@ -128,13 +128,13 @@ def status(out_dir: Path=DATA_DIR) -> None:
        if not snapshot.downloaded_at:
            continue
        print(
-            '[grey53] ' +
            (
+                '[grey53] '
                f'   > {str(snapshot.downloaded_at)[:16]} '
                f'[{snapshot.num_outputs} {("X", "√")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] '
                f'"{snapshot.title}": {snapshot.url}'
-            )[:SHELL_CONFIG.TERM_WIDTH]
-            + '[grey53]',
+                '[/grey53]'
+            )[:SHELL_CONFIG.TERM_WIDTH],
        )
    print('[grey53]   ...')

--- a/archivebox/cli/archivebox_update.py
+++ b/archivebox/cli/archivebox_update.py
@@ -36,8 +36,6 @@ def update(filter_patterns: Iterable[str] = (),
    from archivebox.config.django import setup_django
    setup_django()

-    from archivebox.core.models import Snapshot
-    from django.utils import timezone
    from django.core.management import call_command

    # Run migrations first to ensure DB schema is up-to-date
--- a/archivebox/cli/archivebox_version.py
+++ b/archivebox/cli/archivebox_version.py
@@ -6,7 +6,7 @@ import sys
 import os
 import platform
 from pathlib import Path
-from typing import Iterable, Optional
+from typing import Iterable

 import rich_click as click

--- a/archivebox/cli/tests.py
+++ b/archivebox/cli/tests.py
@@ -3,13 +3,13 @@
 __package__ = 'archivebox.cli'


+import importlib
 import os
-import sys
 import shutil
+import sys
 import unittest
-from pathlib import Path
-
 from contextlib import contextmanager
+from pathlib import Path

 TEST_CONFIG = {
    'USE_COLOR': 'False',
@@ -30,18 +30,15 @@ TEST_CONFIG = {
 DATA_DIR = 'data.tests'
 os.environ.update(TEST_CONFIG)

-from ..main import init
-from archivebox.config.constants import (
-    SQL_INDEX_FILENAME,
-    JSON_INDEX_FILENAME,
-    HTML_INDEX_FILENAME,
-)
-
-from . import (
-    archivebox_init,
-    archivebox_add,
-    archivebox_remove,
-)
+init = importlib.import_module('archivebox.main').init
+constants = importlib.import_module('archivebox.config.constants')
+SQL_INDEX_FILENAME = constants.SQL_INDEX_FILENAME
+JSON_INDEX_FILENAME = constants.JSON_INDEX_FILENAME
+HTML_INDEX_FILENAME = constants.HTML_INDEX_FILENAME
+archivebox_init = importlib.import_module('archivebox.cli.archivebox_init')
+archivebox_add = importlib.import_module('archivebox.cli.archivebox_add')
+archivebox_remove = importlib.import_module('archivebox.cli.archivebox_remove')
+parse_json_main_index = importlib.import_module('archivebox.misc.legacy').parse_json_main_index

 HIDE_CLI_OUTPUT = True

@@ -68,6 +65,13 @@ stdout = sys.stdout
 stderr = sys.stderr


+def load_main_index(*, out_dir: str):
+    index_path = Path(out_dir) / JSON_INDEX_FILENAME
+    if not index_path.exists():
+        raise FileNotFoundError(index_path)
+    return list(parse_json_main_index(Path(out_dir)))
+
+
@contextmanager
 def output_hidden(show_failing=True):
    if not HIDE_CLI_OUTPUT:
--- a/archivebox/cli/tests_piping.py
+++ b/archivebox/cli/tests_piping.py
@@ -23,7 +23,6 @@ Each command should:
 __package__ = 'archivebox.cli'

 import os
-import sys
 import json
 import shutil
 import tempfile
@@ -101,7 +100,7 @@ class TestJSONLParsing(unittest.TestCase):

    def test_parse_jsonl_with_id(self):
        """JSONL with id field should be recognized."""
-        from archivebox.misc.jsonl import parse_line, TYPE_SNAPSHOT
+        from archivebox.misc.jsonl import parse_line

        line = '{"type": "Snapshot", "id": "abc123", "url": "https://example.com"}'
        result = parse_line(line)
@@ -576,8 +575,7 @@ class TestPipingWorkflowIntegration(unittest.TestCase):
        """
        from archivebox.core.models import Snapshot
        from archivebox.misc.jsonl import (
-            read_args_or_stdin, write_record,
-            TYPE_SNAPSHOT
+            read_args_or_stdin, TYPE_SNAPSHOT
        )
        from archivebox.base_models.models import get_or_create_system_user_pk

@@ -608,7 +606,7 @@ class TestPipingWorkflowIntegration(unittest.TestCase):
        Test: archivebox snapshot URL | archivebox extract
        Extract should accept JSONL output from snapshot command.
        """
-        from archivebox.core.models import Snapshot, ArchiveResult
+        from archivebox.core.models import Snapshot
        from archivebox.misc.jsonl import (
            read_args_or_stdin,
            TYPE_SNAPSHOT
@@ -783,7 +781,6 @@ class TestParserPluginWorkflows(unittest.TestCase):
        Test: archivebox crawl --plugin=parse_html_urls URL | archivebox snapshot | archivebox extract
        """
        from archivebox.hooks import collect_urls_from_plugins
-        from archivebox.misc.jsonl import TYPE_SNAPSHOT

        # Create mock output directory
        snapshot_dir = Path(self.test_dir) / 'archive' / 'html-parser-test'
@@ -938,7 +935,6 @@ class TestPassThroughBehavior(unittest.TestCase):

    def test_crawl_passes_through_other_types(self):
        """crawl create should pass through records with other types."""
-        from archivebox.misc.jsonl import TYPE_CRAWL

        # Input: a Tag record (not a Crawl or URL)
        tag_record = {'type': 'Tag', 'id': 'test-tag', 'name': 'example'}
@@ -946,8 +942,9 @@ class TestPassThroughBehavior(unittest.TestCase):

        # Mock stdin with both records
        stdin = StringIO(
-            json.dumps(tag_record) + '\n' +
-            json.dumps(url_record)
+            json.dumps(tag_record)
+            + '\n'
+            + json.dumps(url_record)
        )
        stdin.isatty = lambda: False

@@ -964,7 +961,7 @@ class TestPassThroughBehavior(unittest.TestCase):

    def test_snapshot_passes_through_crawl(self):
        """snapshot create should pass through Crawl records."""
-        from archivebox.misc.jsonl import TYPE_CRAWL, TYPE_SNAPSHOT
+        from archivebox.misc.jsonl import TYPE_CRAWL

        crawl_record = {
            'type': TYPE_CRAWL,
--- a/archivebox/config/init.py
+++ b/archivebox/config/init.py
@@ -8,10 +8,6 @@ and other modules that expect to import config values directly.
 __package__ = 'archivebox.config'
 __order__ = 200

-import shutil
-from pathlib import Path
-from typing import Dict, List, Optional
-
 from .paths import (
    PACKAGE_DIR,                                    # noqa
    DATA_DIR,                                       # noqa
@@ -31,6 +27,7 @@ def _get_config():
    from .common import ARCHIVING_CONFIG, STORAGE_CONFIG
    return ARCHIVING_CONFIG, STORAGE_CONFIG

+
 # Direct exports (evaluated at import time for backwards compat)
 # These are recalculated each time the module attribute is accessed

--- a/archivebox/config/collection.py
+++ b/archivebox/config/collection.py
@@ -9,7 +9,6 @@ from configparser import ConfigParser

 from benedict import benedict

-import archivebox

 from archivebox.config.constants import CONSTANTS

--- a/archivebox/config/configset.py
+++ b/archivebox/config/configset.py
@@ -11,10 +11,10 @@ __package__ = "archivebox.config"
 import os
 import json
 from pathlib import Path
-from typing import Any, Dict, Optional, List, Type, Tuple, TYPE_CHECKING, cast
+from typing import Any, Dict, Optional, Type, Tuple
 from configparser import ConfigParser

-from pydantic import Field, ConfigDict
+from pydantic import ConfigDict
 from pydantic_settings import BaseSettings, PydanticBaseSettingsSource


@@ -166,6 +166,23 @@ def get_config(

    if user is None and crawl and hasattr(crawl, "created_by"):
        user = crawl.created_by
+
+    if persona is None and crawl is not None:
+        try:
+            from archivebox.personas.models import Persona
+
+            persona_id = getattr(crawl, "persona_id", None)
+            if persona_id:
+                persona = Persona.objects.filter(id=persona_id).first()
+
+            if persona is None:
+                crawl_config = getattr(crawl, "config", None) or {}
+                default_persona_name = crawl_config.get("DEFAULT_PERSONA")
+                if default_persona_name:
+                    persona, _ = Persona.objects.get_or_create(name=str(default_persona_name).strip() or "Default")
+                    persona.ensure_dirs()
+        except Exception:
+            pass
    from archivebox.config.constants import CONSTANTS
    from archivebox.config.common import (
        SHELL_CONFIG,
--- a/archivebox/config/django.py
+++ b/archivebox/config/django.py
@@ -100,9 +100,11 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
                return

        from django.conf import settings
+        from archivebox.core.settings_logging import ERROR_LOG as DEFAULT_ERROR_LOG

        # log startup message to the error log
-        with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
+        error_log = getattr(settings, 'ERROR_LOG', DEFAULT_ERROR_LOG)
+        with open(error_log, "a", encoding='utf-8') as f:
            command = ' '.join(sys.argv)
            ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
            f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")
--- a/archivebox/config/permissions.py
+++ b/archivebox/config/permissions.py
@@ -46,7 +46,6 @@ if RUNNING_AS_UID == 0:
        # if we are running as root it's really hard to figure out what the correct archivebox user should be
        # as a last resort instead of setting DATA_DIR ownership to 0:0 (which breaks it for non-root users)
        # check if 911:911 archivebox user exists on host system, and use it instead of 0
-        import pwd
        if pwd.getpwuid(DEFAULT_PUID).pw_name == 'archivebox':
            FALLBACK_UID = DEFAULT_PUID
            FALLBACK_GID = DEFAULT_PGID
--- a/archivebox/config/views.py
+++ b/archivebox/config/views.py
@@ -3,7 +3,6 @@ __package__ = 'archivebox.config'
 import os
 import shutil
 import inspect
-from pathlib import Path
 from typing import Any, List, Dict, cast
 from benedict import benedict

@@ -30,11 +29,11 @@ KNOWN_BINARIES = [
 ]


-def obj_to_yaml(obj: Any, indent: int=0) -> str:
+def obj_to_yaml(obj: Any, indent: int = 0) -> str:
    indent_str = "  " * indent
    if indent == 0:
        indent_str = '\n'  # put extra newline between top-level entries
-    
+
    if isinstance(obj, dict):
        if not obj:
            return "{}"
@@ -42,7 +41,7 @@ def obj_to_yaml(obj: Any, indent: int=0) -> str:
        for key, value in obj.items():
            result += f"{indent_str}{key}:{obj_to_yaml(value, indent + 1)}\n"
        return result
-    
+
    elif isinstance(obj, list):
        if not obj:
            return "[]"
@@ -50,16 +49,16 @@ def obj_to_yaml(obj: Any, indent: int=0) -> str:
        for item in obj:
            result += f"{indent_str}- {obj_to_yaml(item, indent + 1).lstrip()}\n"
        return result.rstrip()
-    
+
    elif isinstance(obj, str):
        if "\n" in obj:
            return f" |\n{indent_str}  " + obj.replace("\n", f"\n{indent_str}  ")
        else:
            return f" {obj}"
-    
+
    elif isinstance(obj, (int, float, bool)):
        return f" {str(obj)}"
-    
+
    elif callable(obj):
        source = '\n'.join(
            '' if 'def ' in line else line
@@ -67,7 +66,7 @@ def obj_to_yaml(obj: Any, indent: int=0) -> str:
            if line.strip()
        ).split('lambda: ')[-1].rstrip(',')
        return f" {indent_str}  " + source.replace("\n", f"\n{indent_str}  ")
-    
+
    else:
        return f" {str(obj)}"

@@ -75,7 +74,7 @@ def obj_to_yaml(obj: Any, indent: int=0) -> str:
 def get_detected_binaries() -> Dict[str, Dict[str, Any]]:
    """Detect available binaries using shutil.which."""
    binaries = {}
-    
+
    for name in KNOWN_BINARIES:
        path = shutil.which(name)
        if path:
@@ -85,7 +84,7 @@ def get_detected_binaries() -> Dict[str, Dict[str, Any]]:
                'version': None,  # Could add version detection later
                'is_available': True,
            }
-    
+
    return binaries


@@ -144,19 +143,19 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:

    # Get binaries from database (previously detected/installed)
    db_binaries = {b.name: b for b in Binary.objects.all()}
-    
-    # Get currently detectable binaries  
+
+    # Get currently detectable binaries
    detected = get_detected_binaries()
-    
+
    # Merge and display
    all_binary_names = sorted(set(list(db_binaries.keys()) + list(detected.keys())))
-    
+
    for name in all_binary_names:
        db_binary = db_binaries.get(name)
        detected_binary = detected.get(name)
-        
+
        rows['Binary Name'].append(ItemLink(name, key=name))
-        
+
        if db_binary:
            rows['Found Version'].append(f'✅ {db_binary.version}' if db_binary.version else '✅ found')
            rows['Provided By'].append(db_binary.binprovider or 'PATH')
@@ -175,6 +174,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
        table=rows,
    )

+
@render_with_item_view
 def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:

@@ -203,7 +203,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
        )
    except Binary.DoesNotExist:
        pass
-    
+
    # Try to detect from PATH
    path = shutil.which(key)
    if path:
@@ -224,7 +224,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
                },
            ],
        )
-    
+
    return ItemContext(
        slug=key,
        title=key,
@@ -286,6 +286,7 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
        table=rows,
    )

+
@render_with_item_view
 def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    import json
@@ -314,7 +315,10 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    # Add config.json data if available
    if plugin.get('config'):
        config_json = json.dumps(plugin['config'], indent=2)
-        fields["config.json"] = mark_safe(f'<pre style="max-height: 600px; overflow-y: auto; background: #f5f5f5; padding: 10px; border-radius: 4px;"><code>{config_json}</code></pre>')
+        fields["config.json"] = mark_safe(
+            '<pre style="max-height: 600px; overflow-y: auto; background: #f5f5f5; '
+            f'padding: 10px; border-radius: 4px;"><code>{config_json}</code></pre>'
+        )

        # Also extract and display individual config properties for easier viewing
        if 'properties' in plugin['config']:
@@ -322,7 +326,6 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
            properties_summary = []
            for prop_name, prop_info in config_properties.items():
                prop_type = prop_info.get('type', 'unknown')
-                prop_default = prop_info.get('default', 'N/A')
                prop_desc = prop_info.get('description', '')
                properties_summary.append(f"• {prop_name} ({prop_type}): {prop_desc}")

@@ -365,7 +368,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
            title="No running worker processes",
            table=rows,
        )
-        
+
    all_config_entries = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or [])
    all_config = {config["name"]: benedict(config) for config in all_config_entries}

@@ -514,7 +517,7 @@ def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
@render_with_item_view
 def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    assert request.user.is_superuser, "Must be a superuser to view configuration settings."
-    
+
    log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]

    log_text = log_file.read_text()
--- a/archivebox/core/admin_site.py
+++ b/archivebox/core/admin_site.py
@@ -1,8 +1,8 @@
 __package__ = 'archivebox.core'

 from django.contrib import admin
+from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls

-import archivebox

 class ArchiveBoxAdmin(admin.AdminSite):
    site_header = 'ArchiveBox'
@@ -20,7 +20,6 @@ archivebox_admin = ArchiveBoxAdmin()
 # patch admin with methods to add data views (implemented by admin_data_views package)
 # https://github.com/MrThearMan/django-admin-data-views
 # https://mrthearman.github.io/django-admin-data-views/setup/
-from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
 archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
 archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin)       # type: ignore
 archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin)           # type: ignore
--- a/archivebox/core/admin_snapshots.py
+++ b/archivebox/core/admin_snapshots.py
@@ -26,7 +26,7 @@ from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
 from archivebox.workers.tasks import bg_archive_snapshots, bg_add

 from archivebox.core.models import Tag, Snapshot, ArchiveResult
-from archivebox.core.admin_archiveresults import ArchiveResultInline, render_archiveresults_list
+from archivebox.core.admin_archiveresults import render_archiveresults_list
 from archivebox.core.widgets import TagEditorWidget, InlineTagEditorWidget


@@ -712,8 +712,6 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
        description="🔁 Redo Failed"
    )
    def update_snapshots(self, request, queryset):
-        count = queryset.count()
-
        queued = bg_archive_snapshots(queryset, kwargs={"overwrite": False, "out_dir": DATA_DIR})

        messages.success(
@@ -741,8 +739,6 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
        description="🔄 Redo"
    )
    def overwrite_snapshots(self, request, queryset):
-        count = queryset.count()
-
        queued = bg_archive_snapshots(queryset, kwargs={"overwrite": True, "out_dir": DATA_DIR})

        messages.success(
--- a/archivebox/core/apps.py
+++ b/archivebox/core/apps.py
@@ -60,7 +60,7 @@ class CoreConfig(AppConfig):
            from archivebox.workers.orchestrator import Orchestrator

            Process.cleanup_stale_running()
-            machine = Machine.current()
+            Machine.current()

            if not Orchestrator.is_running():
                Orchestrator(exit_on_idle=False).start()
--- a/archivebox/core/asgi.py
+++ b/archivebox/core/asgi.py
@@ -8,11 +8,10 @@ https://docs.djangoproject.com/en/stable/howto/deployment/asgi/
 """

 from archivebox.config.django import setup_django
+from django.core.asgi import get_asgi_application

 setup_django(in_memory_db=False, check_db=True)

-from django.core.asgi import get_asgi_application
-
 # Standard Django ASGI application (no websockets/channels needed)
 application = get_asgi_application()

--- a/archivebox/core/forms.py
+++ b/archivebox/core/forms.py
@@ -6,6 +6,7 @@ from archivebox.misc.util import URL_REGEX
 from taggit.utils import edit_string_for_tags, parse_tags
 from archivebox.base_models.admin import KeyValueWidget
 from archivebox.crawls.schedule_utils import validate_schedule
+from archivebox.hooks import get_plugins

 DEPTH_CHOICES = (
    ('0', 'depth = 0 (archive just these URLs)'),
@@ -15,7 +16,6 @@ DEPTH_CHOICES = (
    ('4', 'depth = 4 (+ URLs four hops away)'),
 )

-from archivebox.hooks import get_plugins

 def get_plugin_choices():
    """Get available extractor plugins from discovered hooks."""
@@ -210,15 +210,18 @@ class AddLinkForm(forms.Form):

        return schedule

+
 class TagWidgetMixin:
    def format_value(self, value):
        if value is not None and not isinstance(value, str):
            value = edit_string_for_tags(value)
        return super().format_value(value)

+
 class TagWidget(TagWidgetMixin, forms.TextInput):
    pass

+
 class TagField(forms.CharField):
    widget = TagWidget

--- a/archivebox/core/middleware.py
+++ b/archivebox/core/middleware.py
@@ -17,7 +17,6 @@ from archivebox.config import VERSION
 from archivebox.config.version import get_COMMIT_HASH
 from archivebox.core.host_utils import (
    build_admin_url,
-    build_api_url,
    build_web_url,
    get_api_host,
    get_admin_host,
--- a/archivebox/core/migrations/0006_auto_20201012_1520.py
+++ b/archivebox/core/migrations/0006_auto_20201012_1520.py
@@ -7,10 +7,8 @@ def forwards_func(apps, schema_editor):
    SnapshotModel = apps.get_model("core", "Snapshot")
    TagModel = apps.get_model("core", "Tag")

-    db_alias = schema_editor.connection.alias
    snapshots = SnapshotModel.objects.all()
    for snapshot in snapshots:
-        tags = snapshot.tags
        tag_set = (
            set(tag.strip() for tag in (snapshot.tags_old or '').split(','))
        )
@@ -23,9 +21,7 @@ def forwards_func(apps, schema_editor):

 def reverse_func(apps, schema_editor):
    SnapshotModel = apps.get_model("core", "Snapshot")
-    TagModel = apps.get_model("core", "Tag")

-    db_alias = schema_editor.connection.alias
    snapshots = SnapshotModel.objects.all()
    for snapshot in snapshots:
        tags = snapshot.tags.values_list("name", flat=True)
--- a/archivebox/core/migrations/0007_archiveresult.py
+++ b/archivebox/core/migrations/0007_archiveresult.py
@@ -43,7 +43,7 @@ def forwards_func(apps, schema_editor):
        try:
            with open(out_dir / "index.json", "r") as f:
                fs_index = json.load(f)
-        except Exception as e:
+        except Exception:
            continue

        history = fs_index["history"]
--- a/archivebox/core/migrations/0023_upgrade_to_0_9_0.py
+++ b/archivebox/core/migrations/0023_upgrade_to_0_9_0.py
@@ -234,7 +234,6 @@ def upgrade_core_tables(apps, schema_editor):
        tag_has_data = cursor.fetchone()[0] > 0

        if tag_has_data:
-            tag_cols = get_table_columns('core_tag')
            cursor.execute("PRAGMA table_info(core_tag)")
            tag_id_type = None
            for row in cursor.fetchall():
--- a/archivebox/core/migrations/0024_assign_default_crawl.py
+++ b/archivebox/core/migrations/0024_assign_default_crawl.py
@@ -2,7 +2,6 @@
 # Creates a default crawl for v0.7.2 migrated snapshots and makes crawl_id NOT NULL

 from django.db import migrations, models
-import uuid


 def create_default_crawl_and_assign_snapshots(apps, schema_editor):
--- a/archivebox/core/migrations/0027_copy_archiveresult_to_process.py
+++ b/archivebox/core/migrations/0027_copy_archiveresult_to_process.py
@@ -347,7 +347,7 @@ def copy_archiveresult_data_to_process(apps, schema_editor):
            migrated_count += 1

            if i == 0:
-                print(f'DEBUG 0027: Linked ArchiveResult to Process')
+                print('DEBUG 0027: Linked ArchiveResult to Process')

        except Exception as e:
            print(f'✗ Error migrating ArchiveResult {ar_id}: {e}')
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -1,6 +1,6 @@
 __package__ = 'archivebox.core'

-from typing import Optional, Dict, Iterable, Any, List, TYPE_CHECKING
+from typing import Optional, Dict, Iterable, Any, List
 from archivebox.uuid_compat import uuid7
 from datetime import datetime, timedelta
 from django_stubs_ext.db.models import TypedModelMeta
@@ -12,19 +12,18 @@ from pathlib import Path
 from statemachine import State, registry

 from django.db import models
-from django.db.models import QuerySet, Value, Case, When, IntegerField
+from django.db.models import QuerySet
 from django.utils.functional import cached_property
 from django.utils.text import slugify
 from django.utils import timezone
 from django.core.cache import cache
-from django.urls import reverse, reverse_lazy
+from django.urls import reverse_lazy
 from django.contrib import admin
 from django.conf import settings

 from archivebox.config import CONSTANTS
 from archivebox.misc.system import get_dir_size, atomic_write
-from archivebox.misc.util import parse_date, base_url, domain as url_domain, to_json, ts_to_date_str, urlencode, htmlencode, urldecode
-from archivebox.misc.hashing import get_dir_info
+from archivebox.misc.util import parse_date, domain as url_domain, to_json, ts_to_date_str, urlencode, htmlencode, urldecode
 from archivebox.hooks import (
    get_plugins, get_plugin_name, get_plugin_icon,
 )
@@ -186,7 +185,7 @@ class SnapshotQuerySet(models.QuerySet):
        for pattern in patterns:
            try:
                qsearch |= query_search_index(pattern)
-            except:
+            except BaseException:
                raise SystemExit(2)
        return self.all() & qsearch

@@ -344,8 +343,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
    @property
    def process_set(self):
        """Get all Process objects related to this snapshot's ArchiveResults."""
-        import json
-        import json
        from archivebox.machine.models import Process
        return Process.objects.filter(archiveresult__snapshot_id=self.id)

@@ -458,13 +455,13 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea

        if not old_dir.exists() or old_dir == new_dir:
            # No migration needed
-            print(f"[DEBUG _fs_migrate] Returning None (early return)")
+            print("[DEBUG _fs_migrate] Returning None (early return)")
            return None

        if new_dir.exists():
            # New directory already exists (files already copied), but we still need cleanup
            # Return cleanup info so old directory can be cleaned up
-            print(f"[DEBUG _fs_migrate] Returning cleanup info (new_dir exists)")
+            print("[DEBUG _fs_migrate] Returning cleanup info (new_dir exists)")
            return (old_dir, new_dir)

        new_dir.mkdir(parents=True, exist_ok=True)
@@ -499,7 +496,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea

        # Schedule cleanup AFTER transaction commits successfully
        # This ensures DB changes are committed before we delete old files
-        from django.db import transaction
        transaction.on_commit(lambda: self._cleanup_old_migration_dir(old_dir, new_dir))

        # Return cleanup info for manual cleanup if needed (when called directly)
@@ -594,8 +590,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
            domain = self.extract_domain_from_url(self.url)

            return (
-                CONSTANTS.DATA_DIR / 'users' / username / 'snapshots' /
-                date_str / domain / str(self.id)
+                CONSTANTS.DATA_DIR / 'users' / username / 'snapshots'
+                / date_str / domain / str(self.id)
            )
        else:
            # Unknown version - use current
@@ -670,7 +666,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
                print(f"[DEBUG load_from_directory] Found via fuzzy match: {snapshot.timestamp}")
                return snapshot
            elif candidates.count() > 1:
-                print(f"[DEBUG load_from_directory] Multiple fuzzy matches, using first")
+                print("[DEBUG load_from_directory] Multiple fuzzy matches, using first")
                return candidates.first()
            print(f"[DEBUG load_from_directory] NOT FOUND (fuzzy): {url} @ {timestamp}")
            return None
@@ -767,7 +763,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
                ts_int = int(float(ts))
                # 1995-01-01 to 2035-12-31
                return 788918400 <= ts_int <= 2082758400
-            except:
+            except (TypeError, ValueError, OverflowError):
                return False

        index_valid = is_valid_timestamp(index_timestamp) if index_timestamp else False
@@ -850,7 +846,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
            try:
                with open(json_path) as f:
                    index_data = json.load(f)
-            except:
+            except (OSError, TypeError, ValueError, json.JSONDecodeError):
                pass

        # Merge title
@@ -929,7 +925,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
        if result_data.get('start_ts'):
            try:
                start_ts = parser.parse(result_data['start_ts'])
-            except:
+            except (TypeError, ValueError, OverflowError):
                pass

        if (plugin, start_ts) in existing:
@@ -940,7 +936,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
            if result_data.get('end_ts'):
                try:
                    end_ts = parser.parse(result_data['end_ts'])
-                except:
+                except (TypeError, ValueError, OverflowError):
                    pass

            # Support both 'output' (legacy) and 'output_str' (new JSONL) field names
@@ -957,7 +953,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
                start_ts=start_ts,
                end_ts=end_ts,
            )
-        except:
+        except Exception:
            pass

    def write_index_json(self):
@@ -1176,7 +1172,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea

        try:
            shutil.move(str(snapshot_dir), str(dest))
-        except:
+        except Exception:
            pass

    @classmethod
@@ -1208,7 +1204,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
                try:
                    cls._merge_snapshots(snapshots)
                    merged += 1
-                except:
+                except Exception:
                    pass

        return merged
@@ -1244,7 +1240,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea

                try:
                    shutil.rmtree(dup_dir)
-                except:
+                except Exception:
                    pass

            # Merge tags
@@ -1615,7 +1611,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
        """
        import re
        from django.utils import timezone
-        from archivebox.misc.util import parse_date
        from archivebox.base_models.models import get_or_create_system_user_pk
        from archivebox.config.common import GENERAL_CONFIG

@@ -2125,7 +2120,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea

    def to_dict(self, extended: bool = False) -> Dict[str, Any]:
        """Convert Snapshot to a dictionary (replacement for Link._asdict())"""
-        from archivebox.misc.util import ts_to_date_str
        from archivebox.core.host_utils import build_snapshot_url

        result = {
@@ -2283,9 +2277,9 @@ class SnapshotMachine(BaseStateMachine):

    # Tick Event (polled by workers)
    tick = (
-        queued.to.itself(unless='can_start') |
-        queued.to(started, cond='can_start') |
-        started.to(sealed, cond='is_finished')
+        queued.to.itself(unless='can_start')
+        | queued.to(started, cond='can_start')
+        | started.to(sealed, cond='is_finished')
    )

    # Manual event (can also be triggered by last ArchiveResult finishing)
@@ -2783,7 +2777,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
        Updates status/output fields, queues discovered URLs, and triggers indexing.
        """
        from django.utils import timezone
-        from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, run_hook, is_background_hook
+        from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, run_hook
        from archivebox.config.configset import get_config

        # Get merged config with proper context
@@ -3190,16 +3184,16 @@ class ArchiveResultMachine(BaseStateMachine):
    #       queued → skipped (if exceeded max attempts)
    #       started → backoff → started (retry)
    tick = (
-        queued.to(skipped, cond='is_exceeded_max_attempts') |  # Check skip first
-        queued.to.itself(unless='can_start') |
-        queued.to(started, cond='can_start') |
-        started.to(succeeded, cond='is_succeeded') |
-        started.to(failed, cond='is_failed') |
-        started.to(skipped, cond='is_skipped') |
-        started.to(backoff, cond='is_backoff') |
-        backoff.to(skipped, cond='is_exceeded_max_attempts') |  # Check skip from backoff too
-        backoff.to.itself(unless='can_start') |
-        backoff.to(started, cond='can_start')
+        queued.to(skipped, cond='is_exceeded_max_attempts')  # Check skip first
+        | queued.to.itself(unless='can_start')
+        | queued.to(started, cond='can_start')
+        | started.to(succeeded, cond='is_succeeded')
+        | started.to(failed, cond='is_failed')
+        | started.to(skipped, cond='is_skipped')
+        | started.to(backoff, cond='is_backoff')
+        | backoff.to(skipped, cond='is_exceeded_max_attempts')  # Check skip from backoff too
+        | backoff.to.itself(unless='can_start')
+        | backoff.to(started, cond='can_start')
        # Removed redundant transitions: backoff.to(succeeded/failed/skipped)
        # Reason: backoff should always retry→started, then started→final states
    )
@@ -3241,8 +3235,8 @@ class ArchiveResultMachine(BaseStateMachine):
        """Check if we should backoff and retry later."""
        # Backoff if status is still started (plugin didn't complete) and output_str is empty
        return (
-            self.archiveresult.status == ArchiveResult.StatusChoices.STARTED and
-            not self.archiveresult.output_str
+            self.archiveresult.status == ArchiveResult.StatusChoices.STARTED
+            and not self.archiveresult.output_str
        )

    def is_finished(self) -> bool:
@@ -3286,7 +3280,6 @@ class ArchiveResultMachine(BaseStateMachine):

    @started.enter
    def enter_started(self):
-        from archivebox.machine.models import NetworkInterface

        # Update Process with network interface
        if self.archiveresult.process_id:
--- a/archivebox/core/settings.py
+++ b/archivebox/core/settings.py
@@ -6,6 +6,7 @@ import inspect

 from pathlib import Path

+from django.conf.locale.en import formats as en_formats  # type: ignore
 from django.utils.crypto import get_random_string

 import archivebox
@@ -13,6 +14,7 @@ import archivebox
 from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS  # noqa
 from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG, STORAGE_CONFIG  # noqa
 from archivebox.core.host_utils import normalize_base_url, get_admin_base_url, get_api_base_url
+from .settings_logging import SETTINGS_LOGGING


 IS_MIGRATING = "makemigrations" in sys.argv[:3] or "migrate" in sys.argv[:3]
@@ -54,8 +56,8 @@ INSTALLED_APPS = [
    "django.contrib.staticfiles",
    "django.contrib.admin",
    # 3rd-party apps from PyPI
-    "signal_webhooks",  # handles REST API outbound webhooks                              https://github.com/MrThearMan/django-signal-webhooks
-    "django_object_actions",  # provides easy Django Admin action buttons on change views       https://github.com/crccheck/django-object-actions
+    "signal_webhooks",  # handles REST API outbound webhooks
+    "django_object_actions",  # provides easy Django Admin action buttons on change views
    # Our ArchiveBox-provided apps (use fully qualified names)
    # NOTE: Order matters! Apps with migrations that depend on other apps must come AFTER their dependencies
    # "archivebox.config",  # ArchiveBox config settings (no models, not a real Django app)
@@ -117,7 +119,6 @@ try:

        try:
            # Try to import django-auth-ldap (will fail if not installed)
-            import django_auth_ldap
            from django_auth_ldap.config import LDAPSearch
            import ldap

@@ -414,9 +415,6 @@ DATETIME_FORMAT = "Y-m-d h:i:s A"
 SHORT_DATETIME_FORMAT = "Y-m-d h:i:s A"
 TIME_ZONE = CONSTANTS.TIMEZONE  # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent

-
-from django.conf.locale.en import formats as en_formats  # type: ignore
-
 en_formats.DATETIME_FORMAT = DATETIME_FORMAT  # monkey patch en_format default with our preferred format
 en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT

@@ -425,9 +423,6 @@ en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT
 ### Logging Settings
 ################################################################################

-
-from .settings_logging import SETTINGS_LOGGING, LOGS_DIR, ERROR_LOG
-
 LOGGING = SETTINGS_LOGGING


--- a/archivebox/core/settings_logging.py
+++ b/archivebox/core/settings_logging.py
@@ -5,8 +5,6 @@ import os
 import tempfile
 import logging

-import pydantic
-import django.template

 from archivebox.config import CONSTANTS

--- a/archivebox/core/tests.py
+++ b/archivebox/core/tests.py
@@ -1,5 +1,6 @@
 """Tests for the core views, especially AddView."""

+import importlib
 import os
 import django
 from unittest.mock import patch
@@ -8,13 +9,14 @@ from unittest.mock import patch
 os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
 django.setup()

-from django.test import TestCase, Client
-from django.contrib.auth.models import User
-from django.urls import reverse
-
-from archivebox.crawls.models import Crawl, CrawlSchedule
-from archivebox.core.models import Tag
-from archivebox.config.common import SERVER_CONFIG
+TestCase = importlib.import_module('django.test').TestCase
+Client = importlib.import_module('django.test').Client
+User = importlib.import_module('django.contrib.auth.models').User
+reverse = importlib.import_module('django.urls').reverse
+Crawl = importlib.import_module('archivebox.crawls.models').Crawl
+CrawlSchedule = importlib.import_module('archivebox.crawls.models').CrawlSchedule
+Tag = importlib.import_module('archivebox.core.models').Tag
+SERVER_CONFIG = importlib.import_module('archivebox.config.common').SERVER_CONFIG


 class AddViewTests(TestCase):
@@ -252,7 +254,7 @@ class AddViewTests(TestCase):
    def test_add_staff_admin_custom_config_is_allowed(self):
        """Admin users can override crawl config."""
        self.client.logout()
-        admin_user = User.objects.create_user(
+        User.objects.create_user(
            username='adminuser',
            password='adminpass123',
            email='admin@example.com',
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -10,7 +10,7 @@ from pathlib import Path
 from urllib.parse import urlparse

 from django.shortcuts import render, redirect
-from django.http import HttpRequest, HttpResponse, Http404, HttpResponseForbidden
+from django.http import JsonResponse, HttpRequest, HttpResponse, Http404, HttpResponseForbidden
 from django.utils.html import format_html, mark_safe
 from django.views import View
 from django.views.generic.list import ListView
@@ -24,9 +24,8 @@ from django.utils.decorators import method_decorator
 from admin_data_views.typing import TableContext, ItemContext
 from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink

-import archivebox
 from archivebox.config import CONSTANTS, CONSTANTS_CONFIG, DATA_DIR, VERSION
-from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
+from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG
 from archivebox.config.configset import get_flat_config, get_config, get_all_configs
 from archivebox.misc.util import base_url, htmlencode, ts_to_date_str, urldecode
 from archivebox.misc.serve_static import serve_static_with_byterange_support
@@ -35,6 +34,9 @@ from archivebox.search import query_search_index

 from archivebox.core.models import Snapshot
 from archivebox.core.host_utils import build_snapshot_url
+from archivebox.core.forms import AddLinkForm
+from archivebox.crawls.models import Crawl
+from archivebox.hooks import get_enabled_plugins, get_plugin_name


 def _files_index_target(snapshot: Snapshot, archivefile: str | None) -> str:
@@ -49,12 +51,6 @@ def _files_index_target(snapshot: Snapshot, archivefile: str | None) -> str:
    return target


-from archivebox.core.forms import AddLinkForm
-from archivebox.crawls.models import Crawl
-from archivebox.hooks import get_enabled_plugins, get_plugin_name
-
-
-
 class HomepageView(View):
    def get(self, request):
        if request.user.is_authenticated:
@@ -1066,10 +1062,6 @@ class HealthCheckView(View):
            status=200
        )

-
-import json
-from django.http import JsonResponse
-
 def live_progress_view(request):
    """Simple JSON endpoint for live progress status - used by admin progress monitor."""
    try:
@@ -1077,7 +1069,6 @@ def live_progress_view(request):
        from archivebox.crawls.models import Crawl
        from archivebox.core.models import Snapshot, ArchiveResult
        from archivebox.machine.models import Process, Machine
-        from django.db.models import Case, When, Value, IntegerField

        # Get orchestrator status
        orchestrator_running = Orchestrator.is_running()
@@ -1133,7 +1124,6 @@ def live_progress_view(request):
                    })

        # Build hierarchical active crawls with nested snapshots and archive results
-        from django.db.models import Prefetch

        running_workers = Process.objects.filter(
            machine=machine,
@@ -1387,7 +1377,7 @@ def find_config_default(key: str) -> str:
    return default_val

 def find_config_type(key: str) -> str:
-    from typing import get_type_hints, ClassVar
+    from typing import ClassVar
    CONFIGS = get_all_configs()

    for config in CONFIGS.values():
@@ -1430,7 +1420,6 @@ def key_is_safe(key: str) -> bool:

 def find_config_source(key: str, merged_config: dict) -> str:
    """Determine where a config value comes from."""
-    import os
    from archivebox.machine.models import Machine

    # Check if it's from archivebox.machine.config
@@ -1464,12 +1453,11 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
    # Get merged config that includes Machine.config overrides
    try:
        from archivebox.machine.models import Machine
-        machine = Machine.current()
+        Machine.current()
        merged_config = get_config()
-    except Exception as e:
+    except Exception:
        # Fallback if Machine model not available
        merged_config = get_config()
-        machine = None

    rows = {
        "Section": [],
@@ -1525,7 +1513,6 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:

@render_with_item_view
 def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
-    import os
    from archivebox.machine.models import Machine
    from archivebox.config.configset import BaseConfigSet

--- a/archivebox/core/widgets.py
+++ b/archivebox/core/widgets.py
@@ -343,20 +343,17 @@ class InlineTagEditorWidget(TagEditorWidget):
        snapshot_id = snapshot_id or self.snapshot_id

        # Parse value to get list of tag dicts with id and name
-        tags = []
        tag_data = []
        if value:
            if hasattr(value, 'all'):  # QuerySet
                for tag in value.all():
                    tag_data.append({'id': tag.pk, 'name': tag.name})
                tag_data.sort(key=lambda x: x['name'].lower())
-                tags = [t['name'] for t in tag_data]
            elif isinstance(value, (list, tuple)):
                if value and hasattr(value[0], 'name'):
                    for tag in value:
                        tag_data.append({'id': tag.pk, 'name': tag.name})
                    tag_data.sort(key=lambda x: x['name'].lower())
-                    tags = [t['name'] for t in tag_data]

        widget_id_raw = f"inline_tags_{snapshot_id}" if snapshot_id else (attrs.get('id', name) if attrs else name)
        widget_id = self._normalize_id(widget_id_raw)
--- a/archivebox/core/wsgi.py
+++ b/archivebox/core/wsgi.py
@@ -9,9 +9,8 @@ https://docs.djangoproject.com/en/2.1/howto/deployment/wsgi/

 import archivebox                                       # noqa
 from archivebox.config.django import setup_django
+from django.core.wsgi import get_wsgi_application

 setup_django(in_memory_db=False, check_db=True)

-from django.core.wsgi import get_wsgi_application
-
 application = get_wsgi_application()
--- a/archivebox/crawls/admin.py
+++ b/archivebox/crawls/admin.py
@@ -1,17 +1,11 @@
 __package__ = 'archivebox.crawls'

-import json
-from pathlib import Path

 from django import forms
 from django.utils.html import format_html, format_html_join, mark_safe
 from django.contrib import admin, messages
-from django.urls import path
-from django.http import JsonResponse
-from django.views.decorators.http import require_POST
 from django.db.models import Count, Q

-from archivebox import DATA_DIR

 from django_object_actions import action

--- a/archivebox/crawls/models.py
+++ b/archivebox/crawls/models.py
@@ -1,12 +1,11 @@
 __package__ = 'archivebox.crawls'

-from typing import TYPE_CHECKING, Iterable
+from typing import TYPE_CHECKING
 from datetime import timedelta
 from archivebox.uuid_compat import uuid7
 from pathlib import Path

 from django.db import models
-from django.db.models import QuerySet
 from django.core.validators import MaxValueValidator, MinValueValidator
 from django.conf import settings
 from django.urls import reverse_lazy
@@ -15,13 +14,12 @@ from django_stubs_ext.db.models import TypedModelMeta
 from statemachine import State, registry
 from rich import print

-from archivebox.config import CONSTANTS
 from archivebox.base_models.models import ModelWithUUID, ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats, get_or_create_system_user_pk
 from archivebox.workers.models import ModelWithStateMachine, BaseStateMachine
 from archivebox.crawls.schedule_utils import next_run_for_schedule, validate_schedule

 if TYPE_CHECKING:
-    from archivebox.core.models import Snapshot, ArchiveResult
+    from archivebox.core.models import Snapshot


 class CrawlSchedule(ModelWithUUID, ModelWithNotes):
@@ -111,7 +109,6 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
    label = models.CharField(max_length=64, blank=True, null=False, default='')
    notes = models.TextField(blank=True, null=False, default='')
    schedule = models.ForeignKey(CrawlSchedule, on_delete=models.SET_NULL, null=True, blank=True, editable=True)
-    output_dir = models.CharField(max_length=512, null=False, blank=True, default='')

    status = ModelWithStateMachine.StatusField(choices=ModelWithStateMachine.StatusChoices, default=ModelWithStateMachine.StatusChoices.QUEUED)
    retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
@@ -252,6 +249,22 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
            return system_url
        return None

+    def resolve_persona(self):
+        from archivebox.personas.models import Persona
+
+        if self.persona_id:
+            persona = Persona.objects.filter(id=self.persona_id).first()
+            if persona is None:
+                raise Persona.DoesNotExist(f'Crawl {self.id} references missing Persona {self.persona_id}')
+            return persona
+
+        default_persona_name = str((self.config or {}).get('DEFAULT_PERSONA') or '').strip()
+        if default_persona_name:
+            persona, _ = Persona.objects.get_or_create(name=default_persona_name or 'Default')
+            return persona
+
+        return None
+

    def add_url(self, entry: dict) -> bool:
        """
@@ -391,7 +404,10 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
            f.flush()

        def get_runtime_config():
-            return get_config(crawl=self)
+            config = get_config(crawl=self)
+            if persona_runtime_overrides:
+                config.update(persona_runtime_overrides)
+            return config

        system_task = self.get_system_task()
        if system_task == 'archivebox://update':
@@ -402,6 +418,15 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith

        machine = Machine.current()
        declared_binary_names: set[str] = set()
+        persona_runtime_overrides: dict[str, str] = {}
+        persona = self.resolve_persona()
+        if persona:
+            base_runtime_config = get_config(crawl=self, persona=persona)
+            chrome_binary = str(base_runtime_config.get('CHROME_BINARY') or '')
+            persona_runtime_overrides = persona.prepare_runtime_for_crawl(
+                crawl=self,
+                chrome_binary=chrome_binary,
+            )

        def install_declared_binaries(binary_names: set[str]) -> None:
            if not binary_names:
@@ -563,7 +588,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith

        # Discover and run on_Crawl hooks
        with open(debug_log, 'a') as f:
-            f.write(f'Discovering Crawl hooks...\n')
+            f.write('Discovering Crawl hooks...\n')
            f.flush()
        hooks = discover_hooks('Crawl', config=get_runtime_config())
        with open(debug_log, 'a') as f:
@@ -588,17 +613,17 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
                print(f'[yellow]⚠️  Removed {leaked_count} leaked snapshot(s) created during system crawl {system_task}[/yellow]')
            with open(debug_log, 'a') as f:
                f.write(f'Skipping snapshot creation for system crawl: {system_task}\n')
-                f.write(f'=== Crawl.run() complete ===\n\n')
+                f.write('=== Crawl.run() complete ===\n\n')
                f.flush()
            return None

        with open(debug_log, 'a') as f:
-            f.write(f'Creating snapshots from URLs...\n')
+            f.write('Creating snapshots from URLs...\n')
            f.flush()
        created_snapshots = self.create_snapshots_from_urls()
        with open(debug_log, 'a') as f:
            f.write(f'Created {len(created_snapshots)} snapshots\n')
-            f.write(f'=== Crawl.run() complete ===\n\n')
+            f.write('=== Crawl.run() complete ===\n\n')
            f.flush()

        # Return first snapshot for this crawl (newly created or existing)
@@ -647,6 +672,10 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
            for pid_file in self.output_dir.glob('**/*.pid'):
                pid_file.unlink(missing_ok=True)

+        persona = self.resolve_persona()
+        if persona:
+            persona.cleanup_runtime_for_crawl(self)
+
        # Run on_CrawlEnd hooks
        from archivebox.config.configset import get_config
        config = get_config(crawl=self)
@@ -715,9 +744,9 @@ class CrawlMachine(BaseStateMachine):

    # Tick Event (polled by workers)
    tick = (
-        queued.to.itself(unless='can_start') |
-        queued.to(started, cond='can_start') |
-        started.to(sealed, cond='is_finished')
+        queued.to.itself(unless='can_start')
+        | queued.to(started, cond='can_start')
+        | started.to(sealed, cond='is_finished')
    )

    # Manual event (triggered by last Snapshot sealing)
@@ -740,7 +769,6 @@ class CrawlMachine(BaseStateMachine):
    @started.enter
    def enter_started(self):
        import sys
-        from archivebox.core.models import Snapshot

        print(f'[cyan]🔄 CrawlMachine.enter_started() - creating snapshots for {self.crawl.id}[/cyan]', file=sys.stderr)

@@ -758,7 +786,7 @@ class CrawlMachine(BaseStateMachine):
                )
            else:
                # No snapshots (system crawl like archivebox://install)
-                print(f'[cyan]🔄 No snapshots created, sealing crawl immediately[/cyan]', file=sys.stderr)
+                print('[cyan]🔄 No snapshots created, sealing crawl immediately[/cyan]', file=sys.stderr)
                # Seal immediately since there's no work to do
                self.seal()

--- a/archivebox/hooks.py
+++ b/archivebox/hooks.py
@@ -56,16 +56,18 @@ __package__ = 'archivebox'

 import os
 import json
-import time
 from functools import lru_cache
 from pathlib import Path
-from typing import List, Dict, Any, Optional, TypedDict
+from typing import TYPE_CHECKING, List, Dict, Any, Optional, TypedDict

 from abx_plugins import get_plugins_dir
 from django.conf import settings
 from django.utils.safestring import mark_safe
 from archivebox.config.constants import CONSTANTS

+if TYPE_CHECKING:
+    from archivebox.machine.models import Process
+

 # Plugin directories
 BUILTIN_PLUGINS_DIR = Path(get_plugins_dir()).resolve()
@@ -266,9 +268,7 @@ def run_hook(
    """
    from archivebox.machine.models import Process, Machine
    from archivebox.config.constants import CONSTANTS
-    import time
    import sys
-    start_time = time.time()

    # Auto-detect timeout from plugin config if not explicitly provided
    if timeout is None:
--- a/archivebox/ldap/auth.py
+++ b/archivebox/ldap/auth.py
@@ -9,7 +9,6 @@ __package__ = "archivebox.ldap"
 from typing import TYPE_CHECKING

 if TYPE_CHECKING:
-    from django.contrib.auth.models import User
    from django_auth_ldap.backend import LDAPBackend as BaseLDAPBackend
 else:
    try:
--- a/archivebox/machine/models.py
+++ b/archivebox/machine/models.py
@@ -10,6 +10,7 @@ from datetime import timedelta, datetime
 from statemachine import State, registry

 from django.db import models
+from django.db.models import QuerySet
 from django.utils import timezone
 from django.utils.functional import cached_property

@@ -197,7 +198,6 @@ class NetworkInterface(ModelWithHealthStats):
 class BinaryManager(models.Manager):
    def get_from_db_or_cache(self, name: str, abspath: str = '', version: str = '', sha256: str = '', binprovider: str = 'env') -> 'Binary':
        """Get or create an Binary record from the database or cache."""
-        global _CURRENT_BINARIES
        cached = _CURRENT_BINARIES.get(name)
        if cached and timezone.now() < cached.modified_at + timedelta(seconds=BINARY_RECHECK_INTERVAL):
            return cached
@@ -583,7 +583,6 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
        Called by state machine if needed (not typically used for binaries
        since installations are foreground, but included for consistency).
        """
-        from pathlib import Path

        # Kill any background binary installation hooks using Process records
        # (rarely used since binary installations are typically foreground)
@@ -1026,9 +1025,11 @@ class Process(models.Model):
        # Check cache validity
        if _CURRENT_PROCESS:
            # Verify: same PID, same machine, cache not expired
-            if (_CURRENT_PROCESS.pid == current_pid and
-                _CURRENT_PROCESS.machine_id == machine.id and
-                timezone.now() < _CURRENT_PROCESS.modified_at + timedelta(seconds=PROCESS_RECHECK_INTERVAL)):
+            if (
+                _CURRENT_PROCESS.pid == current_pid
+                and _CURRENT_PROCESS.machine_id == machine.id
+                and timezone.now() < _CURRENT_PROCESS.modified_at + timedelta(seconds=PROCESS_RECHECK_INTERVAL)
+            ):
                _CURRENT_PROCESS.ensure_log_files()
                return _CURRENT_PROCESS
            _CURRENT_PROCESS = None
@@ -1111,7 +1112,6 @@ class Process(models.Model):
        machine = machine or Machine.current()

        # Debug logging
-        import sys
        # print(f"DEBUG _find_parent_process: my_pid={os.getpid()}, ppid={ppid}", file=sys.stderr)

        # Get parent process start time from OS
@@ -1630,7 +1630,6 @@ class Process(models.Model):
            self (updated with pid, started_at, etc.)
        """
        import subprocess
-        import time

        # Validate pwd is set (required for output files)
        if not self.pwd:
@@ -1846,7 +1845,6 @@ class Process(models.Model):
        Returns:
            True if process was terminated, False if already dead
        """
-        import time
        import signal

        proc = self.proc
@@ -2199,8 +2197,8 @@ class BinaryMachine(BaseStateMachine):

    # Tick Event - install happens during transition
    tick = (
-        queued.to.itself(unless='can_install') |
-        queued.to(installed, cond='can_install', on='on_install')
+        queued.to.itself(unless='can_install')
+        | queued.to(installed, cond='can_install', on='on_install')
    )

    def can_install(self) -> bool:
@@ -2303,10 +2301,10 @@ class ProcessMachine(BaseStateMachine):

    # Tick Event - transitions based on conditions
    tick = (
-        queued.to.itself(unless='can_start') |
-        queued.to(running, cond='can_start') |
-        running.to.itself(unless='is_exited') |
-        running.to(exited, cond='is_exited')
+        queued.to.itself(unless='can_start')
+        | queued.to(running, cond='can_start')
+        | running.to.itself(unless='is_exited')
+        | running.to(exited, cond='is_exited')
    )

    # Additional events (for explicit control)
--- a/archivebox/machine/tests/test_machine_models.py
+++ b/archivebox/machine/tests/test_machine_models.py
@@ -12,8 +12,6 @@ Tests cover:
 """

 import os
-import sys
-from pathlib import Path
 from datetime import timedelta
 from unittest.mock import patch

@@ -29,7 +27,6 @@ from archivebox.machine.models import (
    BinaryMachine,
    ProcessMachine,
    MACHINE_RECHECK_INTERVAL,
-    PROCESS_RECHECK_INTERVAL,
    PID_REUSE_WINDOW,
 )

@@ -323,7 +320,6 @@ class TestProcessModel(TestCase):
    def test_process_update_and_requeue(self):
        """Process.update_and_requeue() should update fields and save."""
        process = Process.objects.create(machine=self.machine, cmd=['test'])
-        old_modified = process.modified_at

        process.update_and_requeue(
            status=Process.StatusChoices.RUNNING,
--- a/archivebox/mcp/server.py
+++ b/archivebox/mcp/server.py
@@ -1,5 +1,3 @@
-__package__ = 'archivebox.mcp'
-
 """
 Model Context Protocol (MCP) server implementation for ArchiveBox.

@@ -10,9 +8,7 @@ Click command metadata. Handles JSON-RPC 2.0 requests over stdio transport.
 import sys
 import json
 import traceback
-from typing import Any, Dict, List, Optional
-from io import StringIO
-from contextlib import redirect_stdout, redirect_stderr
+from typing import Optional

 import click
 from click.testing import CliRunner
--- a/archivebox/misc/checks.py
+++ b/archivebox/misc/checks.py
@@ -225,7 +225,6 @@ def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True):


 def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_exist=True):
-    import archivebox
    from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
    from archivebox.misc.logging import STDERR
    from archivebox.misc.logging_util import pretty_path
--- a/archivebox/misc/folders.py
+++ b/archivebox/misc/folders.py
@@ -35,7 +35,6 @@ def fix_invalid_folder_locations(out_dir: Path = DATA_DIR) -> Tuple[List[str], L
                    with open(index_path, 'r') as f:
                        data = json.load(f)
                    timestamp = data.get('timestamp')
-                    url = data.get('url')
                except Exception:
                    continue

--- a/archivebox/misc/logging_util.py
+++ b/archivebox/misc/logging_util.py
@@ -21,13 +21,12 @@ if TYPE_CHECKING:

 from rich import print
 from rich.panel import Panel
-from django.core.management.base import DjangoHelpFormatter

 from archivebox.config import CONSTANTS, DATA_DIR, VERSION
 from archivebox.config.common import SHELL_CONFIG
 from archivebox.misc.system import get_dir_size
 from archivebox.misc.util import enforce_types
-from archivebox.misc.logging import ANSI, stderr
+from archivebox.misc.logging import ANSI

@dataclass
 class RuntimeStats:
--- a/archivebox/misc/monkey_patches.py
+++ b/archivebox/misc/monkey_patches.py
@@ -1,16 +1,18 @@
 __package__ = 'archivebox'

-import django
-import pydantic

+import datetime
+import warnings
+
+import benedict
+from daphne import access
 import django_stubs_ext
+from django.utils import timezone

 django_stubs_ext.monkeypatch()


 # monkey patch django timezone to add back utc (it was removed in Django 5.0)
-import datetime
-from django.utils import timezone
 timezone.utc = datetime.timezone.utc

 # monkey patch django-signals-webhooks to change how it shows up in Admin UI
@@ -26,12 +28,9 @@ timezone.utc = datetime.timezone.utc

 # Hide site-packages/sonic/client.py:115: SyntaxWarning
 # https://github.com/xmonader/python-sonic-client/pull/18
-import warnings     # noqa
 warnings.filterwarnings("ignore", category=SyntaxWarning, module='sonic')

 # Make daphne log requests quieter and easier to read
-from daphne import access                                        # noqa
-
 class ModifiedAccessLogGenerator(access.AccessLogGenerator):
    """Clutge workaround until daphne uses the Python logging framework. https://github.com/django/daphne/pull/473/files"""
    
@@ -68,5 +67,4 @@ access.AccessLogGenerator.write_entry = ModifiedAccessLogGenerator.write_entry #
 # fix benedict objects to pretty-print/repr more nicely with rich
 # https://stackoverflow.com/a/79048811/2156113
 # https://rich.readthedocs.io/en/stable/pretty.html#rich-repr-protocol
-import benedict                                                  # noqa
 benedict.benedict.__rich_repr__ = lambda self: (dict(self),)     # type: ignore
--- a/archivebox/misc/progress_layout.py
+++ b/archivebox/misc/progress_layout.py
@@ -135,7 +135,6 @@ class ProcessLogPanel:
            if line:
                log_lines.append(Text(line, style="cyan"))

-        compact = self.compact if self.compact is not None else self._is_background_hook()
        max_body = max(1, self.max_lines - len(header_lines))
        if not log_lines:
            log_lines = []
--- a/archivebox/misc/system.py
+++ b/archivebox/misc/system.py
@@ -4,10 +4,11 @@ __package__ = 'archivebox.misc'
 import os
 import signal
 import shutil
+import sys

 from json import dump
 from pathlib import Path
-from typing import Optional, Union, Set, Tuple
+from typing import Optional, Union, Tuple
 from subprocess import _mswindows, PIPE, Popen, CalledProcessError, CompletedProcess, TimeoutExpired

 from atomicwrites import atomic_write as lib_atomic_write
@@ -58,7 +59,7 @@ def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False,
                    # far into the TimeoutExpired exception.
                    process.wait()
                raise
-            except:  # Including KeyboardInterrupt, communicate handled that.
+            except BaseException:  # Including KeyboardInterrupt, communicate handled that.
                process.kill()
                # We don't call process.wait() as .__exit__ does that for us.
                raise
--- a/archivebox/personas/admin.py
+++ b/archivebox/personas/admin.py
@@ -1,3 +1,2 @@
-from django.contrib import admin

 # Register your models here.
--- a/archivebox/personas/models.py
+++ b/archivebox/personas/models.py
@@ -11,8 +11,12 @@ Each persona has its own:

 __package__ = 'archivebox.personas'

+import shutil
+import subprocess
+import sys
+from contextlib import contextmanager
 from pathlib import Path
-from typing import TYPE_CHECKING, Iterator
+from typing import TYPE_CHECKING

 from django.db import models
 from django.conf import settings
@@ -21,8 +25,32 @@ from django.utils import timezone
 from archivebox.base_models.models import ModelWithConfig, get_or_create_system_user_pk
 from archivebox.uuid_compat import uuid7

+try:
+    import fcntl
+except ImportError:  # pragma: no cover
+    fcntl = None
+
 if TYPE_CHECKING:
-    from django.db.models import QuerySet
+    pass
+
+
+# Directory names that Persona.cleanup_chrome_profile() deletes recursively
+# (shutil.rmtree) wherever they appear underneath a Chrome profile dir.
+VOLATILE_PROFILE_DIR_NAMES = {
+    'Cache',
+    'Code Cache',
+    'GPUCache',
+    'ShaderCache',
+    'Service Worker',
+    'GCM Store',
+    'Crashpad',
+    'BrowserMetrics',
+}
+
+# File names that Persona.cleanup_chrome_profile() unlinks wherever they
+# appear underneath a Chrome profile dir.
+VOLATILE_PROFILE_FILE_NAMES = {
+    'BrowserMetrics-spare.pma',
+    'SingletonCookie',
+    'SingletonLock',
+    'SingletonSocket',
+}


 class Persona(ModelWithConfig):
@@ -120,37 +148,118 @@ class Persona(ModelWithConfig):
        (self.path / 'chrome_extensions').mkdir(parents=True, exist_ok=True)
        (self.path / 'chrome_downloads').mkdir(parents=True, exist_ok=True)

-    def cleanup_chrome(self) -> bool:
-        """
-        Clean up Chrome state files (SingletonLock, etc.) for this persona.
-
-        Returns:
-            True if cleanup was performed, False if no cleanup needed
-        """
+    def cleanup_chrome_profile(self, profile_dir: Path) -> bool:
+        """Remove volatile Chrome state that should never be reused across launches."""
        cleaned = False
-        chrome_dir = self.path / 'chrome_user_data'

-        if not chrome_dir.exists():
+        if not profile_dir.exists():
            return False

-        # Clean up SingletonLock files
-        for lock_file in chrome_dir.glob('**/SingletonLock'):
-            try:
-                lock_file.unlink()
-                cleaned = True
-            except OSError:
-                pass
+        for path in profile_dir.rglob('*'):
+            if path.name in VOLATILE_PROFILE_FILE_NAMES:
+                try:
+                    path.unlink()
+                    cleaned = True
+                except OSError:
+                    pass

-        # Clean up SingletonSocket files
-        for socket_file in chrome_dir.glob('**/SingletonSocket'):
+        for dirname in VOLATILE_PROFILE_DIR_NAMES:
+            for path in profile_dir.rglob(dirname):
+                if not path.is_dir():
+                    continue
+                shutil.rmtree(path, ignore_errors=True)
+                cleaned = True
+
+        for path in profile_dir.rglob('*.log'):
            try:
-                socket_file.unlink()
+                path.unlink()
                cleaned = True
            except OSError:
                pass

        return cleaned

+    def cleanup_chrome(self) -> bool:
+        """
+        Purge volatile Chrome state from this persona's own chrome_user_data dir.
+
+        Returns:
+            Whatever cleanup_chrome_profile() reports for that directory.
+        """
+        base_profile_dir = self.path / 'chrome_user_data'
+        return self.cleanup_chrome_profile(base_profile_dir)
+
+    @contextmanager
+    def lock_runtime_for_crawl(self):
+        """
+        Context manager holding an exclusive lock on this persona's lock file.
+
+        The lock file is <persona path>/.archivebox-crawl-profile.lock (its parent
+        dir is created if missing). It is locked with fcntl.flock(LOCK_EX) for the
+        duration of the with-block and unlocked afterwards. When fcntl could not be
+        imported, the with-block still runs but no lock is taken.
+        """
+        lock_path = self.path / '.archivebox-crawl-profile.lock'
+        lock_path.parent.mkdir(parents=True, exist_ok=True)
+
+        with lock_path.open('w') as handle:
+            if fcntl is None:
+                # no flock support available: just run the body unlocked
+                yield
+                return
+
+            fd = handle.fileno()
+            fcntl.flock(fd, fcntl.LOCK_EX)
+            try:
+                yield
+            finally:
+                fcntl.flock(fd, fcntl.LOCK_UN)
+
+    def runtime_root_for_crawl(self, crawl) -> Path:
+        """Return <crawl.output_dir>/.persona/<persona name> for the given crawl."""
+        crawl_dir = Path(crawl.output_dir)
+        return crawl_dir.joinpath('.persona', self.name)
+
+    def runtime_profile_dir_for_crawl(self, crawl) -> Path:
+        """Return the chrome_user_data dir inside runtime_root_for_crawl(crawl)."""
+        runtime_root = self.runtime_root_for_crawl(crawl)
+        return runtime_root / 'chrome_user_data'
+
+    def runtime_downloads_dir_for_crawl(self, crawl) -> Path:
+        """Return the chrome_downloads dir inside runtime_root_for_crawl(crawl)."""
+        runtime_root = self.runtime_root_for_crawl(crawl)
+        return runtime_root / 'chrome_downloads'
+
+    def copy_chrome_profile(self, source_dir: Path, destination_dir: Path) -> None:
+        """
+        Replace destination_dir with a copy of the contents of source_dir.
+
+        The platform's cp is tried first (cp -cR on macOS, cp -a on Linux). If cp
+        cannot be launched or exits non-zero, or on any other platform, the copy is
+        done with shutil.copytree() instead.
+
+        Args:
+            source_dir: profile directory to copy from.
+            destination_dir: target directory; anything already there is deleted.
+        """
+        destination_dir.parent.mkdir(parents=True, exist_ok=True)
+        shutil.rmtree(destination_dir, ignore_errors=True)
+        destination_dir.mkdir(parents=True, exist_ok=True)
+
+        copy_cmd: list[str] | None = None
+        # trailing "/." makes cp copy the directory's contents instead of nesting it
+        source_contents = f'{source_dir}/.'
+
+        if sys.platform == 'darwin':
+            copy_cmd = ['cp', '-cR', source_contents, str(destination_dir)]
+        elif sys.platform.startswith('linux'):
+            copy_cmd = ['cp', '-a', source_contents, str(destination_dir)]
+
+        if copy_cmd:
+            try:
+                result = subprocess.run(copy_cmd, capture_output=True, text=True)
+            except OSError:
+                # cp is missing or not executable: treat it like a failed copy so
+                # the pure-Python fallback below still runs instead of crashing
+                result = None
+            if result is not None and result.returncode == 0:
+                return
+
+            # discard whatever a failed cp left behind before retrying
+            shutil.rmtree(destination_dir, ignore_errors=True)
+            destination_dir.mkdir(parents=True, exist_ok=True)
+
+        shutil.copytree(source_dir, destination_dir, symlinks=True, dirs_exist_ok=True)
+
+    def prepare_runtime_for_crawl(self, crawl, chrome_binary: str = '') -> dict[str, str]:
+        """
+        Set up this persona's per-crawl Chrome runtime dirs and return their paths.
+
+        While holding lock_runtime_for_crawl(): if the runtime profile dir does not
+        exist yet it is copied from CHROME_USER_DATA_DIR (when that template exists
+        and is non-empty) or created empty; the runtime downloads dir is created;
+        volatile state is stripped from the runtime profile; and small marker files
+        are written into the runtime root.
+
+        Args:
+            crawl: passed through to the runtime_*_for_crawl() helpers to locate the dirs.
+            chrome_binary: when non-empty, recorded in chrome_binary.txt.
+
+        Returns:
+            Dict with CHROME_USER_DATA_DIR and CHROME_DOWNLOADS_DIR as string paths.
+        """
+        self.ensure_dirs()
+
+        template_dir = Path(self.CHROME_USER_DATA_DIR)
+        runtime_root = self.runtime_root_for_crawl(crawl)
+        runtime_profile_dir = self.runtime_profile_dir_for_crawl(crawl)
+        runtime_downloads_dir = self.runtime_downloads_dir_for_crawl(crawl)
+
+        with self.lock_runtime_for_crawl():
+            if not runtime_profile_dir.exists():
+                if template_dir.exists() and any(template_dir.iterdir()):
+                    self.copy_chrome_profile(template_dir, runtime_profile_dir)
+                else:
+                    runtime_profile_dir.mkdir(parents=True, exist_ok=True)
+
+            runtime_downloads_dir.mkdir(parents=True, exist_ok=True)
+            self.cleanup_chrome_profile(runtime_profile_dir)
+
+            # explicit UTF-8 so the marker files do not depend on the process locale
+            # (persona names and paths may contain non-ASCII characters)
+            (runtime_root / 'persona_name.txt').write_text(self.name, encoding='utf-8')
+            (runtime_root / 'template_dir.txt').write_text(str(template_dir), encoding='utf-8')
+            if chrome_binary:
+                (runtime_root / 'chrome_binary.txt').write_text(chrome_binary, encoding='utf-8')
+
+        return {
+            'CHROME_USER_DATA_DIR': str(runtime_profile_dir),
+            'CHROME_DOWNLOADS_DIR': str(runtime_downloads_dir),
+        }
+
+    def cleanup_runtime_for_crawl(self, crawl) -> None:
+        """Delete the whole <crawl.output_dir>/.persona tree, ignoring any errors."""
+        persona_runtime_dir = Path(crawl.output_dir) / '.persona'
+        shutil.rmtree(persona_runtime_dir, ignore_errors=True)
+
    @classmethod
    def get_or_create_default(cls) -> 'Persona':
        """Get or create the Default persona."""
--- a/archivebox/personas/tests.py
+++ b/archivebox/personas/tests.py
@@ -1,3 +1,2 @@
-from django.test import TestCase

 # Create your tests here.
--- a/archivebox/personas/views.py
+++ b/archivebox/personas/views.py
@@ -1,3 +1,2 @@
-from django.shortcuts import render

 # Create your views here.
--- a/archivebox/search/init.py
+++ b/archivebox/search/init.py
@@ -14,7 +14,7 @@ Search backends must provide a search.py module with:

 __package__ = 'archivebox.search'

-from typing import TYPE_CHECKING, Any, Optional
+from typing import Any, Optional

 from django.db.models import QuerySet

@@ -22,9 +22,6 @@ from archivebox.misc.util import enforce_types
 from archivebox.misc.logging import stderr
 from archivebox.config.common import SEARCH_BACKEND_CONFIG

-if TYPE_CHECKING:
-    from archivebox.core.models import Snapshot
-

 # Cache discovered backends to avoid repeated filesystem scans
 _search_backends_cache: Optional[dict] = None
--- a/archivebox/tests/conftest.py
+++ b/archivebox/tests/conftest.py
@@ -1,7 +1,6 @@
 """archivebox/tests/conftest.py - Pytest fixtures for CLI tests."""

 import os
-import shutil
 import sys
 import subprocess
 import textwrap
@@ -13,6 +12,8 @@ import pytest

 from archivebox.uuid_compat import uuid7

+pytest_plugins = ["archivebox.tests.fixtures"]
+

 # =============================================================================
 # CLI Helpers (defined before fixtures that use them)
--- a/archivebox/tests/test_add.py
+++ b/archivebox/tests/test_add.py
@@ -1,9 +1,6 @@
-import subprocess
-import json
-import sqlite3
 import os
-
-from .fixtures import *
+import sqlite3
+import subprocess

 def test_depth_flag_is_accepted(process, disable_extractors_dict):
    arg_process = subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"],
@@ -31,7 +28,7 @@ def test_depth_flag_fails_if_it_is_not_0_or_1(process, disable_extractors_dict):

 def test_depth_flag_0_creates_source_file(tmp_path, process, disable_extractors_dict):
    os.chdir(tmp_path)
-    arg_process = subprocess.run(
+    subprocess.run(
        ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
        capture_output=True,
        env=disable_extractors_dict,
--- a/archivebox/tests/test_admin_views.py
+++ b/archivebox/tests/test_admin_views.py
@@ -9,7 +9,7 @@ Tests cover:
 """

 import pytest
-from django.test import TestCase, Client, override_settings
+from django.test import override_settings
 from django.urls import reverse
 from django.contrib.auth import get_user_model

--- a/archivebox/tests/test_auth_ldap.py
+++ b/archivebox/tests/test_auth_ldap.py
@@ -9,7 +9,7 @@ import os
 import sys
 import tempfile
 import unittest
-from pathlib import Path
+from importlib.util import find_spec


 class TestLDAPConfig(unittest.TestCase):
@@ -100,13 +100,7 @@ class TestLDAPIntegration(unittest.TestCase):

    def test_django_settings_with_ldap_library_check(self):
        """Test that Django settings check for LDAP libraries when enabled."""
-        # Try to import django-auth-ldap to see if it's available
-        try:
-            import django_auth_ldap
-            import ldap
-            ldap_available = True
-        except ImportError:
-            ldap_available = False
+        ldap_available = find_spec("django_auth_ldap") is not None and find_spec("ldap") is not None

        # If LDAP libraries are not available, settings should handle gracefully
        if not ldap_available:
--- a/archivebox/tests/test_cli_add.py
+++ b/archivebox/tests/test_cli_add.py
@@ -5,11 +5,8 @@ Verify add creates snapshots in DB, crawls, source files, and archive directorie
 """

 import os
-import subprocess
 import sqlite3
-from pathlib import Path
-
-from .fixtures import *
+import subprocess


 def test_add_single_url_creates_snapshot_in_db(tmp_path, process, disable_extractors_dict):
@@ -169,6 +166,30 @@ def test_add_with_tags(tmp_path, process, disable_extractors_dict):
    assert 'test' in tags_str or 'example' in tags_str


+def test_add_records_selected_persona_on_crawl(tmp_path, process, disable_extractors_dict):
+    """Test add persists the selected persona so browser config derives from it later."""
+    os.chdir(tmp_path)
+    result = subprocess.run(
+        ['archivebox', 'add', '--index-only', '--depth=0', '--persona=Default', 'https://example.com'],
+        capture_output=True,
+        env=disable_extractors_dict,
+    )
+
+    # show the CLI's stderr when the command fails, not just a bare AssertionError
+    assert result.returncode == 0, result.stderr.decode("utf-8", errors="replace")
+
+    conn = sqlite3.connect("index.sqlite3")
+    try:
+        row = conn.execute(
+            "SELECT persona_id, json_extract(config, '$.DEFAULT_PERSONA') FROM crawls_crawl LIMIT 1"
+        ).fetchone()
+    finally:
+        # always release the DB handle, even if the query itself raises
+        conn.close()
+
+    assert row is not None, "archivebox add did not create any crawls_crawl row"
+    persona_id, default_persona = row
+
+    assert persona_id
+    assert default_persona == 'Default'
+    assert (tmp_path / "personas" / "Default" / "chrome_user_data").is_dir()
+    assert (tmp_path / "personas" / "Default" / "chrome_extensions").is_dir()
+
 def test_add_duplicate_url_creates_separate_crawls(tmp_path, process, disable_extractors_dict):
    """Test that adding the same URL twice creates separate crawls and snapshots.

--- a/archivebox/tests/test_cli_archiveresult.py
+++ b/archivebox/tests/test_cli_archiveresult.py
@@ -9,7 +9,6 @@ Tests cover:
 """

 import json
-import pytest

 from archivebox.tests.conftest import (
    run_archivebox_cmd,
--- a/archivebox/tests/test_cli_config.py
+++ b/archivebox/tests/test_cli_config.py
@@ -6,9 +6,6 @@ Verify config reads/writes ArchiveBox.conf file correctly.

 import os
 import subprocess
-from pathlib import Path
-
-from .fixtures import *


 def test_config_displays_all_config(tmp_path, process):
--- a/archivebox/tests/test_cli_crawl.py
+++ b/archivebox/tests/test_cli_crawl.py
@@ -9,14 +9,11 @@ Tests cover:
 """

 import json
-import pytest

 from archivebox.tests.conftest import (
    run_archivebox_cmd,
    parse_jsonl_output,
-    assert_jsonl_contains_type,
    create_test_url,
-    create_test_crawl_json,
 )


--- a/archivebox/tests/test_cli_extract.py
+++ b/archivebox/tests/test_cli_extract.py
@@ -5,10 +5,8 @@ Verify extract re-runs extractors on existing snapshots.
 """

 import os
-import subprocess
 import sqlite3
-
-from .fixtures import *
+import subprocess


 def test_extract_runs_on_existing_snapshots(tmp_path, process, disable_extractors_dict):
--- a/archivebox/tests/test_cli_help.py
+++ b/archivebox/tests/test_cli_help.py
@@ -7,8 +7,6 @@ Verify command runs successfully and produces output.
 import os
 import subprocess

-from .fixtures import *
-

 def test_help_runs_successfully(tmp_path):
    """Test that help command runs and produces output."""
--- a/archivebox/tests/test_cli_init.py
+++ b/archivebox/tests/test_cli_init.py
@@ -5,14 +5,11 @@ Verify init creates correct database schema, filesystem structure, and config.
 """

 import os
-import subprocess
 import sqlite3
-from pathlib import Path
+import subprocess

 from archivebox.config.common import STORAGE_CONFIG

-from .fixtures import *
-

 DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')

--- a/archivebox/tests/test_cli_install.py
+++ b/archivebox/tests/test_cli_install.py
@@ -5,12 +5,10 @@ Verify install detects and records binary dependencies in DB.
 """

 import os
-import subprocess
 import sqlite3
+import subprocess
 from pathlib import Path

-from .fixtures import *
-

 def test_install_runs_successfully(tmp_path, process):
    """Test that install command runs without error."""
--- a/archivebox/tests/test_cli_manage.py
+++ b/archivebox/tests/test_cli_manage.py
@@ -6,9 +6,6 @@ Verify manage command runs Django management commands.

 import os
 import subprocess
-import sqlite3
-
-from .fixtures import *


 def test_manage_help_works(tmp_path, process):
--- a/archivebox/tests/test_cli_remove.py
+++ b/archivebox/tests/test_cli_remove.py
@@ -5,11 +5,8 @@ Verify remove deletes snapshots from DB and filesystem.
 """

 import os
-import subprocess
 import sqlite3
-from pathlib import Path
-
-from .fixtures import *
+import subprocess


 def test_remove_deletes_snapshot_from_db(tmp_path, process, disable_extractors_dict):
--- a/archivebox/tests/test_cli_run.py
+++ b/archivebox/tests/test_cli_run.py
@@ -8,7 +8,6 @@ Tests cover:
 """

 import json
-import pytest

 from archivebox.tests.conftest import (
    run_archivebox_cmd,
--- a/archivebox/tests/test_cli_run_binary_worker.py
+++ b/archivebox/tests/test_cli_run_binary_worker.py
@@ -10,11 +10,9 @@ Tests cover:

 import json
 import sqlite3
-import time

 from archivebox.tests.conftest import (
    run_archivebox_cmd,
-    parse_jsonl_output,
 )


--- a/archivebox/tests/test_cli_schedule.py
+++ b/archivebox/tests/test_cli_schedule.py
@@ -5,7 +5,6 @@ import os
 import sqlite3
 import subprocess

-from .fixtures import process, disable_extractors_dict


 def test_schedule_run_all_enqueues_scheduled_crawl(tmp_path, process, disable_extractors_dict):
--- a/archivebox/tests/test_cli_search.py
+++ b/archivebox/tests/test_cli_search.py
@@ -6,9 +6,6 @@ Verify search queries snapshots from DB.

 import os
 import subprocess
-import sqlite3
-
-from .fixtures import *


 def test_search_finds_snapshots(tmp_path, process, disable_extractors_dict):
--- a/archivebox/tests/test_cli_server.py
+++ b/archivebox/tests/test_cli_server.py
@@ -6,10 +6,6 @@ Verify server can start (basic smoke tests only, no full server testing).

 import os
 import subprocess
-import signal
-import time
-
-from .fixtures import *


 def test_server_shows_usage_info(tmp_path, process):
--- a/archivebox/tests/test_cli_shell.py
+++ b/archivebox/tests/test_cli_shell.py
@@ -7,8 +7,6 @@ Verify shell command starts Django shell (basic smoke tests only).
 import os
 import subprocess

-from .fixtures import *
-

 def test_shell_command_exists(tmp_path, process):
    """Test that shell command is recognized."""
--- a/archivebox/tests/test_cli_snapshot.py
+++ b/archivebox/tests/test_cli_snapshot.py
@@ -9,12 +9,10 @@ Tests cover:
 """

 import json
-import pytest

 from archivebox.tests.conftest import (
    run_archivebox_cmd,
    parse_jsonl_output,
-    assert_jsonl_contains_type,
    create_test_url,
 )

--- a/archivebox/tests/test_cli_status.py
+++ b/archivebox/tests/test_cli_status.py
@@ -5,12 +5,10 @@ Verify status reports accurate collection state from DB and filesystem.
 """

 import os
-import subprocess
 import sqlite3
+import subprocess
 from pathlib import Path

-from .fixtures import *
-

 def _find_snapshot_dir(data_dir: Path, snapshot_id: str) -> Path | None:
    candidates = {snapshot_id}
--- a/archivebox/tests/test_cli_update.py
+++ b/archivebox/tests/test_cli_update.py
@@ -5,10 +5,8 @@ Verify update drains old dirs, reconciles DB, and queues snapshots.
 """

 import os
-import subprocess
 import sqlite3
-
-from .fixtures import *
+import subprocess


 def test_update_runs_successfully_on_empty_archive(tmp_path, process):
--- a/archivebox/tests/test_cli_version.py
+++ b/archivebox/tests/test_cli_version.py
@@ -11,7 +11,9 @@ import tempfile
 import subprocess
 from pathlib import Path

-from .fixtures import *
+from .fixtures import process
+
+FIXTURES = (process,)


 def _archivebox_cli() -> str:
--- a/archivebox/tests/test_config.py
+++ b/archivebox/tests/test_config.py
@@ -6,7 +6,6 @@ import subprocess

 import pytest

-from .fixtures import process, disable_extractors_dict


 def test_config_shows_all_config_values(tmp_path, process):
@@ -49,6 +48,7 @@ def test_config_set_value_writes_to_config_file(tmp_path, process):
        capture_output=True,
        text=True,
    )
+    assert result.returncode == 0, result.stderr

    # Read the config file directly to verify it was written
    config_file = tmp_path / 'ArchiveBox.conf'
--- a/archivebox/tests/test_crawl.py
+++ b/archivebox/tests/test_crawl.py
@@ -4,11 +4,9 @@
 import os
 import subprocess
 import sqlite3
-import json

 import pytest

-from .fixtures import process, disable_extractors_dict


 def test_crawl_creates_crawl_object(tmp_path, process, disable_extractors_dict):
--- a/archivebox/tests/test_extract.py
+++ b/archivebox/tests/test_extract.py
@@ -8,7 +8,6 @@ import json

 import pytest

-from .fixtures import process, disable_extractors_dict


 def test_extract_runs_on_snapshot_id(tmp_path, process, disable_extractors_dict):
@@ -231,6 +230,7 @@ def test_extract_multiple_snapshots(tmp_path, process, disable_extractors_dict):
        text=True,
        env=disable_extractors_dict,
    )
+    assert result.returncode == 0, result.stderr

    # Should not error
    conn = sqlite3.connect('index.sqlite3')
--- a/archivebox/tests/test_extractors.py
+++ b/archivebox/tests/test_extractors.py
@@ -1,8 +1,12 @@
-from .fixtures import *
 import json as pyjson
 import sqlite3
+import subprocess
 from pathlib import Path

+from .fixtures import disable_extractors_dict, process
+
+FIXTURES = (disable_extractors_dict, process)
+

 def _find_snapshot_dir(data_dir: Path, snapshot_id: str) -> Path | None:
    candidates = {snapshot_id}
--- a/archivebox/tests/test_hooks.py
+++ b/archivebox/tests/test_hooks.py
@@ -16,7 +16,7 @@ import subprocess
 import tempfile
 import unittest
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch

 # Set up Django before importing any Django-dependent modules
 os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
--- a/archivebox/tests/test_init.py
+++ b/archivebox/tests/test_init.py
@@ -3,13 +3,13 @@

 import os
 import subprocess
-from pathlib import Path
-import json, shutil
 import sqlite3

 from archivebox.config.common import STORAGE_CONFIG

-from .fixtures import *
+from .fixtures import disable_extractors_dict, process
+
+FIXTURES = (disable_extractors_dict, process)

 DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')

@@ -25,6 +25,7 @@ def test_add_link(tmp_path, process, disable_extractors_dict):
    os.chdir(tmp_path)
    add_process = subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'],
                                  capture_output=True, env=disable_extractors_dict)
+    assert add_process.returncode == 0, add_process.stderr.decode("utf-8")

    # In the new architecture, URLs are saved to source files
    # Check that a source file was created with the URL
@@ -41,6 +42,7 @@ def test_add_multiple_urls(tmp_path, process, disable_extractors_dict):
    os.chdir(tmp_path)
    add_process = subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com', 'https://iana.org'],
                                  capture_output=True, env=disable_extractors_dict)
+    assert add_process.returncode == 0, add_process.stderr.decode("utf-8")

    # Check that a source file was created with both URLs
    sources_dir = tmp_path / "sources"
@@ -61,6 +63,7 @@ def test_correct_permissions_add_command_results(tmp_path, process, disable_extr
    os.chdir(tmp_path)
    add_process = subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True,
                                  env=disable_extractors_dict)
+    assert add_process.returncode == 0, add_process.stderr.decode("utf-8")

    # Check database permissions
    assert oct((tmp_path / "index.sqlite3").stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
--- a/archivebox/tests/test_install.py
+++ b/archivebox/tests/test_install.py
@@ -7,7 +7,6 @@ import sqlite3

 import pytest

-from .fixtures import process, disable_extractors_dict


 class TestInstallDryRun:
--- a/archivebox/tests/test_list.py
+++ b/archivebox/tests/test_list.py
@@ -1,7 +1,9 @@
 import json
 import subprocess

-from .fixtures import *
+from .fixtures import disable_extractors_dict, process
+
+FIXTURES = (disable_extractors_dict, process)

 def test_search_json(process, disable_extractors_dict):
    subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"],
--- a/archivebox/tests/test_migrations_08_to_09.py
+++ b/archivebox/tests/test_migrations_08_to_09.py
@@ -10,10 +10,8 @@ Migration tests from 0.8.x to 0.9.x.
 - New fields like depth, retry_at, etc.
 """

-import json
 import shutil
 import sqlite3
-import subprocess
 import tempfile
 import unittest
 from pathlib import Path
@@ -579,7 +577,7 @@ class TestFilesystemMigration08to09(unittest.TestCase):
                        f"Files lost during migration: {files_before_count} -> {files_after_count}")

        # Run update to trigger filesystem reorganization
-        print(f"\n[*] Running archivebox update to reorganize filesystem...")
+        print("\n[*] Running archivebox update to reorganize filesystem...")
        result = run_archivebox(self.work_dir, ['update'], timeout=120)
        self.assertEqual(result.returncode, 0, f"Update failed: {result.stderr}")

@@ -657,7 +655,7 @@ class TestFilesystemMigration08to09(unittest.TestCase):

        # CRITICAL: Verify sample files exist in new structure
        self.assertGreater(len(new_sample_files), 0,
-                          f"Sample files not found in new structure")
+                          "Sample files not found in new structure")

        # Verify new path format
        for path_key, file_path in new_sample_files.items():
--- a/archivebox/tests/test_recursive_crawl.py
+++ b/archivebox/tests/test_recursive_crawl.py
@@ -10,7 +10,6 @@ from pathlib import Path

 import pytest

-from .fixtures import process, disable_extractors_dict, recursive_test_site


 def wait_for_db_condition(timeout, condition, interval=0.5):
@@ -77,7 +76,6 @@ def test_background_hooks_dont_block_parser_extractors(tmp_path, process, recurs
        "SAVE_ARCHIVEDOTORG": "false",
        "SAVE_TITLE": "false",
        "SAVE_FAVICON": "true",
-        "SAVE_WGET": "false",
    })

    proc = subprocess.Popen(
--- a/archivebox/tests/test_remove.py
+++ b/archivebox/tests/test_remove.py
@@ -1,7 +1,10 @@
 import os
 import sqlite3
+import subprocess

-from .fixtures import *
+from .fixtures import disable_extractors_dict, process
+
+FIXTURES = (disable_extractors_dict, process)

 def test_remove_single_snapshot(tmp_path, process, disable_extractors_dict):
    """Test removing a snapshot by URL pattern"""
--- a/archivebox/tests/test_schedule.py
+++ b/archivebox/tests/test_schedule.py
@@ -7,7 +7,6 @@ import subprocess

 import pytest

-from .fixtures import process


 def _fetchone(tmp_path, query):
--- a/archivebox/tests/test_schedule_e2e.py
+++ b/archivebox/tests/test_schedule_e2e.py
@@ -0,0 +1,420 @@
+#!/usr/bin/env python3
+"""End-to-end tests for scheduling across CLI, server, API, and web UI."""
+
+import os
+import socket
+import sqlite3
+import subprocess
+import sys
+import textwrap
+import time
+from pathlib import Path
+
+import pytest
+import requests
+
+from .conftest import run_python_cwd
+
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+
+
+def init_archive(cwd: Path) -> None:
+    """Run `archivebox init --quick` in *cwd* and assert that it exits with 0.
+
+    The captured stderr is used as the assertion message when init fails.
+    """
+    init_cmd = [sys.executable, '-m', 'archivebox', 'init', '--quick']
+    completed = subprocess.run(init_cmd, cwd=cwd, capture_output=True,
+                               text=True, timeout=60)
+    assert completed.returncode == 0, completed.stderr
+
+
+def build_test_env(port: int, **extra: str) -> dict[str, str]:
+    """Build the environment used to run archivebox subprocesses in these tests.
+
+    Starts from a copy of os.environ without DATA_DIR, points the listen host
+    and CSRF origin at `port`, sets every listed SAVE_* flag except SAVE_WGET
+    to 'False', and finally applies `extra` so callers can override any value.
+    """
+    # Extractor switches forced off; SAVE_WGET is the only one set to 'True' below.
+    extractors_off = dict.fromkeys((
+        'SAVE_ARCHIVEDOTORG', 'SAVE_TITLE', 'SAVE_FAVICON', 'SAVE_WARC', 'SAVE_PDF',
+        'SAVE_SCREENSHOT', 'SAVE_DOM', 'SAVE_SINGLEFILE', 'SAVE_READABILITY', 'SAVE_MERCURY',
+        'SAVE_GIT', 'SAVE_YTDLP', 'SAVE_HEADERS', 'SAVE_HTMLTOTEXT',
+    ), 'False')
+
+    env = os.environ.copy()
+    env.pop('DATA_DIR', None)
+    env.update({
+        'LISTEN_HOST': f'archivebox.localhost:{port}',
+        'ALLOWED_HOSTS': '*',
+        'CSRF_TRUSTED_ORIGINS': f'http://admin.archivebox.localhost:{port}',
+        'PUBLIC_ADD_VIEW': 'True',
+        'USE_COLOR': 'False',
+        'SHOW_PROGRESS': 'False',
+        'TIMEOUT': '20',
+        'URL_ALLOWLIST': r'127\.0\.0\.1[:/].*',
+        **extractors_off,
+        'SAVE_WGET': 'True',
+        'USE_CHROME': 'False',
+    })
+    env.update(extra)
+    return env
+
+
+def get_free_port() -> int:
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+        sock.bind(('127.0.0.1', 0))  # port 0: let the OS choose an unused port
+        return sock.getsockname()[1]  # (host, port)[1]; the socket is closed on leaving `with`
+
+
+def start_server(cwd: Path, env: dict[str, str], port: int) -> None:
+    """Run `archivebox server --daemonize 127.0.0.1:<port>` in *cwd* using *env*.
+
+    Asserts a zero exit code; the captured stderr becomes the failure message.
+    """
+    server_cmd = [sys.executable, '-m', 'archivebox', 'server', '--daemonize', f'127.0.0.1:{port}']
+    launched = subprocess.run(
+        server_cmd, cwd=cwd, capture_output=True, text=True, env=env, timeout=60,
+    )
+    assert launched.returncode == 0, launched.stderr
+
+
+def stop_server(cwd: Path) -> None:
+    """Run a helper script in *cwd* that calls stop_existing_supervisord_process().
+    Whatever run_python_cwd returns for that script is discarded."""
+    stop_script = textwrap.dedent("""
+        import os
+        os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
+        import django
+        django.setup()
+        from archivebox.workers.supervisord_util import stop_existing_supervisord_process
+        stop_existing_supervisord_process()
+        print('stopped')
+        """)
+    run_python_cwd(stop_script, cwd=cwd, timeout=30)
+
+
+def wait_for_http(port: int, host: str, path: str = '/', timeout: int = 30) -> requests.Response:
+    """Poll http://127.0.0.1:<port><path> (sending `Host: host`) until a non-5xx reply arrives.
+
+    Returns that response; raises AssertionError naming the last failure after `timeout` seconds.
+    """
+    deadline = time.monotonic() + timeout  # monotonic: unaffected by wall-clock adjustments
+    last_failure = None
+    while time.monotonic() < deadline:
+        try:
+            response = requests.get(f'http://127.0.0.1:{port}{path}', headers={'Host': host}, timeout=2, allow_redirects=False)
+            if response.status_code < 500:
+                return response
+            last_failure = f'HTTP {response.status_code}'  # a persistent 5xx used to be reported as None
+        except requests.RequestException as exc:
+            last_failure = exc
+        time.sleep(0.5)
+    raise AssertionError(f'Timed out waiting for HTTP on {host}: {last_failure}')
+
+
+def make_latest_schedule_due(cwd: Path) -> None:
+    """Backdate the template crawl of the most recently created schedule by two days.
+    Rewrites created_at and modified_at on that crawls_crawl row in index.sqlite3."""
+    db = sqlite3.connect(cwd / 'index.sqlite3')
+    try:
+        db.execute("""
+            UPDATE crawls_crawl
+            SET created_at = datetime('now', '-2 day'),
+                modified_at = datetime('now', '-2 day')
+            WHERE id = (
+                SELECT template_id
+                FROM crawls_crawlschedule
+                ORDER BY created_at DESC
+                LIMIT 1
+            )
+            """)
+        db.commit()
+    finally:
+        db.close()
+
+
+def get_snapshot_file_text(cwd: Path, url: str) -> str:
+    """Return the stdout of a helper script that prints one captured html/txt file of the newest snapshot of *url*.
+    The script asserts the snapshot exists and is 'sealed'; a non-zero exit raises AssertionError with its stderr."""
+    script = textwrap.dedent(f"""
+        import os
+        from pathlib import Path
+
+        os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
+        import django
+        django.setup()
+
+        from archivebox.core.models import Snapshot
+
+        snapshot = Snapshot.objects.filter(url={url!r}).order_by('-created_at').first()
+        assert snapshot is not None, 'missing snapshot'
+        assert snapshot.status == 'sealed', snapshot.status
+
+        snapshot_dir = Path(snapshot.output_dir)
+        candidates = []
+        preferred_patterns = (
+            'wget/**/index.html',
+            'wget/**/*.html',
+            'trafilatura/content.html',
+            'trafilatura/content.txt',
+            'defuddle/content.html',
+            'defuddle/content.txt',
+        )
+        for pattern in preferred_patterns:
+            for candidate in snapshot_dir.glob(pattern):
+                if candidate.is_file():
+                    candidates.append(candidate)
+
+        if not candidates:
+            for candidate in snapshot_dir.rglob('*'):
+                if not candidate.is_file():
+                    continue
+                rel = candidate.relative_to(snapshot_dir)
+                if rel.parts and rel.parts[0] == 'responses':
+                    continue
+                if candidate.suffix not in ('.html', '.htm', '.txt'):
+                    continue
+                if candidate.name in ('stdout.log', 'stderr.log', 'cmd.sh'):
+                    continue
+                candidates.append(candidate)
+
+        assert candidates, f'no captured html/txt files found in {{snapshot_dir}}'
+        print(candidates[0].read_text(errors='ignore'))
+        """)
+    captured, errors, exit_code = run_python_cwd(script, cwd=cwd, timeout=60)
+    assert exit_code == 0, errors
+    return captured
+
+
+def wait_for_snapshot_capture(cwd: Path, url: str, timeout: int = 180) -> str:
+    """Retry get_snapshot_file_text(cwd, url) every 2s; AssertionError with the last failure after `timeout` s."""
+    deadline, last_error = time.monotonic() + timeout, None  # monotonic: unaffected by wall-clock jumps
+    while time.monotonic() < deadline:
+        try:
+            return get_snapshot_file_text(cwd, url)
+        except AssertionError as err:
+            last_error = err
+            time.sleep(2)
+    raise AssertionError(f'timed out waiting for captured content for {url}: {last_error}')
+
+
+def get_counts(cwd: Path, scheduled_url: str, one_shot_url: str) -> tuple[int, int, int]:
+    """Count the index.sqlite3 rows that the one-shot-vs-scheduled test asserts on.
+
+    Returns (snapshots of scheduled_url, snapshots of one_shot_url,
+    crawls with a non-NULL schedule_id whose urls equal scheduled_url).
+    """
+    snapshot_count_sql = "SELECT COUNT(*) FROM core_snapshot WHERE url = ?"
+    db = sqlite3.connect(cwd / 'index.sqlite3')
+    try:
+        (scheduled_snapshots,) = db.execute(snapshot_count_sql, (scheduled_url,)).fetchone()
+        (one_shot_snapshots,) = db.execute(snapshot_count_sql, (one_shot_url,)).fetchone()
+        (scheduled_crawls,) = db.execute(
+            """
+            SELECT COUNT(*)
+            FROM crawls_crawl
+            WHERE schedule_id IS NOT NULL
+              AND urls = ?
+            """,
+            (scheduled_url,),
+        ).fetchone()
+        return scheduled_snapshots, one_shot_snapshots, scheduled_crawls
+    finally:
+        db.close()
+
+
+def create_admin_and_token(cwd: Path) -> str:
+    """Ensure superuser 'apitestadmin' exists in the archive at *cwd* and create an APIToken for it.
+    Returns the last stdout line of the helper script, which prints the new token (expires in 1 day)."""
+    script = textwrap.dedent("""
+        import os
+        from datetime import timedelta
+        from django.utils import timezone
+
+        os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
+        import django
+        django.setup()
+
+        from django.contrib.auth import get_user_model
+        from archivebox.api.models import APIToken
+
+        User = get_user_model()
+        user, _ = User.objects.get_or_create(
+            username='apitestadmin',
+            defaults={
+                'email': 'apitestadmin@example.com',
+                'is_staff': True,
+                'is_superuser': True,
+            },
+        )
+        user.is_staff = True
+        user.is_superuser = True
+        user.set_password('testpass123')
+        user.save()
+
+        token = APIToken.objects.create(
+            created_by=user,
+            expires=timezone.now() + timedelta(days=1),
+        )
+        print(token.token)
+        """)
+    printed, errors, exit_code = run_python_cwd(script, cwd=cwd, timeout=60)
+    assert exit_code == 0, errors
+    return printed.strip().splitlines()[-1]
+
+
+# The pytest timeout must exceed the 180s capture wait below so that wait's AssertionError can fire first.
+@pytest.mark.timeout(300)
+def test_server_processes_due_cli_schedule_and_saves_real_content(tmp_path, recursive_test_site):
+    """A schedule created via the CLI and made due is processed by a running server and captured."""
+    os.chdir(tmp_path)
+    init_archive(tmp_path)
+
+    port = get_free_port()
+    env = build_test_env(port)
+    root_url = recursive_test_site['root_url']
+
+    schedule_cmd = [sys.executable, '-m', 'archivebox', 'schedule', '--every=daily', '--depth=0', root_url]
+    schedule_result = subprocess.run(
+        schedule_cmd, cwd=tmp_path, capture_output=True, text=True, env=env, timeout=60,
+    )
+    assert schedule_result.returncode == 0, schedule_result.stderr
+    assert 'Created scheduled crawl' in schedule_result.stdout
+
+    make_latest_schedule_due(tmp_path)
+
+    try:
+        start_server(tmp_path, env=env, port=port)
+        wait_for_http(port, host=f'web.archivebox.localhost:{port}')
+        captured_text = wait_for_snapshot_capture(tmp_path, root_url, timeout=180)
+        # Presumably text from the fixture site's root page; verify against recursive_test_site.
+        assert 'Root' in captured_text
+        assert 'About' in captured_text
+    finally:
+        stop_server(tmp_path)
+
+
+@pytest.mark.timeout(180)
+def test_archivebox_add_remains_one_shot_even_when_schedule_is_due(tmp_path, recursive_test_site):
+    """`archivebox add` must archive only its own URL, even while a schedule is overdue.
+
+    After the add, the scheduled URL must still have no snapshot and exactly one
+    schedule-linked crawl row (the template created by `archivebox schedule`).
+    """
+    os.chdir(tmp_path)
+    init_archive(tmp_path)
+
+    port = get_free_port()
+    env = build_test_env(port)
+    scheduled_url = recursive_test_site['root_url']
+    one_shot_url = recursive_test_site['child_urls'][0]
+    run_opts = {
+        'cwd': tmp_path,
+        'capture_output': True,
+        'text': True,
+        'env': env,
+    }
+
+    schedule_cmd = [sys.executable, '-m', 'archivebox', 'schedule', '--every=daily', '--depth=0', scheduled_url]
+    schedule_result = subprocess.run(schedule_cmd, timeout=60, **run_opts)
+    assert schedule_result.returncode == 0, schedule_result.stderr
+
+    make_latest_schedule_due(tmp_path)
+
+    add_cmd = [sys.executable, '-m', 'archivebox', 'add', '--depth=0', '--plugins=wget', one_shot_url]
+    add_result = subprocess.run(add_cmd, timeout=120, **run_opts)
+    assert add_result.returncode == 0, add_result.stderr
+    captured_text = wait_for_snapshot_capture(tmp_path, one_shot_url, timeout=120)
+    # 'Deep About' contains 'About', so this single check equals the former two-way `or`.
+    assert 'About' in captured_text
+
+    scheduled_snapshots, one_shot_snapshots, scheduled_crawls = get_counts(tmp_path, scheduled_url, one_shot_url)
+    assert one_shot_snapshots >= 1
+    assert scheduled_snapshots == 0
+    assert scheduled_crawls == 1  # template only, no materialized scheduled run
+
+
+@pytest.mark.timeout(180)
+def test_schedule_rest_api_works_over_running_server(tmp_path, recursive_test_site):
+    """POST /api/v1/cli/schedule against a running server and check the JSON reply.
+
+    Authenticates with a freshly created API token sent as X-ArchiveBox-API-Key.
+    """
+    os.chdir(tmp_path)
+    init_archive(tmp_path)
+
+    port = get_free_port()
+    env = build_test_env(port)
+    api_token = create_admin_and_token(tmp_path)
+    api_host = f'api.archivebox.localhost:{port}'
+
+    try:
+        start_server(tmp_path, env=env, port=port)
+        wait_for_http(port, host=api_host, path='/api/v1/docs')
+
+        request_headers = {'Host': api_host, 'X-ArchiveBox-API-Key': api_token}
+        request_body = {'every': 'daily', 'import_path': recursive_test_site['root_url'], 'quiet': True}
+        response = requests.post(
+            f'http://127.0.0.1:{port}/api/v1/cli/schedule',
+            headers=request_headers,
+            json=request_body,
+            timeout=10,
+        )
+
+        assert response.status_code == 200, response.text
+        payload = response.json()
+        assert payload['success'] is True
+        assert payload['result_format'] == 'json'
+        assert len(payload['result']['created_schedule_ids']) == 1
+    finally:
+        stop_server(tmp_path)
+
+
+@pytest.mark.timeout(180)
+def test_schedule_web_ui_post_works_over_running_server(tmp_path, recursive_test_site):
+    """POSTing the /add/ form with schedule='daily' redirects and stores the schedule with its crawl."""
+    os.chdir(tmp_path)
+    init_archive(tmp_path)
+
+    port = get_free_port()
+    env = build_test_env(port, PUBLIC_ADD_VIEW='True')
+    web_host = f'web.archivebox.localhost:{port}'
+    root_url = recursive_test_site['root_url']
+    form_fields = {
+        'url': root_url,
+        'depth': '0',
+        'schedule': 'daily',
+        'tag': 'web-ui',
+        'notes': 'created from web ui',
+    }
+
+    try:
+        start_server(tmp_path, env=env, port=port)
+        wait_for_http(port, host=web_host, path='/add/')
+
+        response = requests.post(
+            f'http://127.0.0.1:{port}/add/',
+            headers={'Host': web_host},
+            data=form_fields,
+            timeout=10, allow_redirects=False,
+        )
+        assert response.status_code in (302, 303), response.text
+
+        db = sqlite3.connect(tmp_path / 'index.sqlite3')
+        try:
+            newest_row = db.execute("""
+                SELECT cs.schedule, c.urls, c.tags_str
+                FROM crawls_crawlschedule cs
+                JOIN crawls_crawl c ON c.schedule_id = cs.id
+                ORDER BY cs.created_at DESC
+                LIMIT 1
+                """).fetchone()
+        finally:
+            db.close()
+
+        assert newest_row == ('daily', root_url, 'web-ui')
+    finally:
+        stop_server(tmp_path)
--- a/archivebox/tests/test_search.py
+++ b/archivebox/tests/test_search.py
@@ -3,12 +3,9 @@

 import os
 import subprocess
-import sqlite3
-import json

 import pytest

-from .fixtures import process, disable_extractors_dict


 def test_search_returns_snapshots(tmp_path, process, disable_extractors_dict):
--- a/archivebox/tests/test_snapshot.py
+++ b/archivebox/tests/test_snapshot.py
@@ -6,13 +6,11 @@ import subprocess
 import sqlite3
 from archivebox.machine.models import Process
 from datetime import datetime
-from pathlib import Path
 from urllib.parse import urlparse
 import uuid

 import pytest

-from .fixtures import process, disable_extractors_dict


 def test_snapshot_creates_snapshot_with_correct_url(tmp_path, process, disable_extractors_dict):
@@ -46,9 +44,7 @@ def test_snapshot_creates_snapshot_with_correct_url(tmp_path, process, disable_e

    snapshot_id_raw, snapshot_created_at, snapshot_url, crawl_id = snapshot_row
    snapshot_id = str(uuid.UUID(snapshot_id_raw))
-    crawl_id, crawl_created_at, crawl_urls, crawl_created_by_id = crawl_row
    username = user_row[0]
-    crawl_date_str = datetime.fromisoformat(crawl_created_at).strftime('%Y%m%d')
    snapshot_date_str = datetime.fromisoformat(snapshot_created_at).strftime('%Y%m%d')
    domain = urlparse(snapshot_url).hostname or 'unknown'

--- a/Show More
+++ b/Show More