bump package versions

This commit is contained in:
Nick Sweeting
2026-03-15 20:47:28 -07:00
parent bc21d4bfdb
commit 9de084da65
32 changed files with 469 additions and 711 deletions

View File

@@ -69,7 +69,7 @@ class ModelWithNotes(models.Model):
"""Mixin for models with a notes field."""
notes = models.TextField(blank=True, null=False, default='')
class Meta:
class Meta(TypedModelMeta):
abstract = True
@@ -78,7 +78,7 @@ class ModelWithHealthStats(models.Model):
num_uses_failed = models.PositiveIntegerField(default=0)
num_uses_succeeded = models.PositiveIntegerField(default=0)
class Meta:
class Meta(TypedModelMeta):
abstract = True
@property
@@ -96,7 +96,7 @@ class ModelWithConfig(models.Model):
"""Mixin for models with a JSON config field."""
config = models.JSONField(default=dict, null=True, blank=True, editable=True)
class Meta:
class Meta(TypedModelMeta):
abstract = True

View File

@@ -297,6 +297,7 @@ def pluginmap(
if not quiet:
# Show diagram if this model has one
if info.get('diagram'):
assert info['diagram'] is not None
prnt(Panel(
info['diagram'],
title=f'[bold green]{info["machine"]}[/bold green]',

View File

@@ -69,8 +69,8 @@ def list_processes(
for process in queryset:
if is_tty:
binary_name_str = process.binary.name if process.binary else 'unknown'
exit_code = process.returncode if process.returncode is not None else '?'
status_color = 'green' if process.returncode == 0 else 'red' if process.returncode else 'yellow'
exit_code = process.exit_code if process.exit_code is not None else '?'
status_color = 'green' if process.exit_code == 0 else 'red' if process.exit_code else 'yellow'
rprint(f'[{status_color}]exit={exit_code:3}[/{status_color}] [cyan]{binary_name_str:15}[/cyan] [dim]{process.id}[/dim]')
else:
write_record(process.to_json())

View File

@@ -208,7 +208,7 @@ def search(filter_patterns: list[str] | None=None,
else:
from archivebox.misc.logging_util import printable_folders
# Convert to dict for printable_folders
folders: dict[str, Snapshot | None] = {snapshot.output_dir: snapshot for snapshot in snapshots}
folders: dict[str, Snapshot | None] = {str(snapshot.output_dir): snapshot for snapshot in snapshots}
output = printable_folders(folders, with_headers)
# Structured exports must be written directly to stdout.

View File

@@ -11,6 +11,8 @@ import unittest
from contextlib import contextmanager
from pathlib import Path
from archivebox.config.constants import CONSTANTS
TEST_CONFIG = {
'USE_COLOR': 'False',
'SHOW_PROGRESS': 'False',
@@ -31,10 +33,9 @@ DATA_DIR = 'data.tests'
os.environ.update(TEST_CONFIG)
init = importlib.import_module('archivebox.main').init
constants = importlib.import_module('archivebox.config.constants')
SQL_INDEX_FILENAME = constants.SQL_INDEX_FILENAME
JSON_INDEX_FILENAME = constants.JSON_INDEX_FILENAME
HTML_INDEX_FILENAME = constants.HTML_INDEX_FILENAME
SQL_INDEX_FILENAME = CONSTANTS.SQL_INDEX_FILENAME
JSON_INDEX_FILENAME = CONSTANTS.JSON_INDEX_FILENAME
HTML_INDEX_FILENAME = CONSTANTS.HTML_INDEX_FILENAME
archivebox_init = importlib.import_module('archivebox.cli.archivebox_init')
archivebox_add = importlib.import_module('archivebox.cli.archivebox_add')
archivebox_remove = importlib.import_module('archivebox.cli.archivebox_remove')

View File

@@ -68,6 +68,15 @@ def require(value: T | None) -> T:
return value
class MockTTYStringIO(StringIO):
    """An in-memory text stream whose isatty() answer is fixed at construction.

    Used by tests to simulate either piped stdin (is_tty=False) or an
    interactive terminal (is_tty=True) without monkeypatching StringIO.
    """

    def __init__(self, initial_value: str = '', *, is_tty: bool):
        # Delegate buffer setup to StringIO, then remember the TTY answer.
        super().__init__(initial_value)
        self._is_tty = is_tty

    def isatty(self) -> bool:
        # Report the configured value instead of probing a real terminal.
        return self._is_tty
# =============================================================================
# JSONL Utility Tests
# =============================================================================
@@ -176,10 +185,7 @@ class TestReadArgsOrStdin(unittest.TestCase):
from archivebox.misc.jsonl import read_args_or_stdin
stdin_content = 'https://example1.com\nhttps://example2.com\n'
stream = StringIO(stdin_content)
# Mock isatty to return False (simulating piped input)
stream.isatty = lambda: False
stream = MockTTYStringIO(stdin_content, is_tty=False)
records = list(read_args_or_stdin((), stream=stream))
@@ -192,8 +198,7 @@ class TestReadArgsOrStdin(unittest.TestCase):
from archivebox.misc.jsonl import read_args_or_stdin
stdin_content = '{"type": "Snapshot", "url": "https://example.com", "tags": "test"}\n'
stream = StringIO(stdin_content)
stream.isatty = lambda: False
stream = MockTTYStringIO(stdin_content, is_tty=False)
records = list(read_args_or_stdin((), stream=stream))
@@ -206,8 +211,7 @@ class TestReadArgsOrStdin(unittest.TestCase):
from archivebox.misc.jsonl import read_args_or_stdin, TYPE_CRAWL
stdin_content = '{"type": "Crawl", "id": "abc123", "urls": "https://example.com\\nhttps://foo.com"}\n'
stream = StringIO(stdin_content)
stream.isatty = lambda: False
stream = MockTTYStringIO(stdin_content, is_tty=False)
records = list(read_args_or_stdin((), stream=stream))
@@ -219,8 +223,7 @@ class TestReadArgsOrStdin(unittest.TestCase):
"""Should not read from TTY stdin (would block)."""
from archivebox.misc.jsonl import read_args_or_stdin
stream = StringIO('https://example.com')
stream.isatty = lambda: True # Simulate TTY
stream = MockTTYStringIO('https://example.com', is_tty=True)
records = list(read_args_or_stdin((), stream=stream))
self.assertEqual(len(records), 0)
@@ -297,8 +300,7 @@ class TestSnapshotCommand(unittest.TestCase):
"""snapshot should accept Crawl JSONL as input."""
from archivebox.misc.jsonl import read_args_or_stdin, TYPE_CRAWL
stdin = StringIO('{"type": "Crawl", "id": "abc123", "urls": "https://example.com"}\n')
stdin.isatty = lambda: False
stdin = MockTTYStringIO('{"type": "Crawl", "id": "abc123", "urls": "https://example.com"}\n', is_tty=False)
records = list(read_args_or_stdin((), stream=stdin))
@@ -311,8 +313,7 @@ class TestSnapshotCommand(unittest.TestCase):
"""snapshot should accept JSONL with tags and other metadata."""
from archivebox.misc.jsonl import read_args_or_stdin
stdin = StringIO('{"type": "Snapshot", "url": "https://example.com", "tags": "tag1,tag2", "title": "Test"}\n')
stdin.isatty = lambda: False
stdin = MockTTYStringIO('{"type": "Snapshot", "url": "https://example.com", "tags": "tag1,tag2", "title": "Test"}\n', is_tty=False)
records = list(read_args_or_stdin((), stream=stdin))
@@ -353,8 +354,7 @@ class TestArchiveResultCommand(unittest.TestCase):
"""archiveresult should accept JSONL Snapshot records."""
from archivebox.misc.jsonl import read_args_or_stdin, TYPE_SNAPSHOT
stdin = StringIO('{"type": "Snapshot", "id": "abc123", "url": "https://example.com"}\n')
stdin.isatty = lambda: False
stdin = MockTTYStringIO('{"type": "Snapshot", "id": "abc123", "url": "https://example.com"}\n', is_tty=False)
records = list(read_args_or_stdin((), stream=stdin))
@@ -461,395 +461,6 @@ class TestURLCollection(unittest.TestCase):
self.assertEqual(len(urls), 0)
# =============================================================================
# Integration Tests
# =============================================================================
class TestPipingWorkflowIntegration(unittest.TestCase):
"""
Integration tests for the complete piping workflow.
These tests require Django to be set up and use the actual database.
"""
# NOTE(review): indentation appears to have been stripped by the diff/export
# tooling; the original file presumably used standard 4-space nesting — confirm
# against the repository before reusing this text as code.
@classmethod
def setUpClass(cls):
"""Set up Django and test database."""
# Isolated temp DATA_DIR per test class so archives/databases don't collide.
cls.test_dir = tempfile.mkdtemp()
os.environ['DATA_DIR'] = cls.test_dir
# Initialize Django
from archivebox.config.django import setup_django
setup_django()
# Initialize the archive
from archivebox.cli.archivebox_init import init
init()
@classmethod
def tearDownClass(cls):
"""Clean up test database."""
# ignore_errors: best-effort cleanup; a leftover temp dir must not fail the suite.
shutil.rmtree(cls.test_dir, ignore_errors=True)
def test_crawl_creates_and_outputs_jsonl(self):
"""
Test: archivebox crawl URL1 URL2 URL3
Should create a single Crawl with all URLs and output JSONL when piped.
"""
from archivebox.crawls.models import Crawl
from archivebox.misc.jsonl import TYPE_CRAWL
from archivebox.base_models.models import get_or_create_system_user_pk
created_by_id = get_or_create_system_user_pk()
# Create crawl with multiple URLs (as newline-separated string)
urls = 'https://test-crawl-1.example.com\nhttps://test-crawl-2.example.com'
crawl = require(Crawl.from_json({'urls': urls}, overrides={'created_by_id': created_by_id}))
self.assertIsNotNone(crawl.id)
self.assertEqual(crawl.urls, urls)
self.assertEqual(crawl.status, 'queued')
# Verify URLs list
urls_list = crawl.get_urls_list()
self.assertEqual(len(urls_list), 2)
self.assertIn('https://test-crawl-1.example.com', urls_list)
self.assertIn('https://test-crawl-2.example.com', urls_list)
# Verify output format
output = crawl.to_json()
self.assertEqual(output['type'], TYPE_CRAWL)
self.assertIn('id', output)
self.assertEqual(output['urls'], urls)
self.assertIn('schema_version', output)
def test_snapshot_accepts_crawl_jsonl(self):
"""
Test: archivebox crawl URL | archivebox snapshot
Snapshot should accept Crawl JSONL and create Snapshots for each URL.
"""
from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot
from archivebox.misc.jsonl import (
read_args_or_stdin,
TYPE_CRAWL, TYPE_SNAPSHOT
)
from archivebox.base_models.models import get_or_create_system_user_pk
created_by_id = get_or_create_system_user_pk()
# Step 1: Create crawl (simulating 'archivebox crawl')
urls = 'https://crawl-to-snap-1.example.com\nhttps://crawl-to-snap-2.example.com'
crawl = require(Crawl.from_json({'urls': urls}, overrides={'created_by_id': created_by_id}))
crawl_output = crawl.to_json()
# Step 2: Parse crawl output as snapshot input
stdin = StringIO(json.dumps(crawl_output) + '\n')
# NOTE(review): monkeypatching isatty here; the surrounding commit replaces
# this pattern with MockTTYStringIO elsewhere — these tests are being deleted
# by the commit, so presumably no update is needed; verify.
stdin.isatty = lambda: False
records = list(read_args_or_stdin((), stream=stdin))
self.assertEqual(len(records), 1)
self.assertEqual(records[0]['type'], TYPE_CRAWL)
# Step 3: Create snapshots from crawl URLs
created_snapshots = []
for url in crawl.get_urls_list():
snapshot = Snapshot.from_json({'url': url}, overrides={'created_by_id': created_by_id})
if snapshot:
created_snapshots.append(snapshot)
self.assertEqual(len(created_snapshots), 2)
# Verify snapshot output
for snapshot in created_snapshots:
output = snapshot.to_json()
self.assertEqual(output['type'], TYPE_SNAPSHOT)
self.assertIn(output['url'], [
'https://crawl-to-snap-1.example.com',
'https://crawl-to-snap-2.example.com'
])
def test_snapshot_creates_and_outputs_jsonl(self):
"""
Test: archivebox snapshot URL
Should create a Snapshot and output JSONL when piped.
"""
from archivebox.core.models import Snapshot
from archivebox.misc.jsonl import (
read_args_or_stdin, TYPE_SNAPSHOT
)
from archivebox.base_models.models import get_or_create_system_user_pk
created_by_id = get_or_create_system_user_pk()
# Simulate input
url = 'https://test-snapshot-1.example.com'
records = list(read_args_or_stdin((url,)))
self.assertEqual(len(records), 1)
self.assertEqual(records[0]['url'], url)
# Create snapshot
overrides = {'created_by_id': created_by_id}
snapshot = require(Snapshot.from_json(records[0], overrides=overrides))
self.assertIsNotNone(snapshot.id)
self.assertEqual(snapshot.url, url)
# Verify output format
output = snapshot.to_json()
self.assertEqual(output['type'], TYPE_SNAPSHOT)
self.assertIn('id', output)
self.assertEqual(output['url'], url)
def test_extract_accepts_snapshot_from_previous_command(self):
"""
Test: archivebox snapshot URL | archivebox extract
Extract should accept JSONL output from snapshot command.
"""
from archivebox.core.models import Snapshot
from archivebox.misc.jsonl import (
read_args_or_stdin,
TYPE_SNAPSHOT
)
from archivebox.base_models.models import get_or_create_system_user_pk
created_by_id = get_or_create_system_user_pk()
# Step 1: Create snapshot (simulating 'archivebox snapshot')
url = 'https://test-extract-1.example.com'
overrides = {'created_by_id': created_by_id}
snapshot = require(Snapshot.from_json({'url': url}, overrides=overrides))
snapshot_output = snapshot.to_json()
# Step 2: Parse snapshot output as extract input
stdin = StringIO(json.dumps(snapshot_output) + '\n')
stdin.isatty = lambda: False
records = list(read_args_or_stdin((), stream=stdin))
self.assertEqual(len(records), 1)
self.assertEqual(records[0]['type'], TYPE_SNAPSHOT)
self.assertEqual(records[0]['id'], str(snapshot.id))
# Step 3: Gather snapshot IDs (as extract does)
snapshot_ids = set()
for record in records:
if record.get('type') == TYPE_SNAPSHOT and record.get('id'):
snapshot_ids.add(record['id'])
self.assertIn(str(snapshot.id), snapshot_ids)
def test_full_pipeline_crawl_snapshot_extract(self):
"""
Test: archivebox crawl URL | archivebox snapshot | archivebox extract
This is equivalent to: archivebox add --depth=0 URL
"""
from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot
from archivebox.misc.jsonl import (
read_args_or_stdin,
TYPE_CRAWL, TYPE_SNAPSHOT
)
from archivebox.base_models.models import get_or_create_system_user_pk
created_by_id = get_or_create_system_user_pk()
# === archivebox crawl https://example.com ===
url = 'https://test-pipeline-full.example.com'
crawl = require(Crawl.from_json({'url': url}, overrides={'created_by_id': created_by_id}))
crawl_jsonl = json.dumps(crawl.to_json())
# === | archivebox snapshot ===
stdin = StringIO(crawl_jsonl + '\n')
stdin.isatty = lambda: False
records = list(read_args_or_stdin((), stream=stdin))
self.assertEqual(len(records), 1)
self.assertEqual(records[0]['type'], TYPE_CRAWL)
# Create snapshots from crawl
created_snapshots = []
for record in records:
if record.get('type') == TYPE_CRAWL:
crawl_id = record.get('id')
if crawl_id:
# Re-fetch from the DB to mirror what the real snapshot command does.
db_crawl = Crawl.objects.get(id=crawl_id)
for crawl_url in db_crawl.get_urls_list():
snapshot = Snapshot.from_json({'url': crawl_url}, overrides={'created_by_id': created_by_id})
if snapshot:
created_snapshots.append(snapshot)
self.assertEqual(len(created_snapshots), 1)
self.assertEqual(created_snapshots[0].url, url)
# === | archivebox extract ===
snapshot_jsonl_lines = [json.dumps(s.to_json()) for s in created_snapshots]
stdin = StringIO('\n'.join(snapshot_jsonl_lines) + '\n')
stdin.isatty = lambda: False
records = list(read_args_or_stdin((), stream=stdin))
self.assertEqual(len(records), 1)
self.assertEqual(records[0]['type'], TYPE_SNAPSHOT)
self.assertEqual(records[0]['id'], str(created_snapshots[0].id))
class TestDepthWorkflows(unittest.TestCase):
"""Test various depth crawl workflows."""
# NOTE(review): indentation appears stripped by the diff/export tooling;
# confirm against the repository before treating this text as runnable code.
@classmethod
def setUpClass(cls):
"""Set up Django and test database."""
# Fresh temp DATA_DIR so this class's archive state is isolated.
cls.test_dir = tempfile.mkdtemp()
os.environ['DATA_DIR'] = cls.test_dir
from archivebox.config.django import setup_django
setup_django()
from archivebox.cli.archivebox_init import init
init()
@classmethod
def tearDownClass(cls):
"""Clean up test database."""
shutil.rmtree(cls.test_dir, ignore_errors=True)
def test_depth_0_workflow(self):
"""
Test: archivebox crawl URL | archivebox snapshot | archivebox extract
Depth 0: Only archive the specified URL, no recursive crawling.
"""
from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot
from archivebox.base_models.models import get_or_create_system_user_pk
created_by_id = get_or_create_system_user_pk()
# Create crawl with depth 0
url = 'https://depth0-test.example.com'
crawl = require(Crawl.from_json({'url': url, 'max_depth': 0}, overrides={'created_by_id': created_by_id}))
self.assertEqual(crawl.max_depth, 0)
# Create snapshot
snapshot = require(Snapshot.from_json({'url': url}, overrides={'created_by_id': created_by_id}))
self.assertEqual(snapshot.url, url)
def test_depth_metadata_in_crawl(self):
"""Test that depth metadata is stored in Crawl."""
from archivebox.crawls.models import Crawl
from archivebox.base_models.models import get_or_create_system_user_pk
created_by_id = get_or_create_system_user_pk()
# Create crawl with depth
crawl = require(Crawl.from_json(
{'url': 'https://depth-meta-test.example.com', 'max_depth': 2},
overrides={'created_by_id': created_by_id}
))
self.assertEqual(crawl.max_depth, 2)
# Verify in JSONL output — max_depth must round-trip through to_json().
output = crawl.to_json()
self.assertEqual(output['max_depth'], 2)
class TestParserPluginWorkflows(unittest.TestCase):
"""Test workflows with specific parser plugins."""
# NOTE(review): indentation appears stripped by the diff/export tooling;
# confirm against the repository before treating this text as runnable code.
@classmethod
def setUpClass(cls):
"""Set up Django and test database."""
cls.test_dir = tempfile.mkdtemp()
os.environ['DATA_DIR'] = cls.test_dir
from archivebox.config.django import setup_django
setup_django()
from archivebox.cli.archivebox_init import init
init()
@classmethod
def tearDownClass(cls):
"""Clean up test database."""
shutil.rmtree(cls.test_dir, ignore_errors=True)
def test_html_parser_workflow(self):
"""
Test: archivebox crawl --plugin=parse_html_urls URL | archivebox snapshot | archivebox extract
"""
from archivebox.hooks import collect_urls_from_plugins
# Create mock output directory
# Layout mirrors a real snapshot dir: <snapshot>/<plugin>/urls.jsonl
snapshot_dir = Path(self.test_dir) / 'archive' / 'html-parser-test'
snapshot_dir.mkdir(parents=True, exist_ok=True)
(snapshot_dir / 'parse_html_urls').mkdir(exist_ok=True)
(snapshot_dir / 'parse_html_urls' / 'urls.jsonl').write_text(
'{"url": "https://html-discovered.com", "title": "HTML Link"}\n'
)
# Collect URLs
discovered = collect_urls_from_plugins(snapshot_dir)
self.assertEqual(len(discovered), 1)
self.assertEqual(discovered[0]['url'], 'https://html-discovered.com')
# Each discovered record is tagged with the plugin that produced it.
self.assertEqual(discovered[0]['plugin'], 'parse_html_urls')
def test_rss_parser_workflow(self):
"""
Test: archivebox crawl --plugin=parse_rss_urls URL | archivebox snapshot | archivebox extract
"""
from archivebox.hooks import collect_urls_from_plugins
# Create mock output directory
snapshot_dir = Path(self.test_dir) / 'archive' / 'rss-parser-test'
snapshot_dir.mkdir(parents=True, exist_ok=True)
(snapshot_dir / 'parse_rss_urls').mkdir(exist_ok=True)
(snapshot_dir / 'parse_rss_urls' / 'urls.jsonl').write_text(
'{"url": "https://rss-item-1.com", "title": "RSS Item 1"}\n'
'{"url": "https://rss-item-2.com", "title": "RSS Item 2"}\n'
)
# Collect URLs
discovered = collect_urls_from_plugins(snapshot_dir)
self.assertEqual(len(discovered), 2)
self.assertTrue(all(d['plugin'] == 'parse_rss_urls' for d in discovered))
def test_multiple_parsers_dedupe(self):
"""
Multiple parsers may discover the same URL - should be deduplicated.
"""
from archivebox.hooks import collect_urls_from_plugins
# Create mock output with duplicate URLs from different parsers
snapshot_dir = Path(self.test_dir) / 'archive' / 'dedupe-test'
snapshot_dir.mkdir(parents=True, exist_ok=True)
(snapshot_dir / 'parse_html_urls').mkdir(exist_ok=True)
(snapshot_dir / 'parse_html_urls' / 'urls.jsonl').write_text(
'{"url": "https://same-url.com"}\n'
)
(snapshot_dir / 'wget').mkdir(exist_ok=True)
(snapshot_dir / 'wget' / 'urls.jsonl').write_text(
'{"url": "https://same-url.com"}\n' # Same URL, different extractor
)
# Collect URLs
all_discovered = collect_urls_from_plugins(snapshot_dir)
# Both entries are returned (deduplication happens at the crawl command level)
self.assertEqual(len(all_discovered), 2)
# Verify both extractors found the same URL
urls = {d['url'] for d in all_discovered}
self.assertEqual(urls, {'https://same-url.com'})
class TestEdgeCases(unittest.TestCase):
"""Test edge cases and error handling."""
@@ -858,8 +469,7 @@ class TestEdgeCases(unittest.TestCase):
from archivebox.misc.jsonl import read_args_or_stdin
# Empty args, TTY stdin (should not block)
stdin = StringIO('')
stdin.isatty = lambda: True
stdin = MockTTYStringIO('', is_tty=True)
records = list(read_args_or_stdin((), stream=stdin))
self.assertEqual(len(records), 0)
@@ -868,12 +478,12 @@ class TestEdgeCases(unittest.TestCase):
"""Should skip malformed JSONL lines."""
from archivebox.misc.jsonl import read_args_or_stdin
stdin = StringIO(
stdin = MockTTYStringIO(
'{"url": "https://good.com"}\n'
'not valid json\n'
'{"url": "https://also-good.com"}\n'
'{"url": "https://also-good.com"}\n',
is_tty=False,
)
stdin.isatty = lambda: False
records = list(read_args_or_stdin((), stream=stdin))
@@ -885,12 +495,12 @@ class TestEdgeCases(unittest.TestCase):
"""Should handle mixed URLs and JSONL."""
from archivebox.misc.jsonl import read_args_or_stdin
stdin = StringIO(
stdin = MockTTYStringIO(
'https://plain-url.com\n'
'{"type": "Snapshot", "url": "https://jsonl-url.com", "tags": "test"}\n'
'01234567-89ab-cdef-0123-456789abcdef\n' # UUID
'01234567-89ab-cdef-0123-456789abcdef\n', # UUID
is_tty=False,
)
stdin.isatty = lambda: False
records = list(read_args_or_stdin((), stream=stdin))
@@ -942,12 +552,12 @@ class TestPassThroughBehavior(unittest.TestCase):
url_record = {'url': 'https://example.com'}
# Mock stdin with both records
stdin = StringIO(
stdin = MockTTYStringIO(
json.dumps(tag_record)
+ '\n'
+ json.dumps(url_record)
+ json.dumps(url_record),
is_tty=False,
)
stdin.isatty = lambda: False
# The Tag should be passed through, the URL should create a Crawl
# (This is a unit test of the pass-through logic)

View File

@@ -5,6 +5,7 @@ import pwd
import sys
import socket
import platform
from typing import cast
from rich import print
@@ -32,7 +33,7 @@ EGID = os.getegid()
SUDO_UID = int(os.environ.get('SUDO_UID', 0))
SUDO_GID = int(os.environ.get('SUDO_GID', 0))
USER: str = Path('~').expanduser().resolve().name
HOSTNAME: str = max([socket.gethostname(), platform.node()], key=len)
HOSTNAME: str = cast(str, max([socket.gethostname(), platform.node()], key=len))
IS_ROOT = RUNNING_AS_UID == 0
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')

View File

@@ -33,6 +33,11 @@ def is_superuser(request: HttpRequest) -> bool:
return bool(getattr(request.user, 'is_superuser', False))
def format_parsed_datetime(value: object) -> str:
# Format any parse_date()-able value as "YYYY-MM-DD HH:MM:SS".
# Returns "" when parse_date() yields a falsy result (e.g. unparseable input);
# callers use this for supervisor timestamps and file mtimes — see the
# worker/log views below that the surrounding diff rewires to call this helper.
parsed = parse_date(value)
return parsed.strftime("%Y-%m-%d %H:%M:%S") if parsed else ""
def obj_to_yaml(obj: Any, indent: int = 0) -> str:
indent_str = " " * indent
if indent == 0:
@@ -412,7 +417,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
rows["Name"].append(ItemLink(proc_name, key=proc_name))
rows["State"].append(str(proc_data.get("statename") or ""))
rows['PID'].append(proc_description.replace('pid ', ''))
rows["Started"].append(parse_date(proc_start).strftime("%Y-%m-%d %H:%M:%S") if proc_start else '')
rows["Started"].append(format_parsed_datetime(proc_start))
rows["Command"].append(str(proc_config.get("command") or ""))
rows["Logfile"].append(
format_html(
@@ -458,7 +463,8 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
relevant_config = CONFIG_FILE.read_text()
relevant_logs = str(supervisor.readLog(0, 10_000_000))
start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0]
uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0]
start_dt = parse_date(start_ts)
uptime = str(timezone.now() - start_dt).split(".")[0] if start_dt else ""
supervisor_state = supervisor.getState()
proc: Dict[str, object] = {
@@ -485,8 +491,8 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
"Command": str(proc.get("name") or ""),
"PID": str(proc.get("pid") or ""),
"State": str(proc.get("statename") or ""),
"Started": parse_date(proc.get("start")).strftime("%Y-%m-%d %H:%M:%S") if proc.get("start") else "",
"Stopped": parse_date(proc.get("stop")).strftime("%Y-%m-%d %H:%M:%S") if proc.get("stop") else "",
"Started": format_parsed_datetime(proc.get("start")),
"Stopped": format_parsed_datetime(proc.get("stop")),
"Exit Status": str(proc.get("exitstatus") or ""),
"Logfile": str(proc.get("stdout_logfile") or ""),
"Uptime": str(str(proc.get("description") or "").split("uptime ", 1)[-1]),
@@ -524,7 +530,7 @@ def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
for logfile in log_files:
st = logfile.stat()
rows["Name"].append(ItemLink("logs" + str(logfile).rsplit("/logs", 1)[-1], key=logfile.name))
rows["Last Updated"].append(parse_date(st.st_mtime).strftime("%Y-%m-%d %H:%M:%S"))
rows["Last Updated"].append(format_parsed_datetime(st.st_mtime))
rows["Size"].append(f'{st.st_size//1000} kb')
with open(logfile, 'rb') as f:
@@ -557,7 +563,7 @@ def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
"fields": {
"Path": str(log_file),
"Size": f"{log_stat.st_size//1000} kb",
"Last Updated": parse_date(log_stat.st_mtime).strftime("%Y-%m-%d %H:%M:%S"),
"Last Updated": format_parsed_datetime(log_stat.st_mtime),
"Tail": "\n".join(log_text[-10_000:].split("\n")[-20:]),
"Full Log": log_text,
},

View File

@@ -1,7 +1,20 @@
__package__ = 'archivebox.core'
from typing import TYPE_CHECKING, Any
from django.contrib import admin
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
from admin_data_views.admin import (
admin_data_index_view as adv_admin_data_index_view,
get_admin_data_urls as adv_get_admin_data_urls,
get_app_list as adv_get_app_list,
)
if TYPE_CHECKING:
from django.http import HttpRequest
from django.template.response import TemplateResponse
from django.urls import URLPattern, URLResolver
from admin_data_views.typing import AppDict
class ArchiveBoxAdmin(admin.AdminSite):
@@ -10,6 +23,20 @@ class ArchiveBoxAdmin(admin.AdminSite):
site_title = 'Admin'
namespace = 'admin'
def get_app_list(self, request: 'HttpRequest', app_label: str | None = None) -> list['AppDict']:
if app_label is None:
return adv_get_app_list(self, request)
return adv_get_app_list(self, request, app_label)
def admin_data_index_view(self, request: 'HttpRequest', **kwargs: Any) -> 'TemplateResponse':
return adv_admin_data_index_view(self, request, **kwargs)
def get_admin_data_urls(self) -> list['URLResolver | URLPattern']:
return adv_get_admin_data_urls(self)
def get_urls(self) -> list['URLResolver | URLPattern']:
return self.get_admin_data_urls() + super().get_urls()
archivebox_admin = ArchiveBoxAdmin()
# Note: delete_selected is enabled per-model via actions = ['delete_selected'] in each ModelAdmin
@@ -17,13 +44,6 @@ archivebox_admin = ArchiveBoxAdmin()
# patch admin with methods to add data views (implemented by admin_data_views package)
# https://github.com/MrThearMan/django-admin-data-views
# https://mrthearman.github.io/django-admin-data-views/setup/
archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)
############### Admin Data View sections are defined in settings.ADMIN_DATA_VIEWS #########

View File

@@ -1,9 +1,9 @@
__package__ = 'archivebox.core'
from typing import Optional, Dict, Iterable, Any, List
from typing import Optional, Dict, Iterable, Any, List, Sequence, cast
import uuid
from archivebox.uuid_compat import uuid7
from datetime import datetime, timedelta
from django_stubs_ext.db.models import TypedModelMeta
import os
import json
@@ -20,6 +20,7 @@ from django.core.cache import cache
from django.urls import reverse_lazy
from django.contrib import admin
from django.conf import settings
from django.utils.safestring import mark_safe
from archivebox.config import CONSTANTS
from archivebox.misc.system import get_dir_size, atomic_write
@@ -51,7 +52,7 @@ class Tag(ModelWithUUID):
snapshot_set: models.Manager['Snapshot']
class Meta(TypedModelMeta):
class Meta(ModelWithUUID.Meta):
app_label = 'core'
verbose_name = "Tag"
verbose_name_plural = "Tags"
@@ -88,7 +89,7 @@ class Tag(ModelWithUUID):
@property
def api_url(self) -> str:
return reverse_lazy('api-1:get_tag', args=[self.id])
return str(reverse_lazy('api-1:get_tag', args=[self.id]))
def to_json(self) -> dict:
"""
@@ -104,7 +105,7 @@ class Tag(ModelWithUUID):
}
@staticmethod
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] = None):
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] | None = None):
"""
Create/update Tag from JSON dict.
@@ -259,7 +260,7 @@ class SnapshotQuerySet(models.QuerySet):
})
class SnapshotManager(models.Manager.from_queryset(SnapshotQuerySet)):
class SnapshotManager(models.Manager.from_queryset(SnapshotQuerySet)): # ty: ignore[unsupported-base]
"""Manager for Snapshot model - uses SnapshotQuerySet for chainable methods"""
def filter(self, *args, **kwargs):
@@ -283,8 +284,8 @@ class SnapshotManager(models.Manager.from_queryset(SnapshotQuerySet)):
from django.db import transaction
if atomic:
with transaction.atomic():
return self.delete()
return self.delete()
return self.get_queryset().delete()
return self.get_queryset().delete()
class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats, ModelWithStateMachine):
@@ -318,10 +319,20 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
StatusChoices = ModelWithStateMachine.StatusChoices
active_state = StatusChoices.STARTED
crawl_id: uuid.UUID
parent_snapshot_id: uuid.UUID | None
_prefetched_objects_cache: dict[str, Any]
objects = SnapshotManager()
archiveresult_set: models.Manager['ArchiveResult']
class Meta(TypedModelMeta):
class Meta(
ModelWithOutputDir.Meta,
ModelWithConfig.Meta,
ModelWithNotes.Meta,
ModelWithHealthStats.Meta,
ModelWithStateMachine.Meta,
):
app_label = 'core'
verbose_name = "Snapshot"
verbose_name_plural = "Snapshots"
@@ -663,6 +674,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
candidates = cls.objects.filter(url=url, timestamp__startswith=timestamp)
if candidates.count() == 1:
snapshot = candidates.first()
if snapshot is None:
return None
print(f"[DEBUG load_from_directory] Found via fuzzy match: {snapshot.timestamp}")
return snapshot
elif candidates.count() > 1:
@@ -751,14 +764,16 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
)
@staticmethod
def _select_best_timestamp(index_timestamp: str, folder_name: str) -> Optional[str]:
def _select_best_timestamp(index_timestamp: object | None, folder_name: str) -> Optional[str]:
"""
Select best timestamp from index.json vs folder name.
Validates range (1995-2035).
Prefers index.json if valid.
"""
def is_valid_timestamp(ts):
def is_valid_timestamp(ts: object | None) -> bool:
if not isinstance(ts, (str, int, float)):
return False
try:
ts_int = int(float(ts))
# 1995-01-01 to 2035-12-31
@@ -769,12 +784,11 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
index_valid = is_valid_timestamp(index_timestamp) if index_timestamp else False
folder_valid = is_valid_timestamp(folder_name)
if index_valid:
return str(int(float(index_timestamp)))
elif folder_valid:
return str(int(float(folder_name)))
else:
return None
if index_valid and index_timestamp is not None:
return str(int(float(str(index_timestamp))))
if folder_valid:
return str(int(float(str(folder_name))))
return None
@classmethod
def _ensure_unique_timestamp(cls, url: str, timestamp: str) -> str:
@@ -1039,7 +1053,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
)
index_path = Path(self.output_dir) / CONSTANTS.JSONL_INDEX_FILENAME
result = {
result: dict[str, Any] = {
'snapshot': None,
'archive_results': [],
'binaries': [],
@@ -1210,7 +1224,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
return merged
@classmethod
def _merge_snapshots(cls, snapshots: list['Snapshot']):
def _merge_snapshots(cls, snapshots: Sequence['Snapshot']):
"""
Merge exact duplicates.
Keep oldest, union files + ArchiveResults.
@@ -1271,19 +1285,21 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
@admin.display(description='Tags')
def tags_str(self, nocache=True) -> str | None:
calc_tags_str = lambda: ','.join(sorted(tag.name for tag in self.tags.all()))
if hasattr(self, '_prefetched_objects_cache') and 'tags' in self._prefetched_objects_cache:
prefetched_cache = getattr(self, '_prefetched_objects_cache', {})
if 'tags' in prefetched_cache:
return calc_tags_str()
cache_key = f'{self.pk}-tags'
return cache.get_or_set(cache_key, calc_tags_str) if not nocache else calc_tags_str()
def icons(self, path: Optional[str] = None) -> str:
"""Generate HTML icons showing which extractor plugins have succeeded for this snapshot"""
from django.utils.html import format_html, mark_safe
from django.utils.html import format_html
cache_key = f'result_icons:{self.pk}:{(self.downloaded_at or self.modified_at or self.created_at or self.bookmarked_at).timestamp()}'
def calc_icons():
if hasattr(self, '_prefetched_objects_cache') and 'archiveresult_set' in self._prefetched_objects_cache:
prefetched_cache = getattr(self, '_prefetched_objects_cache', {})
if 'archiveresult_set' in prefetched_cache:
archive_results = {r.plugin: r for r in self.archiveresult_set.all() if r.status == "succeeded" and (r.output_files or r.output_str)}
else:
# Filter for results that have either output_files or output_str
@@ -1331,7 +1347,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
@property
def api_url(self) -> str:
return reverse_lazy('api-1:get_snapshot', args=[self.id])
return str(reverse_lazy('api-1:get_snapshot', args=[self.id]))
def get_absolute_url(self):
return f'/{self.archive_path}'
@@ -1341,23 +1357,28 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
return url_domain(self.url)
@property
def output_dir(self):
def title_stripped(self) -> str:
return (self.title or '').strip()
@property
def output_dir(self) -> Path:
"""The filesystem path to the snapshot's output directory."""
import os
current_path = self.get_storage_path_for_version(self.fs_version)
if current_path.exists():
return str(current_path)
return current_path
# Check for backwards-compat symlink
old_path = CONSTANTS.ARCHIVE_DIR / self.timestamp
if old_path.is_symlink():
return str(Path(os.readlink(old_path)).resolve())
link_target = Path(os.readlink(old_path))
return (old_path.parent / link_target).resolve() if not link_target.is_absolute() else link_target.resolve()
elif old_path.exists():
return str(old_path)
return old_path
return str(current_path)
return current_path
def ensure_legacy_archive_symlink(self) -> None:
"""Ensure the legacy archive/<timestamp> path resolves to this snapshot."""
@@ -1405,7 +1426,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
date_base = crawl.created_at or self.created_at or timezone.now()
date_str = date_base.strftime('%Y%m%d')
domain = self.extract_domain_from_url(self.url)
username = crawl.created_by.username if crawl.created_by_id else 'system'
username = crawl.created_by.username if getattr(crawl, 'created_by_id', None) else 'system'
crawl_dir = DATA_DIR / 'users' / username / 'crawls' / date_str / domain / str(crawl.id)
link_path = crawl_dir / 'snapshots' / domain / str(self.id)
@@ -1591,7 +1612,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
}
@staticmethod
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] = None, queue_for_extraction: bool = True):
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] | None = None, queue_for_extraction: bool = True):
"""
Create/update Snapshot from JSON dict.
@@ -1859,7 +1880,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
'is_sealed': is_sealed,
}
def retry_failed_archiveresults(self, retry_at: Optional['timezone.datetime'] = None) -> int:
def retry_failed_archiveresults(self, retry_at: Optional[datetime] = None) -> int:
"""
Reset failed/skipped ArchiveResults to queued for retry.
@@ -2163,20 +2184,20 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
cols = cols or ['timestamp', 'is_archived', 'url']
return separator.join(to_json(data.get(col, ''), indent=None).ljust(ljust) for col in cols)
def write_json_details(self, out_dir: Optional[str] = None) -> None:
def write_json_details(self, out_dir: Path | str | None = None) -> None:
"""Write JSON index file for this snapshot to its output directory"""
out_dir = out_dir or self.output_dir
path = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
output_dir = Path(out_dir) if out_dir is not None else self.output_dir
path = output_dir / CONSTANTS.JSON_INDEX_FILENAME
atomic_write(str(path), self.to_dict(extended=True))
def write_html_details(self, out_dir: Optional[str] = None) -> None:
def write_html_details(self, out_dir: Path | str | None = None) -> None:
"""Write HTML detail page for this snapshot to its output directory"""
from django.template.loader import render_to_string
from archivebox.config.common import SERVER_CONFIG
from archivebox.config.configset import get_config
from archivebox.misc.logging_util import printable_filesize
out_dir = out_dir or self.output_dir
output_dir = Path(out_dir) if out_dir is not None else self.output_dir
config = get_config()
SAVE_ARCHIVE_DOT_ORG = config.get('SAVE_ARCHIVE_DOT_ORG', True)
TITLE_LOADING_MSG = 'Not yet archived...'
@@ -2198,12 +2219,12 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
for plugin in preview_priority:
out = outputs_by_plugin.get(plugin)
if out and out.get('path'):
best_preview_path = out['path']
best_preview_path = str(out['path'])
best_result = out
break
if best_preview_path == 'about:blank' and outputs:
best_preview_path = outputs[0].get('path') or 'about:blank'
best_preview_path = str(outputs[0].get('path') or 'about:blank')
best_result = outputs[0]
context = {
**self.to_dict(extended=True),
@@ -2223,7 +2244,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
'archiveresults': outputs,
}
rendered_html = render_to_string('snapshot.html', context)
atomic_write(str(Path(out_dir) / CONSTANTS.HTML_INDEX_FILENAME), rendered_html)
atomic_write(str(output_dir / CONSTANTS.HTML_INDEX_FILENAME), rendered_html)
# =========================================================================
# Helper Methods
@@ -2285,6 +2306,8 @@ class SnapshotMachine(BaseStateMachine):
# Manual event (can also be triggered by last ArchiveResult finishing)
seal = started.to(sealed)
snapshot: Snapshot
def can_start(self) -> bool:
can_start = bool(self.snapshot.url)
return can_start
@@ -2332,7 +2355,7 @@ class SnapshotMachine(BaseStateMachine):
if remaining_active == 0 and crawl.status == crawl.StatusChoices.STARTED:
print(f'[cyan]🔒 All snapshots sealed for crawl {crawl.id}, sealing crawl[/cyan]', file=sys.stderr)
# Seal the parent crawl
crawl.sm.seal()
cast(Any, crawl).sm.seal()
class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithStateMachine):
@@ -2391,7 +2414,15 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
state_field_name = 'status'
active_state = StatusChoices.STARTED
class Meta(TypedModelMeta):
snapshot_id: uuid.UUID
process_id: uuid.UUID | None
class Meta(
ModelWithOutputDir.Meta,
ModelWithConfig.Meta,
ModelWithNotes.Meta,
ModelWithStateMachine.Meta,
):
app_label = 'core'
verbose_name = 'Archive Result'
verbose_name_plural = 'Archive Results Log'
@@ -2442,7 +2473,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
return record
@staticmethod
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] = None):
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] | None = None):
"""
Create/update ArchiveResult from JSON dict.
@@ -2469,7 +2500,6 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
# Get or create by snapshot_id + plugin
try:
from archivebox.core.models import Snapshot
snapshot = Snapshot.objects.get(id=snapshot_id)
result, _ = ArchiveResult.objects.get_or_create(
@@ -2531,7 +2561,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
@property
def api_url(self) -> str:
return reverse_lazy('api-1:get_archiveresult', args=[self.id])
return str(reverse_lazy('api-1:get_archiveresult', args=[self.id]))
def get_absolute_url(self):
return f'/{self.snapshot.archive_path}/{self.plugin}'
@@ -3198,6 +3228,8 @@ class ArchiveResultMachine(BaseStateMachine):
# Reason: backoff should always retry→started, then started→final states
)
archiveresult: ArchiveResult
def can_start(self) -> bool:
"""Pure function - check if AR can start (has valid URL)."""
return bool(self.archiveresult.snapshot.url)
@@ -3259,7 +3291,7 @@ class ArchiveResultMachine(BaseStateMachine):
process = self.archiveresult.process
# If process is NOT running anymore, reap the background hook
if not process.is_running():
if not process.is_running:
self.archiveresult.update_from_output()
# Check if now in final state after reaping
return self.archiveresult.status in (
@@ -3331,7 +3363,7 @@ class ArchiveResultMachine(BaseStateMachine):
if remaining_active == 0:
print(f'[cyan] 🔒 All archiveresults finished for snapshot {snapshot.url}, sealing snapshot[/cyan]', file=sys.stderr)
# Seal the parent snapshot
snapshot.sm.seal()
cast(Any, snapshot).sm.seal()
@succeeded.enter
def enter_succeeded(self):

View File

@@ -3,6 +3,8 @@ __package__ = "archivebox.core"
import os
import sys
import inspect
import importlib
from typing import Any, cast
from pathlib import Path
@@ -119,8 +121,8 @@ try:
try:
# Try to import django-auth-ldap (will fail if not installed)
from django_auth_ldap.config import LDAPSearch
import ldap
LDAPSearch = importlib.import_module("django_auth_ldap.config").LDAPSearch
ldap = importlib.import_module("ldap")
# Configure LDAP authentication
AUTH_LDAP_SERVER_URI = LDAP_CONFIG.LDAP_SERVER_URI
@@ -130,7 +132,7 @@ try:
# Configure user search
AUTH_LDAP_USER_SEARCH = LDAPSearch(
LDAP_CONFIG.LDAP_USER_BASE,
ldap.SCOPE_SUBTREE,
getattr(ldap, "SCOPE_SUBTREE", 2),
LDAP_CONFIG.LDAP_USER_FILTER,
)
@@ -432,7 +434,7 @@ LOGGING = SETTINGS_LOGGING
# Add default webhook configuration to the User model
SIGNAL_WEBHOOKS_CUSTOM_MODEL = "archivebox.api.models.OutboundWebhook"
SIGNAL_WEBHOOKS = {
SIGNAL_WEBHOOKS: dict[str, object] = {
"HOOKS": {
# ... is a special sigil value that means "use the default autogenerated hooks"
"django.contrib.auth.models.User": ...,
@@ -444,7 +446,8 @@ SIGNAL_WEBHOOKS = {
}
# Avoid background threads touching sqlite connections (especially during tests/migrations).
if DATABASES["default"]["ENGINE"].endswith("sqlite3"):
default_database = cast(dict[str, Any], DATABASES["default"])
if str(default_database["ENGINE"]).endswith("sqlite3"):
SIGNAL_WEBHOOKS["TASK_HANDLER"] = "signal_webhooks.handlers.sync_task_handler"
################################################################################
@@ -551,10 +554,8 @@ if DEBUG_TOOLBAR:
MIDDLEWARE = [*MIDDLEWARE, "debug_toolbar.middleware.DebugToolbarMiddleware"]
if DEBUG:
from django_autotyping.typing import AutotypingSettingsDict
INSTALLED_APPS += ["django_autotyping"]
AUTOTYPING: AutotypingSettingsDict = {
AUTOTYPING = {
"STUBS_GENERATION": {
"LOCAL_STUBS_DIR": PACKAGE_DIR / "typings",
}

View File

@@ -1,5 +1,7 @@
"""Template tags for accessing config values in templates."""
from typing import Any
from django import template
from archivebox.config.configset import get_config as _get_config
@@ -8,7 +10,7 @@ register = template.Library()
@register.simple_tag
def get_config(key: str) -> any:
def get_config(key: str) -> Any:
"""
Get a config value by key.

View File

@@ -4,6 +4,9 @@ import importlib
import os
import django
from unittest.mock import patch
from typing import TypeVar, cast
from django.forms import BaseForm
# Set up Django before importing any Django-dependent modules
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
@@ -18,6 +21,14 @@ CrawlSchedule = importlib.import_module('archivebox.crawls.models').CrawlSchedul
Tag = importlib.import_module('archivebox.core.models').Tag
SERVER_CONFIG = importlib.import_module('archivebox.config.common').SERVER_CONFIG
T = TypeVar('T')
def require(value: T | None) -> T:
if value is None:
raise AssertionError('Expected value to be present')
return value
class AddViewTests(TestCase):
"""Tests for the AddView (crawl creation form)."""
@@ -111,7 +122,7 @@ class AddViewTests(TestCase):
# Check that crawl was created
self.assertEqual(Crawl.objects.count(), 1)
crawl = Crawl.objects.first()
crawl = require(Crawl.objects.first())
self.assertIn('https://example.com', crawl.urls)
self.assertIn('https://example.org', crawl.urls)
@@ -140,8 +151,8 @@ class AddViewTests(TestCase):
self.assertEqual(Crawl.objects.count(), 1)
self.assertEqual(CrawlSchedule.objects.count(), 1)
crawl = Crawl.objects.first()
schedule = CrawlSchedule.objects.first()
crawl = require(Crawl.objects.first())
schedule = require(CrawlSchedule.objects.first())
self.assertEqual(crawl.schedule, schedule)
self.assertEqual(schedule.template, crawl)
@@ -159,7 +170,7 @@ class AddViewTests(TestCase):
self.assertEqual(response.status_code, 302)
schedule = CrawlSchedule.objects.first()
schedule = require(CrawlSchedule.objects.first())
self.assertEqual(schedule.schedule, '0 */6 * * *')
def test_add_crawl_with_plugins(self):
@@ -173,7 +184,7 @@ class AddViewTests(TestCase):
self.assertEqual(response.status_code, 302)
crawl = Crawl.objects.first()
crawl = require(Crawl.objects.first())
plugins = crawl.config.get('PLUGINS', '')
# Should contain the selected plugins
@@ -209,7 +220,7 @@ class AddViewTests(TestCase):
self.assertEqual(response.status_code, 302)
crawl = Crawl.objects.first()
crawl = require(Crawl.objects.first())
config = crawl.config
self.assertEqual(config.get('DEFAULT_PERSONA'), 'CustomPersona')
@@ -236,7 +247,7 @@ class AddViewTests(TestCase):
})
self.assertEqual(response.status_code, 302)
crawl = Crawl.objects.order_by('-created_at').first()
crawl = require(Crawl.objects.order_by('-created_at').first())
self.assertNotIn('YTDLP_ARGS_EXTRA', crawl.config)
def test_add_authenticated_non_admin_custom_config_is_silently_stripped(self):
@@ -248,7 +259,7 @@ class AddViewTests(TestCase):
})
self.assertEqual(response.status_code, 302)
crawl = Crawl.objects.order_by('-created_at').first()
crawl = require(Crawl.objects.order_by('-created_at').first())
self.assertNotIn('YTDLP_ARGS_EXTRA', crawl.config)
def test_add_staff_admin_custom_config_is_allowed(self):
@@ -269,7 +280,7 @@ class AddViewTests(TestCase):
})
self.assertEqual(response.status_code, 302)
crawl = Crawl.objects.order_by('-created_at').first()
crawl = require(Crawl.objects.order_by('-created_at').first())
self.assertEqual(crawl.config.get('YTDLP_ARGS_EXTRA'), ['--exec', 'echo hello'])
def test_add_empty_urls_fails(self):
@@ -281,7 +292,7 @@ class AddViewTests(TestCase):
# Should show form again with errors, not redirect
self.assertEqual(response.status_code, 200)
self.assertFormError(response, 'form', 'url', 'This field is required.')
self.assertFormError(cast(BaseForm, response.context['form']), 'url', 'This field is required.')
def test_add_invalid_urls_fails(self):
"""Test that invalid URLs fail validation."""
@@ -355,7 +366,7 @@ class AddViewTests(TestCase):
self.assertEqual(response.status_code, 302)
crawl = Crawl.objects.first()
crawl = require(Crawl.objects.first())
self.assertEqual(crawl.tags_str, 'tag1,tag2,tag3')
def test_crawl_redirects_to_admin_change_page(self):
@@ -365,7 +376,7 @@ class AddViewTests(TestCase):
'depth': '0',
})
crawl = Crawl.objects.first()
crawl = require(Crawl.objects.first())
expected_redirect = f'/admin/crawls/crawl/{crawl.id}/change/'
self.assertRedirects(response, expected_redirect, fetch_redirect_response=False)

View File

@@ -4,6 +4,7 @@ from django.urls import path, re_path, include
from django.views import static
from django.conf import settings
from django.views.generic.base import RedirectView
from django.http import HttpRequest
from archivebox.misc.serve_static import serve_static
@@ -53,7 +54,7 @@ urlpatterns = [
path("api/", include('archivebox.api.urls'), name='api'),
path('health/', HealthCheckView.as_view(), name='healthcheck'),
path('error/', lambda *_: 1/0), # type: ignore
path('error/', lambda request: _raise_test_error(request)),
# path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django
@@ -61,6 +62,10 @@ urlpatterns = [
path('', HomepageView.as_view(), name='Home'),
]
def _raise_test_error(_request: HttpRequest):
raise ZeroDivisionError('Intentional test error route')
if settings.DEBUG_TOOLBAR:
urlpatterns += [path('__debug__/', include("debug_toolbar.urls"))]

View File

@@ -5,13 +5,14 @@ import posixpath
from glob import glob, escape
from django.utils import timezone
import inspect
from typing import Callable, get_type_hints
from typing import Callable, cast, get_type_hints
from pathlib import Path
from urllib.parse import urlparse
from django.shortcuts import render, redirect
from django.http import JsonResponse, HttpRequest, HttpResponse, Http404, HttpResponseForbidden
from django.utils.html import format_html, mark_safe
from django.utils.html import format_html
from django.utils.safestring import mark_safe
from django.views import View
from django.views.generic.list import ListView
from django.views.generic import FormView
@@ -21,7 +22,7 @@ from django.contrib.auth.mixins import UserPassesTestMixin
from django.views.decorators.csrf import csrf_exempt
from django.utils.decorators import method_decorator
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.typing import TableContext, ItemContext, SectionData
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
from archivebox.config import CONSTANTS, CONSTANTS_CONFIG, DATA_DIR, VERSION
@@ -854,7 +855,7 @@ class AddView(UserPassesTestMixin, FormView):
def _can_override_crawl_config(self) -> bool:
user = self.request.user
return bool(user.is_authenticated and (user.is_superuser or user.is_staff))
return bool(user.is_authenticated and (getattr(user, 'is_superuser', False) or getattr(user, 'is_staff', False)))
def _get_custom_config_overrides(self, form: AddLinkForm) -> dict:
custom_config = form.cleaned_data.get("config") or {}
@@ -906,7 +907,7 @@ class AddView(UserPassesTestMixin, FormView):
from archivebox.base_models.models import get_or_create_system_user_pk
created_by_id = get_or_create_system_user_pk()
created_by_name = self.request.user.username if self.request.user.is_authenticated else 'web'
created_by_name = getattr(self.request.user, 'username', 'web') if self.request.user.is_authenticated else 'web'
# 1. save the provided urls to sources/2024-11-05__23-59-59__web_ui_add_by_user_<user_pk>.txt
sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__web_ui_add_by_user_{created_by_id}.txt'
@@ -1015,8 +1016,8 @@ class WebAddView(AddView):
return super().dispatch(request, *args, **kwargs)
def get(self, request, url: str):
requested_url = urldecode(url)
def get(self, request: HttpRequest, *args: object, **kwargs: object):
requested_url = urldecode(str(kwargs.get('url') or (args[0] if args else '')))
if not requested_url:
raise Http404
@@ -1025,6 +1026,7 @@ class WebAddView(AddView):
return redirect(f'/{snapshot.url_path}')
add_url = self._normalize_add_url(requested_url)
assert self.form_class is not None
defaults_form = self.form_class()
form_data = {
'url': add_url,
@@ -1045,6 +1047,7 @@ class WebAddView(AddView):
crawl = self._create_crawl_from_form(form)
snapshot = Snapshot.from_json({'url': add_url, 'tags': form.cleaned_data.get('tag', '')}, overrides={'crawl': crawl})
assert snapshot is not None
return redirect(f'/{snapshot.url_path}')
@@ -1385,7 +1388,7 @@ def find_config_type(key: str) -> str:
# Try to get from pydantic model_fields first (more reliable)
if hasattr(config, 'model_fields') and key in config.model_fields:
field = config.model_fields[key]
if hasattr(field, 'annotation'):
if hasattr(field, 'annotation') and field.annotation is not None:
try:
return str(field.annotation.__name__)
except AttributeError:
@@ -1448,7 +1451,7 @@ def find_config_source(key: str, merged_config: dict) -> str:
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
CONFIGS = get_all_configs()
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
assert getattr(request.user, 'is_superuser', False), 'Must be a superuser to view configuration settings.'
# Get merged config that includes Machine.config overrides
try:
@@ -1519,7 +1522,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
CONFIGS = get_all_configs()
FLAT_CONFIG = get_flat_config()
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
assert getattr(request.user, 'is_superuser', False), 'Must be a superuser to view configuration settings.'
# Get merged config
merged_config = get_config()
@@ -1575,62 +1578,62 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
section_header = mark_safe(f'[DYNAMIC CONFIG] &nbsp; <b><code style="color: lightgray">{key}</code></b> &nbsp; <small>(read-only, calculated at runtime)</small>')
section_data = cast(SectionData, {
"name": section_header,
"description": None,
"fields": {
'Key': key,
'Type': find_config_type(key),
'Value': final_value,
'Source': find_config_source(key, merged_config),
},
"help_texts": {
'Key': mark_safe(f'''
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">Documentation</a> &nbsp;
<span style="display: {"inline" if aliases else "none"}">
Aliases: {", ".join(aliases)}
</span>
'''),
'Type': mark_safe(f'''
<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code">
See full definition in <code>archivebox/config</code>...
</a>
'''),
'Value': mark_safe(f'''
{'<b style="color: red">Value is redacted for your security. (Passwords, secrets, API tokens, etc. cannot be viewed in the Web UI)</b><br/><br/>' if not key_is_safe(key) else ''}
<br/><hr/><br/>
<b>Configuration Sources (in priority order):</b><br/><br/>
{sources_html}
<br/><br/>
<p style="display: {"block" if key in FLAT_CONFIG and key not in CONSTANTS_CONFIG else "none"}">
<i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
<br/><br/>
<code>archivebox config --set {key}="{
val.strip("'")
if (val := find_config_default(key)) else
(str(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
}"</code>
</p>
'''),
'Source': mark_safe(f'''
The value shown in the "Value" field comes from the <b>{find_config_source(key, merged_config)}</b> source.
<br/><br/>
Priority order (highest to lowest):
<ol>
<li><b style="color: purple">Machine</b> - Machine-specific overrides (e.g., resolved binary paths)
{f'<br/><a href="{machine_admin_url}">→ Edit <code>{key}</code> in Machine.config for this server</a>' if machine_admin_url else ''}
</li>
<li><b style="color: blue">Environment</b> - Environment variables</li>
<li><b style="color: green">Config File</b> - data/ArchiveBox.conf</li>
<li><b style="color: gray">Default</b> - Default value from code</li>
</ol>
{f'<br/><b>Tip:</b> To override <code>{key}</code> on this machine, <a href="{machine_admin_url}">edit the Machine.config field</a> and add:<br/><code>{{"\\"{key}\\": "your_value_here"}}</code>' if machine_admin_url and key not in CONSTANTS_CONFIG else ''}
'''),
},
})
return ItemContext(
slug=key,
title=key,
data=[
{
"name": section_header,
"description": None,
"fields": {
'Key': key,
'Type': find_config_type(key),
'Value': final_value,
'Source': find_config_source(key, merged_config),
},
"help_texts": {
'Key': mark_safe(f'''
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">Documentation</a> &nbsp;
<span style="display: {"inline" if aliases else "none"}">
Aliases: {", ".join(aliases)}
</span>
'''),
'Type': mark_safe(f'''
<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code">
See full definition in <code>archivebox/config</code>...
</a>
'''),
'Value': mark_safe(f'''
{'<b style="color: red">Value is redacted for your security. (Passwords, secrets, API tokens, etc. cannot be viewed in the Web UI)</b><br/><br/>' if not key_is_safe(key) else ''}
<br/><hr/><br/>
<b>Configuration Sources (in priority order):</b><br/><br/>
{sources_html}
<br/><br/>
<p style="display: {"block" if key in FLAT_CONFIG and key not in CONSTANTS_CONFIG else "none"}">
<i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
<br/><br/>
<code>archivebox config --set {key}="{
val.strip("'")
if (val := find_config_default(key)) else
(str(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
}"</code>
</p>
'''),
'Source': mark_safe(f'''
The value shown in the "Value" field comes from the <b>{find_config_source(key, merged_config)}</b> source.
<br/><br/>
Priority order (highest to lowest):
<ol>
<li><b style="color: purple">Machine</b> - Machine-specific overrides (e.g., resolved binary paths)
{f'<br/><a href="{machine_admin_url}">→ Edit <code>{key}</code> in Machine.config for this server</a>' if machine_admin_url else ''}
</li>
<li><b style="color: blue">Environment</b> - Environment variables</li>
<li><b style="color: green">Config File</b> - data/ArchiveBox.conf</li>
<li><b style="color: gray">Default</b> - Default value from code</li>
</ol>
{f'<br/><b>💡 Tip:</b> To override <code>{key}</code> on this machine, <a href="{machine_admin_url}">edit the Machine.config field</a> and add:<br/><code>{{"\\"{key}\\": "your_value_here"}}</code>' if machine_admin_url and key not in CONSTANTS_CONFIG else ''}
'''),
},
},
],
data=[section_data],
)

View File

@@ -16,7 +16,7 @@ class TagEditorWidget(forms.Widget):
- Press Enter or Space to create new tags (auto-creates if doesn't exist)
- Uses AJAX for autocomplete and tag creation
"""
template_name = None # We render manually
template_name = "" # We render manually
class Media:
css = {'all': []}

View File

@@ -2,7 +2,8 @@ __package__ = 'archivebox.crawls'
from django import forms
from django.utils.html import format_html, format_html_join, mark_safe
from django.utils.html import format_html, format_html_join
from django.utils.safestring import mark_safe
from django.contrib import admin, messages
from django.db.models import Count, Q

View File

@@ -1,6 +1,7 @@
__package__ = 'archivebox.crawls'
from typing import TYPE_CHECKING
import uuid
from datetime import timedelta
from archivebox.uuid_compat import uuid7
from pathlib import Path
@@ -10,7 +11,6 @@ from django.core.validators import MaxValueValidator, MinValueValidator
from django.conf import settings
from django.urls import reverse_lazy
from django.utils import timezone
from django_stubs_ext.db.models import TypedModelMeta
from statemachine import State, registry
from rich import print
@@ -36,7 +36,7 @@ class CrawlSchedule(ModelWithUUID, ModelWithNotes):
crawl_set: models.Manager['Crawl']
class Meta(TypedModelMeta):
class Meta(ModelWithUUID.Meta, ModelWithNotes.Meta):
app_label = 'crawls'
verbose_name = 'Scheduled Crawl'
verbose_name_plural = 'Scheduled Crawls'
@@ -47,7 +47,7 @@ class CrawlSchedule(ModelWithUUID, ModelWithNotes):
@property
def api_url(self) -> str:
return reverse_lazy('api-1:get_any', args=[self.id])
return str(reverse_lazy('api-1:get_any', args=[self.id]))
def save(self, *args, **kwargs):
self.schedule = (self.schedule or '').strip()
@@ -119,9 +119,17 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
StatusChoices = ModelWithStateMachine.StatusChoices
active_state = StatusChoices.STARTED
schedule_id: uuid.UUID | None
sm: 'CrawlMachine'
snapshot_set: models.Manager['Snapshot']
class Meta(TypedModelMeta):
class Meta(
ModelWithOutputDir.Meta,
ModelWithConfig.Meta,
ModelWithHealthStats.Meta,
ModelWithStateMachine.Meta,
):
app_label = 'crawls'
verbose_name = 'Crawl'
verbose_name_plural = 'Crawls'
@@ -152,7 +160,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
@property
def api_url(self) -> str:
return reverse_lazy('api-1:get_crawl', args=[self.id])
return str(reverse_lazy('api-1:get_crawl', args=[self.id]))
def to_json(self) -> dict:
"""
@@ -172,7 +180,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
}
@staticmethod
def from_json(record: dict, overrides: dict = None):
def from_json(record: dict, overrides: dict | None = None):
"""
Create or get a Crawl from a JSON dict.
@@ -746,6 +754,8 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
# =============================================================================
class CrawlMachine(BaseStateMachine):
crawl: Crawl
"""
State machine for managing Crawl lifecycle.

View File

@@ -1013,7 +1013,7 @@ def get_plugin_icon(plugin: str) -> str:
def process_hook_records(records: List[Dict[str, Any]], overrides: Dict[str, Any] = None) -> Dict[str, int]:
def process_hook_records(records: List[Dict[str, Any]], overrides: Dict[str, Any] | None = None) -> Dict[str, int]:
"""
Process JSONL records from hook output.
Dispatches to Model.from_json() for each record type.

View File

@@ -1,6 +1,7 @@
__package__ = 'archivebox.ideas'
import asyncio
import importlib
import json
import os
import shlex
@@ -13,12 +14,14 @@ from typing import Any, Callable, Mapping, MutableMapping, Optional
from pydantic import BaseModel, Field
try:
from bubus import BaseEvent, EventBus
bubus = importlib.import_module("bubus")
BaseEvent = bubus.BaseEvent
EventBus = bubus.EventBus
except Exception as exc: # pragma: no cover - optional dependency
raise ImportError('ProcessPlugin requires bubus to be installed') from exc
try:
from bubus.service import uuid7str
uuid7str = importlib.import_module("bubus.service").uuid7str
except Exception: # pragma: no cover - optional dependency
from uuid import uuid4 as _uuid4

View File

@@ -6,18 +6,15 @@ This module extends django-auth-ldap to support the LDAP_CREATE_SUPERUSER flag.
__package__ = "archivebox.ldap"
from typing import TYPE_CHECKING
import importlib
if TYPE_CHECKING:
from django_auth_ldap.backend import LDAPBackend as BaseLDAPBackend
else:
try:
from django_auth_ldap.backend import LDAPBackend as BaseLDAPBackend
except ImportError:
# If django-auth-ldap is not installed, create a dummy base class
class BaseLDAPBackend:
"""Dummy LDAP backend when django-auth-ldap is not installed."""
pass
try:
BaseLDAPBackend = importlib.import_module("django_auth_ldap.backend").LDAPBackend
except ImportError:
class BaseLDAPBackend:
"""Dummy LDAP backend when django-auth-ldap is not installed."""
pass
class ArchiveBoxLDAPBackend(BaseLDAPBackend):
@@ -36,7 +33,11 @@ class ArchiveBoxLDAPBackend(BaseLDAPBackend):
"""
from archivebox.config.ldap import LDAP_CONFIG
user = super().authenticate_ldap_user(ldap_user, password)
base_authenticate = getattr(super(), "authenticate_ldap_user", None)
if base_authenticate is None:
return None
user = base_authenticate(ldap_user, password)
if user and LDAP_CONFIG.LDAP_CREATE_SUPERUSER:
# Grant superuser privileges to all LDAP-authenticated users

View File

@@ -1,11 +1,15 @@
from __future__ import annotations
__package__ = 'archivebox.machine'
import os
import sys
import uuid
import socket
from pathlib import Path
from archivebox.uuid_compat import uuid7
from datetime import timedelta, datetime
from typing import TYPE_CHECKING, Any, cast
from statemachine import State, registry
@@ -13,21 +17,31 @@ from django.db import models
from django.db.models import QuerySet
from django.utils import timezone
from django.utils.functional import cached_property
from django_stubs_ext.db.models import TypedModelMeta
from archivebox.base_models.models import ModelWithHealthStats
from archivebox.workers.models import BaseStateMachine, ModelWithStateMachine
from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
_psutil: Any | None = None
try:
import psutil
import psutil as _psutil_import
PSUTIL_AVAILABLE = True
except ImportError:
PSUTIL_AVAILABLE = False
else:
_psutil = _psutil_import
_CURRENT_MACHINE = None
_CURRENT_INTERFACE = None
_CURRENT_BINARIES = {}
_CURRENT_PROCESS = None
if TYPE_CHECKING:
import psutil
from archivebox.core.models import ArchiveResult
else:
psutil = cast(Any, _psutil)
_CURRENT_MACHINE: Machine | None = None
_CURRENT_INTERFACE: NetworkInterface | None = None
_CURRENT_BINARIES: dict[str, Binary] = {}
_CURRENT_PROCESS: Process | None = None
MACHINE_RECHECK_INTERVAL = 7 * 24 * 60 * 60
NETWORK_INTERFACE_RECHECK_INTERVAL = 1 * 60 * 60
@@ -64,10 +78,10 @@ class Machine(ModelWithHealthStats):
num_uses_failed = models.PositiveIntegerField(default=0)
num_uses_succeeded = models.PositiveIntegerField(default=0)
objects: MachineManager = MachineManager()
objects = MachineManager() # pyright: ignore[reportIncompatibleVariableOverride]
networkinterface_set: models.Manager['NetworkInterface']
class Meta:
class Meta(ModelWithHealthStats.Meta):
app_label = 'machine'
@classmethod
@@ -127,7 +141,7 @@ class Machine(ModelWithHealthStats):
}
@staticmethod
def from_json(record: dict, overrides: dict = None):
def from_json(record: dict[str, Any], overrides: dict[str, Any] | None = None):
"""
Update Machine config from JSON dict.
@@ -172,9 +186,10 @@ class NetworkInterface(ModelWithHealthStats):
# num_uses_failed = models.PositiveIntegerField(default=0) # from ModelWithHealthStats
# num_uses_succeeded = models.PositiveIntegerField(default=0) # from ModelWithHealthStats
objects: NetworkInterfaceManager = NetworkInterfaceManager()
objects = NetworkInterfaceManager() # pyright: ignore[reportIncompatibleVariableOverride]
machine_id: uuid.UUID
class Meta:
class Meta(ModelWithHealthStats.Meta):
app_label = 'machine'
unique_together = (('machine', 'ip_public', 'ip_local', 'mac_address', 'dns_server'),)
@@ -185,7 +200,7 @@ class NetworkInterface(ModelWithHealthStats):
if timezone.now() < _CURRENT_INTERFACE.modified_at + timedelta(seconds=NETWORK_INTERFACE_RECHECK_INTERVAL):
return _CURRENT_INTERFACE
_CURRENT_INTERFACE = None
machine = Machine.objects.current()
machine = Machine.current()
net_info = get_host_network()
_CURRENT_INTERFACE, _ = cls.objects.update_or_create(
machine=machine, ip_public=net_info.pop('ip_public'), ip_local=net_info.pop('ip_local'),
@@ -202,7 +217,7 @@ class BinaryManager(models.Manager):
if cached and timezone.now() < cached.modified_at + timedelta(seconds=BINARY_RECHECK_INTERVAL):
return cached
_CURRENT_BINARIES[name], _ = self.update_or_create(
machine=Machine.objects.current(), name=name, binprovider=binprovider,
machine=Machine.current(), name=name, binprovider=binprovider,
version=version, abspath=abspath, sha256=sha256,
)
return _CURRENT_BINARIES[name]
@@ -263,12 +278,14 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
num_uses_failed = models.PositiveIntegerField(default=0)
num_uses_succeeded = models.PositiveIntegerField(default=0)
state_machine_name: str = 'archivebox.machine.models.BinaryMachine'
machine_id: uuid.UUID
state_machine_name: str | None = 'archivebox.machine.models.BinaryMachine'
active_state: str = StatusChoices.QUEUED
objects: BinaryManager = BinaryManager()
objects = BinaryManager() # pyright: ignore[reportIncompatibleVariableOverride]
class Meta:
class Meta(ModelWithHealthStats.Meta, ModelWithStateMachine.Meta):
app_label = 'machine'
verbose_name = 'Binary'
verbose_name_plural = 'Binaries'
@@ -321,7 +338,7 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
}
@staticmethod
def from_json(record: dict, overrides: dict = None):
def from_json(record: dict[str, Any], overrides: dict[str, Any] | None = None):
"""
Create/update Binary from JSON dict.
@@ -418,7 +435,7 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
return None
def update_and_requeue(self, **kwargs):
def update_and_requeue(self, **kwargs) -> bool:
"""
Update binary fields and requeue for worker state machine.
@@ -429,6 +446,7 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
setattr(self, key, value)
self.modified_at = timezone.now()
self.save()
return True
def _allowed_binproviders(self) -> set[str] | None:
"""Return the allowed binproviders for this binary, or None for wildcard."""
@@ -513,21 +531,14 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
plugin_output_dir = output_dir / plugin_name
plugin_output_dir.mkdir(parents=True, exist_ok=True)
# Build kwargs for hook
hook_kwargs = {
'binary_id': str(self.id),
'machine_id': str(self.machine_id),
'name': self.name,
'binproviders': self.binproviders,
}
custom_cmd = None
overrides_json = None
if plugin_name == 'custom':
custom_cmd = self._get_custom_install_command()
if not custom_cmd:
continue
hook_kwargs['custom_cmd'] = custom_cmd
elif self.overrides:
hook_kwargs['overrides'] = json.dumps(self.overrides)
overrides_json = json.dumps(self.overrides)
# Run the hook
process = run_hook(
@@ -535,7 +546,12 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
output_dir=plugin_output_dir,
config=config,
timeout=600, # 10 min timeout for binary installation
**hook_kwargs
binary_id=str(self.id),
machine_id=str(self.machine_id),
name=self.name,
binproviders=self.binproviders,
custom_cmd=custom_cmd,
overrides=overrides_json,
)
# Background hook (unlikely for binary installation, but handle it)
@@ -679,7 +695,7 @@ class ProcessManager(models.Manager):
"""Get the Process record for the current OS process."""
return Process.current()
def get_by_pid(self, pid: int, machine: 'Machine' = None) -> 'Process | None':
def get_by_pid(self, pid: int, machine: 'Machine | None' = None) -> 'Process | None':
"""
Find a Process by PID with proper validation against PID reuse.
@@ -880,11 +896,17 @@ class Process(models.Model):
help_text='When to retry this process'
)
machine_id: uuid.UUID
parent_id: uuid.UUID | None
binary_id: uuid.UUID | None
children: models.Manager['Process']
archiveresult: 'ArchiveResult'
state_machine_name: str = 'archivebox.machine.models.ProcessMachine'
objects: ProcessManager = ProcessManager()
objects = ProcessManager() # pyright: ignore[reportIncompatibleVariableOverride]
class Meta:
class Meta(TypedModelMeta):
app_label = 'machine'
verbose_name = 'Process'
verbose_name_plural = 'Processes'
@@ -971,7 +993,7 @@ class Process(models.Model):
return self.parse_records_from_text(stdout or '')
@staticmethod
def from_json(record: dict, overrides: dict = None):
def from_json(record: dict[str, Any], overrides: dict[str, Any] | None = None):
"""
Create/update Process from JSON dict.
@@ -990,7 +1012,7 @@ class Process(models.Model):
pass
return None
def update_and_requeue(self, **kwargs):
def update_and_requeue(self, **kwargs) -> bool:
"""
Update process fields and requeue for worker state machine.
Sets modified_at to ensure workers pick up changes.
@@ -999,6 +1021,7 @@ class Process(models.Model):
setattr(self, key, value)
self.modified_at = timezone.now()
self.save()
return True
# =========================================================================
# Process.current() and hierarchy methods
@@ -1094,7 +1117,7 @@ class Process(models.Model):
return _CURRENT_PROCESS
@classmethod
def _find_parent_process(cls, machine: 'Machine' = None) -> 'Process | None':
def _find_parent_process(cls, machine: 'Machine | None' = None) -> 'Process | None':
"""
Find the parent Process record by looking up PPID.
@@ -1163,7 +1186,7 @@ class Process(models.Model):
return cls.TypeChoices.BINARY
@classmethod
def cleanup_stale_running(cls, machine: 'Machine' = None) -> int:
def cleanup_stale_running(cls, machine: 'Machine | None' = None) -> int:
"""
Mark stale RUNNING processes as EXITED.
@@ -1374,25 +1397,25 @@ class Process(models.Model):
# =========================================================================
@property
def pid_file(self) -> Path:
def pid_file(self) -> Path | None:
"""Path to PID file for this process."""
runtime_dir = self.runtime_dir
return runtime_dir / 'process.pid' if runtime_dir else None
@property
def cmd_file(self) -> Path:
def cmd_file(self) -> Path | None:
"""Path to cmd.sh script for this process."""
runtime_dir = self.runtime_dir
return runtime_dir / 'cmd.sh' if runtime_dir else None
@property
def stdout_file(self) -> Path:
def stdout_file(self) -> Path | None:
"""Path to stdout log."""
runtime_dir = self.runtime_dir
return runtime_dir / 'stdout.log' if runtime_dir else None
@property
def stderr_file(self) -> Path:
def stderr_file(self) -> Path | None:
"""Path to stderr log."""
runtime_dir = self.runtime_dir
return runtime_dir / 'stderr.log' if runtime_dir else None
@@ -1647,6 +1670,8 @@ class Process(models.Model):
stdout_path.parent.mkdir(parents=True, exist_ok=True)
if stderr_path:
stderr_path.parent.mkdir(parents=True, exist_ok=True)
if stdout_path is None or stderr_path is None:
raise RuntimeError('Process log paths could not be determined')
with open(stdout_path, 'a') as out, open(stderr_path, 'a') as err:
proc = subprocess.Popen(
@@ -2006,7 +2031,7 @@ class Process(models.Model):
# =========================================================================
@classmethod
def get_running(cls, process_type: str = None, machine: 'Machine' = None) -> 'QuerySet[Process]':
def get_running(cls, process_type: str | None = None, machine: 'Machine | None' = None) -> 'QuerySet[Process]':
"""
Get all running processes, optionally filtered by type.
@@ -2031,7 +2056,7 @@ class Process(models.Model):
return qs
@classmethod
def get_running_count(cls, process_type: str = None, machine: 'Machine' = None) -> int:
def get_running_count(cls, process_type: str | None = None, machine: 'Machine | None' = None) -> int:
"""
Get count of running processes.
@@ -2041,7 +2066,7 @@ class Process(models.Model):
return cls.get_running(process_type=process_type, machine=machine).count()
@classmethod
def stop_all(cls, process_type: str = None, machine: 'Machine' = None, graceful: bool = True) -> int:
def stop_all(cls, process_type: str | None = None, machine: 'Machine | None' = None, graceful: bool = True) -> int:
"""
Stop all running processes of a given type.
@@ -2064,7 +2089,7 @@ class Process(models.Model):
return stopped
@classmethod
def get_next_worker_id(cls, process_type: str = 'worker', machine: 'Machine' = None) -> int:
def get_next_worker_id(cls, process_type: str = 'worker', machine: 'Machine | None' = None) -> int:
"""
Get the next available worker ID for spawning new workers.
@@ -2190,6 +2215,7 @@ class BinaryMachine(BaseStateMachine):
"""
model_attr_name = 'binary'
binary: Binary
# States
queued = State(value=Binary.StatusChoices.QUEUED, initial=True)
@@ -2293,6 +2319,7 @@ class ProcessMachine(BaseStateMachine):
"""
model_attr_name = 'process'
process: Process
# States
queued = State(value=Process.StatusChoices.QUEUED, initial=True)

View File

@@ -13,6 +13,7 @@ Tests cover:
import os
from datetime import timedelta
from typing import cast
from unittest.mock import patch
import pytest
@@ -20,6 +21,7 @@ from django.test import TestCase
from django.utils import timezone
from archivebox.machine.models import (
BinaryManager,
Machine,
NetworkInterface,
Binary,
@@ -94,7 +96,7 @@ class TestMachineModel(TestCase):
def test_machine_manager_current(self):
    """Machine.current() should return the current machine."""
    # Duplicated pre-refactor call (Machine.objects.current()) removed;
    # the classmethod Machine.current() is the surviving API.
    machine = Machine.current()
    self.assertIsNotNone(machine)
    # Calling it again must yield the same (cached) machine record.
    self.assertEqual(machine.id, Machine.current().id)
@@ -126,7 +128,7 @@ class TestNetworkInterfaceModel(TestCase):
def test_networkinterface_manager_current(self):
    """NetworkInterface.current() should return the current interface."""
    # Duplicated pre-refactor call (NetworkInterface.objects.current())
    # removed; the classmethod NetworkInterface.current() is the survivor.
    interface = NetworkInterface.current()
    self.assertIsNotNone(interface)
@@ -177,7 +179,7 @@ class TestBinaryModel(TestCase):
version='1.21',
)
result = Binary.objects.get_valid_binary('wget')
result = cast(BinaryManager, Binary.objects).get_valid_binary('wget')
self.assertIsNotNone(result)
assert result is not None

View File

@@ -79,8 +79,8 @@ def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Op
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
if isinstance(text, str):
stderr('{}{lightred}Hint:{reset} {}'.format(prefix, text, **ansi))
stderr(f"{prefix}{ansi['lightred']}Hint:{ansi['reset']} {text}")
else:
stderr('{}{lightred}Hint:{reset} {}'.format(prefix, text[0], **ansi))
stderr(f"{prefix}{ansi['lightred']}Hint:{ansi['reset']} {text[0]}")
for line in text[1:]:
stderr('{} {}'.format(prefix, line))
stderr(f'{prefix} {line}')

View File

@@ -5,6 +5,8 @@ import os
import stat
import posixpath
import mimetypes
import importlib
from collections.abc import Callable
from pathlib import Path
from django.contrib.staticfiles import finders
@@ -69,9 +71,9 @@ mimetypes.add_type("application/xml", ".xml")
mimetypes.add_type("image/svg+xml", ".svg")
try:
import markdown as _markdown
except Exception:
_markdown = None
_markdown = getattr(importlib.import_module('markdown'), 'markdown')
except ImportError:
_markdown: Callable[..., str] | None = None
MARKDOWN_INLINE_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)\s]+(?:\([^)]*\)[^)\s]*)*)\)')
MARKDOWN_INLINE_IMAGE_RE = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')
@@ -108,7 +110,7 @@ def _looks_like_markdown(text: str) -> bool:
def _render_markdown_fallback(text: str) -> str:
if _markdown is not None and not HTML_TAG_RE.search(text):
try:
return _markdown.markdown(
return _markdown(
text,
extensions=["extra", "toc", "sane_lists"],
output_format="html",

View File

@@ -1,4 +1,4 @@
from typing import Any, List, Callable
from typing import Any, List, Callable, cast
import json
import ast
@@ -94,7 +94,8 @@ class JSONSchemaWithLambdas(GenerateJsonSchema):
def better_toml_dump_str(val: Any) -> str:
try:
return toml.encoder._dump_str(val) # type: ignore
dump_str = cast(Callable[[Any], str], getattr(toml.encoder, '_dump_str'))
return dump_str(val)
except Exception:
# if we hit any of toml's numerous encoding bugs,
# fall back to using json representation of string
@@ -108,7 +109,8 @@ class CustomTOMLEncoder(toml.encoder.TomlEncoder):
"""
def __init__(self, **kwargs):
    """Register custom dump functions for Path, str, and RegexFlag values.

    Path-like values are emitted as JSON strings; str and RegexFlag go
    through better_toml_dump_str to dodge toml's string-encoding bugs.
    """
    super().__init__(**kwargs)
    # Duplicated pre-refactor registrations (direct self.dump_funcs[...]
    # assignments) removed; the cast keeps the type checker happy about
    # non-str keys in toml's dump_funcs mapping.
    dump_funcs = cast(dict[Any, Callable[[Any], str]], self.dump_funcs)
    dump_funcs[Path] = lambda x: json.dumps(str(x))
    dump_funcs[PosixPath] = lambda x: json.dumps(str(x))
    dump_funcs[str] = better_toml_dump_str
    dump_funcs[re.RegexFlag] = better_toml_dump_str

View File

@@ -16,7 +16,7 @@ from datetime import datetime, timezone
from dateparser import parse as dateparser
from requests.exceptions import RequestException, ReadTimeout
from base32_crockford import encode as base32_encode # type: ignore
from base32_crockford import encode as base32_encode
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
try:
import chardet # type:ignore
@@ -200,7 +200,7 @@ def parse_date(date: Any) -> datetime | None:
"""Parse unix timestamps, iso format, and human-readable strings"""
if date is None:
return None # type: ignore
return None
if isinstance(date, datetime):
if date.tzinfo is None:

View File

@@ -16,7 +16,7 @@ import subprocess
import sys
from contextlib import contextmanager
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any
from django.db import models
from django.conf import settings
@@ -25,13 +25,18 @@ from django.utils import timezone
from archivebox.base_models.models import ModelWithConfig, get_or_create_system_user_pk
from archivebox.uuid_compat import uuid7
_fcntl: Any | None = None
try:
import fcntl
import fcntl as _fcntl_import
except ImportError: # pragma: no cover
fcntl = None
pass
else:
_fcntl = _fcntl_import
if TYPE_CHECKING:
pass
import fcntl
else:
fcntl = _fcntl
VOLATILE_PROFILE_DIR_NAMES = {
@@ -79,7 +84,7 @@ class Persona(ModelWithConfig):
created_at = models.DateTimeField(default=timezone.now, db_index=True)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
class Meta:
class Meta(ModelWithConfig.Meta):
app_label = 'personas'
def __str__(self) -> str:

View File

@@ -8,6 +8,7 @@ from django.db import models
from django.core import checks
from django.utils import timezone
from django.utils.functional import classproperty
from django_stubs_ext.db.models import TypedModelMeta
from statemachine import registry, StateMachine, State
@@ -31,7 +32,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
# status: models.CharField
# retry_at: models.DateTimeField
state_machine_name: str | None
state_machine_name: str | None = None
state_field_name: str
state_machine_attr: str = 'sm'
bind_events_as_methods: bool = True
@@ -39,7 +40,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
active_state: ObjectState
retry_at_field_name: str
class Meta:
class Meta(TypedModelMeta):
app_label = 'workers'
abstract = True
@@ -92,7 +93,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
if not found_id_field:
errors.append(checks.Error(
f'{cls.__name__} must have an id field that is a primary key',
hint=f'{cls.__name__}.id = {cls.id!r}',
hint=f'{cls.__name__}.id field missing or not configured as primary key',
obj=cls,
id='workers.E014',
))

View File

@@ -11,14 +11,26 @@ Tests cover:
import os
import time
from datetime import timedelta
from unittest.mock import patch, MagicMock
from datetime import datetime, timedelta
from unittest.mock import patch
from typing import ClassVar
import pytest
from django.test import TestCase
from django.utils import timezone
from archivebox.workers.orchestrator import Orchestrator
from archivebox.workers.worker import Worker
class FakeWorker(Worker):
    """Test double for Worker: a 'crawl' worker whose running-worker list
    is a class attribute that individual tests assign directly."""

    name: ClassVar[str] = 'crawl'
    MAX_CONCURRENT_TASKS: ClassVar[int] = 5
    running_workers: ClassVar[list[dict[str, object]]] = []

    @classmethod
    def get_running_workers(cls) -> list[dict[str, object]]:
        """Return whatever list the test stored in ``running_workers``."""
        return FakeWorker.running_workers
class TestOrchestratorUnit(TestCase):
@@ -99,31 +111,25 @@ class TestOrchestratorUnit(TestCase):
"""should_spawn_worker should return False when queue is empty."""
orchestrator = Orchestrator()
# Create a mock worker class
mock_worker = MagicMock()
mock_worker.get_running_workers.return_value = []
self.assertFalse(orchestrator.should_spawn_worker(mock_worker, 0))
FakeWorker.running_workers = []
self.assertFalse(orchestrator.should_spawn_worker(FakeWorker, 0))
def test_should_spawn_worker_at_limit(self):
"""should_spawn_worker should return False when at per-type limit."""
orchestrator = Orchestrator()
mock_worker = MagicMock()
mock_worker.get_running_workers.return_value = [{}] * orchestrator.MAX_WORKERS_PER_TYPE
self.assertFalse(orchestrator.should_spawn_worker(mock_worker, 10))
running_workers: list[dict[str, object]] = [{'worker_id': worker_id} for worker_id in range(orchestrator.MAX_CRAWL_WORKERS)]
FakeWorker.running_workers = running_workers
self.assertFalse(orchestrator.should_spawn_worker(FakeWorker, 10))
@patch.object(Orchestrator, 'get_total_worker_count')
def test_should_spawn_worker_at_total_limit(self, mock_total):
"""should_spawn_worker should return False when at total limit."""
orchestrator = Orchestrator()
mock_total.return_value = orchestrator.MAX_TOTAL_WORKERS
mock_worker = MagicMock()
mock_worker.get_running_workers.return_value = []
self.assertFalse(orchestrator.should_spawn_worker(mock_worker, 10))
mock_total.return_value = 0
running_workers: list[dict[str, object]] = [{'worker_id': worker_id} for worker_id in range(orchestrator.MAX_CRAWL_WORKERS)]
FakeWorker.running_workers = running_workers
self.assertFalse(orchestrator.should_spawn_worker(FakeWorker, 10))
@patch.object(Orchestrator, 'get_total_worker_count')
def test_should_spawn_worker_success(self, mock_total):
@@ -131,11 +137,8 @@ class TestOrchestratorUnit(TestCase):
orchestrator = Orchestrator()
mock_total.return_value = 0
mock_worker = MagicMock()
mock_worker.get_running_workers.return_value = []
mock_worker.MAX_CONCURRENT_TASKS = 5
self.assertTrue(orchestrator.should_spawn_worker(mock_worker, 10))
FakeWorker.running_workers = []
self.assertTrue(orchestrator.should_spawn_worker(FakeWorker, 10))
@patch.object(Orchestrator, 'get_total_worker_count')
def test_should_spawn_worker_enough_workers(self, mock_total):
@@ -143,12 +146,8 @@ class TestOrchestratorUnit(TestCase):
orchestrator = Orchestrator()
mock_total.return_value = 2
mock_worker = MagicMock()
mock_worker.get_running_workers.return_value = [{}] # 1 worker running
mock_worker.MAX_CONCURRENT_TASKS = 5 # Can handle 5 items
# Queue size (3) <= running_workers (1) * MAX_CONCURRENT_TASKS (5)
self.assertFalse(orchestrator.should_spawn_worker(mock_worker, 3))
FakeWorker.running_workers = [{}] # 1 worker running
self.assertFalse(orchestrator.should_spawn_worker(FakeWorker, 3))
class TestOrchestratorWithProcess(TestCase):
@@ -178,8 +177,10 @@ class TestOrchestratorWithProcess(TestCase):
def test_is_running_with_orchestrator_process(self):
"""is_running should return True when orchestrator Process exists."""
from archivebox.machine.models import Process, Machine
import psutil
machine = Machine.current()
current_proc = psutil.Process(os.getpid())
# Create an orchestrator Process record
proc = Process.objects.create(
@@ -187,8 +188,8 @@ class TestOrchestratorWithProcess(TestCase):
process_type=Process.TypeChoices.ORCHESTRATOR,
status=Process.StatusChoices.RUNNING,
pid=os.getpid(), # Use current PID so it appears alive
started_at=timezone.now(),
cmd=['archivebox', 'manage', 'orchestrator'],
started_at=datetime.fromtimestamp(current_proc.create_time(), tz=timezone.get_current_timezone()),
cmd=current_proc.cmdline(),
)
try:
@@ -393,14 +394,7 @@ class TestProcessLifecycle(TestCase):
def test_process_is_running_property(self):
"""Process.is_running should check actual OS process."""
from archivebox.machine.models import Process
# Create a process with current PID (should be running)
proc = Process.objects.create(
machine=self.machine,
status=Process.StatusChoices.RUNNING,
pid=os.getpid(),
started_at=timezone.now(),
)
proc = Process.current()
# Should be running (current process exists)
self.assertTrue(proc.is_running)