use full dotted paths for all archivebox imports, add migrations and more fixes

This commit is contained in:
Nick Sweeting
2025-12-29 00:47:08 -08:00
parent 1e4d3ffd11
commit f4e7820533
61 changed files with 1082 additions and 2985 deletions

View File

@@ -763,7 +763,7 @@ The configuration is documented here: **[Configuration Wiki](https://github.com/
<br/> <br/>
TIMEOUT=240 # default: 60 add more seconds on slower networks TIMEOUT=240 # default: 60 add more seconds on slower networks
CHECK_SSL_VALIDITY=False # default: True False = allow saving URLs w/ bad SSL CHECK_SSL_VALIDITY=False # default: True False = allow saving URLs w/ bad SSL
SAVE_ARCHIVE_DOT_ORG=False # default: True False = disable Archive.org saving SAVE_ARCHIVEDOTORG=False # default: True False = disable Archive.org saving
MAX_MEDIA_SIZE=1500m # default: 750m raise/lower youtubedl output size MAX_MEDIA_SIZE=1500m # default: 750m raise/lower youtubedl output size
<br/> <br/>
PUBLIC_INDEX=True # default: True whether anon users can view index PUBLIC_INDEX=True # default: True whether anon users can view index
@@ -959,7 +959,7 @@ archivebox add 'https://docs.google.com/document/d/12345somePrivateDocument'
archivebox add 'https://vimeo.com/somePrivateVideo' archivebox add 'https://vimeo.com/somePrivateVideo'
# without first disabling saving to Archive.org: # without first disabling saving to Archive.org:
archivebox config --set SAVE_ARCHIVE_DOT_ORG=False # disable saving all URLs in Archive.org archivebox config --set SAVE_ARCHIVEDOTORG=False # disable saving all URLs in Archive.org
# restrict the main index, Snapshot content, and Add Page to authenticated users as-needed: # restrict the main index, Snapshot content, and Add Page to authenticated users as-needed:
archivebox config --set PUBLIC_INDEX=False archivebox config --set PUBLIC_INDEX=False

View File

@@ -26,10 +26,10 @@ ASCII_LOGO = """
PACKAGE_DIR = Path(__file__).resolve().parent PACKAGE_DIR = Path(__file__).resolve().parent
# Add PACKAGE_DIR to sys.path - required for Django migrations to import models # # Add PACKAGE_DIR to sys.path - required for Django migrations to import models
# Migrations reference models like 'machine.Binary' which need to be importable # # Migrations reference models like 'machine.Binary' which need to be importable
if str(PACKAGE_DIR) not in sys.path: # if str(PACKAGE_DIR) not in sys.path:
sys.path.append(str(PACKAGE_DIR)) # sys.path.append(str(PACKAGE_DIR))
os.environ['DJANGO_SETTINGS_MODULE'] = 'archivebox.core.settings' os.environ['DJANGO_SETTINGS_MODULE'] = 'archivebox.core.settings'
os.environ['TZ'] = 'UTC' os.environ['TZ'] = 'UTC'

View File

@@ -5,6 +5,7 @@ from django.apps import AppConfig
class APIConfig(AppConfig): class APIConfig(AppConfig):
name = 'archivebox.api' name = 'archivebox.api'
label = 'api'
def register_admin(admin_site): def register_admin(admin_site):

View File

@@ -94,7 +94,7 @@ class OrchestratorSchema(Schema):
@router.get("/orchestrator", response=OrchestratorSchema, url_name="get_orchestrator") @router.get("/orchestrator", response=OrchestratorSchema, url_name="get_orchestrator")
def get_orchestrator(request): def get_orchestrator(request):
"""Get the orchestrator status and all worker queues.""" """Get the orchestrator status and all worker queues."""
from workers.orchestrator import Orchestrator from archivebox.workers.orchestrator import Orchestrator
from workers.worker import CrawlWorker, SnapshotWorker, ArchiveResultWorker from workers.worker import CrawlWorker, SnapshotWorker, ArchiveResultWorker
orchestrator = Orchestrator() orchestrator = Orchestrator()

View File

@@ -73,7 +73,7 @@ class ModelWithUUID(models.Model):
return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}' return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}'
def as_json(self, keys: Iterable[str] = ()) -> dict: def as_json(self, keys: Iterable[str] = ()) -> dict:
default_keys = ('id', 'created_at', 'modified_at', 'created_by_id') default_keys = ('id', 'created_at', 'modified_at')
return {key: getattr(self, key) for key in (keys or default_keys) if hasattr(self, key)} return {key: getattr(self, key) for key in (keys or default_keys) if hasattr(self, key)}
@@ -119,7 +119,7 @@ class ModelWithHealthStats(models.Model):
class ModelWithConfig(models.Model): class ModelWithConfig(models.Model):
"""Mixin for models with a JSON config field.""" """Mixin for models with a JSON config field."""
config = models.JSONField(default=dict, null=False, blank=False, editable=True) config = models.JSONField(default=dict, null=True, blank=True, editable=True)
class Meta: class Meta:
abstract = True abstract = True

View File

@@ -56,7 +56,7 @@ def add(urls: str | list[str],
from archivebox.core.models import Snapshot from archivebox.core.models import Snapshot
from archivebox.crawls.models import Crawl from archivebox.crawls.models import Crawl
from archivebox.base_models.models import get_or_create_system_user_pk from archivebox.base_models.models import get_or_create_system_user_pk
from workers.orchestrator import Orchestrator from archivebox.workers.orchestrator import Orchestrator
created_by_id = created_by_id or get_or_create_system_user_pk() created_by_id = created_by_id or get_or_create_system_user_pk()

View File

@@ -78,7 +78,7 @@ def discover_outlinks(
from archivebox.core.models import Snapshot, ArchiveResult from archivebox.core.models import Snapshot, ArchiveResult
from archivebox.crawls.models import Crawl from archivebox.crawls.models import Crawl
from archivebox.config import CONSTANTS from archivebox.config import CONSTANTS
from workers.orchestrator import Orchestrator from archivebox.workers.orchestrator import Orchestrator
created_by_id = get_or_create_system_user_pk() created_by_id = get_or_create_system_user_pk()
is_tty = sys.stdout.isatty() is_tty = sys.stdout.isatty()

View File

@@ -96,7 +96,7 @@ def run_plugins(
TYPE_SNAPSHOT, TYPE_ARCHIVERESULT TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
) )
from archivebox.core.models import Snapshot, ArchiveResult from archivebox.core.models import Snapshot, ArchiveResult
from workers.orchestrator import Orchestrator from archivebox.workers.orchestrator import Orchestrator
is_tty = sys.stdout.isatty() is_tty = sys.stdout.isatty()

View File

@@ -13,11 +13,9 @@ from archivebox.misc.util import docstring, enforce_types
@enforce_types @enforce_types
def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=False) -> None: def init(force: bool=False, quick: bool=False, install: bool=False) -> None:
"""Initialize a new ArchiveBox collection in the current directory""" """Initialize a new ArchiveBox collection in the current directory"""
install = install or setup
from archivebox.config import CONSTANTS, VERSION, DATA_DIR from archivebox.config import CONSTANTS, VERSION, DATA_DIR
from archivebox.config.common import SERVER_CONFIG from archivebox.config.common import SERVER_CONFIG
from archivebox.config.collection import write_config_file from archivebox.config.collection import write_config_file
@@ -128,7 +126,8 @@ def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=
print(f' [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]') print(f' [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]')
if pending_links: if pending_links:
Snapshot.objects.create_from_dicts(list(pending_links.values())) for link_dict in pending_links.values():
Snapshot.from_jsonl(link_dict)
# Hint for orphaned snapshot directories # Hint for orphaned snapshot directories
print() print()
@@ -187,7 +186,6 @@ def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=
@click.option('--force', '-f', is_flag=True, help='Ignore unrecognized files in current directory and initialize anyway') @click.option('--force', '-f', is_flag=True, help='Ignore unrecognized files in current directory and initialize anyway')
@click.option('--quick', '-q', is_flag=True, help='Run any updates or migrations without rechecking all snapshot dirs') @click.option('--quick', '-q', is_flag=True, help='Run any updates or migrations without rechecking all snapshot dirs')
@click.option('--install', '-s', is_flag=True, help='Automatically install dependencies and extras used for archiving') @click.option('--install', '-s', is_flag=True, help='Automatically install dependencies and extras used for archiving')
@click.option('--setup', '-s', is_flag=True, help='DEPRECATED: equivalent to --install')
@docstring(init.__doc__) @docstring(init.__doc__)
def main(**kwargs) -> None: def main(**kwargs) -> None:
init(**kwargs) init(**kwargs)

View File

@@ -85,7 +85,7 @@ def install(dry_run: bool=False) -> None:
print() print()
# Run the crawl synchronously (this triggers on_Crawl hooks) # Run the crawl synchronously (this triggers on_Crawl hooks)
from workers.orchestrator import Orchestrator from archivebox.workers.orchestrator import Orchestrator
orchestrator = Orchestrator(exit_on_idle=True) orchestrator = Orchestrator(exit_on_idle=True)
orchestrator.runloop() orchestrator.runloop()

View File

@@ -37,7 +37,7 @@ def orchestrator(daemon: bool = False, watch: bool = False) -> int:
0: All work completed successfully 0: All work completed successfully
1: Error occurred 1: Error occurred
""" """
from workers.orchestrator import Orchestrator from archivebox.workers.orchestrator import Orchestrator
if Orchestrator.is_running(): if Orchestrator.is_running():
print('[yellow]Orchestrator is already running[/yellow]') print('[yellow]Orchestrator is already running[/yellow]')

View File

@@ -74,7 +74,7 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
tail_multiple_worker_logs, tail_multiple_worker_logs,
is_port_in_use, is_port_in_use,
) )
from workers.orchestrator import Orchestrator from archivebox.workers.orchestrator import Orchestrator
import sys import sys
# Check if port is already in use # Check if port is already in use

View File

@@ -163,7 +163,7 @@ def create_snapshots(
# If --plugins is passed, run the orchestrator for those plugins # If --plugins is passed, run the orchestrator for those plugins
if plugins: if plugins:
from workers.orchestrator import Orchestrator from archivebox.workers.orchestrator import Orchestrator
rprint(f'[blue]Running plugins: {plugins or "all"}...[/blue]', file=sys.stderr) rprint(f'[blue]Running plugins: {plugins or "all"}...[/blue]', file=sys.stderr)
orchestrator = Orchestrator(exit_on_idle=True) orchestrator = Orchestrator(exit_on_idle=True)
orchestrator.runloop() orchestrator.runloop()

View File

@@ -160,7 +160,7 @@ def process_all_db_snapshots(batch_size: int = 100) -> dict:
total = Snapshot.objects.count() total = Snapshot.objects.count()
print(f'[*] Processing {total} snapshots from database...') print(f'[*] Processing {total} snapshots from database...')
for snapshot in Snapshot.objects.iterator(): for snapshot in Snapshot.objects.iterator(chunk_size=batch_size):
# Reconcile index.json with DB # Reconcile index.json with DB
snapshot.reconcile_with_index_json() snapshot.reconcile_with_index_json()
@@ -209,7 +209,7 @@ def process_filtered_snapshots(
total = snapshots.count() total = snapshots.count()
print(f'[*] Found {total} matching snapshots') print(f'[*] Found {total} matching snapshots')
for snapshot in snapshots.iterator(): for snapshot in snapshots.iterator(chunk_size=batch_size):
# Reconcile index.json with DB # Reconcile index.json with DB
snapshot.reconcile_with_index_json() snapshot.reconcile_with_index_json()

View File

@@ -17,7 +17,7 @@ TEST_CONFIG = {
'DATA_DIR': 'data.tests', 'DATA_DIR': 'data.tests',
'SAVE_ARCHIVE_DOT_ORG': 'False', 'SAVE_ARCHIVEDOTORG': 'False',
'SAVE_TITLE': 'False', 'SAVE_TITLE': 'False',
'USE_CURL': 'False', 'USE_CURL': 'False',

View File

@@ -32,7 +32,7 @@ from unittest.mock import patch, MagicMock
TEST_CONFIG = { TEST_CONFIG = {
'USE_COLOR': 'False', 'USE_COLOR': 'False',
'SHOW_PROGRESS': 'False', 'SHOW_PROGRESS': 'False',
'SAVE_ARCHIVE_DOT_ORG': 'False', 'SAVE_ARCHIVEDOTORG': 'False',
'SAVE_TITLE': 'True', # Fast extractor 'SAVE_TITLE': 'True', # Fast extractor
'SAVE_FAVICON': 'False', 'SAVE_FAVICON': 'False',
'SAVE_WGET': 'False', 'SAVE_WGET': 'False',

View File

@@ -216,6 +216,29 @@ def get_config(
if snapshot and hasattr(snapshot, "config") and snapshot.config: if snapshot and hasattr(snapshot, "config") and snapshot.config:
config.update(snapshot.config) config.update(snapshot.config)
# Normalize all aliases to canonical names (after all sources merged)
# This handles aliases that came from user/crawl/snapshot configs, not just env
try:
from archivebox.hooks import discover_plugin_configs
plugin_configs = discover_plugin_configs()
aliases_to_normalize = {} # {alias_key: canonical_key}
# Build alias mapping from all plugin schemas
for plugin_name, schema in plugin_configs.items():
for canonical_key, prop_schema in schema.get('properties', {}).items():
for alias in prop_schema.get('x-aliases', []):
aliases_to_normalize[alias] = canonical_key
# Normalize: copy alias values to canonical keys (aliases take precedence)
for alias_key, canonical_key in aliases_to_normalize.items():
if alias_key in config:
# Alias exists - copy to canonical key (overwriting any default)
config[canonical_key] = config[alias_key]
# Remove alias from config to keep it clean
del config[alias_key]
except ImportError:
pass
return config return config

View File

@@ -5,8 +5,12 @@ from django.apps import AppConfig
class CoreConfig(AppConfig): class CoreConfig(AppConfig):
name = 'archivebox.core' name = 'archivebox.core'
label = 'core'
def ready(self): def ready(self):
"""Register the archivebox.core.admin_site as the main django admin site""" """Register the archivebox.core.admin_site as the main django admin site"""
from archivebox.core.admin_site import register_admin_site from archivebox.core.admin_site import register_admin_site
register_admin_site() register_admin_site()
# Import models to register state machines with the registry
from archivebox.core import models # noqa: F401

View File

@@ -0,0 +1,57 @@
# Data migration to clear config fields that may contain invalid JSON
# This runs before 0025 to prevent CHECK constraint failures
from django.db import migrations


def clear_config_fields(apps, schema_editor):
    """Reset JSON config/stats columns to '{}' in legacy tables.

    Databases upgraded from 0.8.x may hold invalid JSON in these columns,
    which would trip the CHECK constraints added when later migrations
    rebuild the tables. Tables that do not exist (fresh installs, partial
    schemas) are skipped, and any per-table failure is logged and ignored
    so the migration is best-effort and never blocks an upgrade.
    """
    # Disable foreign key checks temporarily to allow updates.
    # NOTE(review): SQLite ignores this PRAGMA while a transaction is open,
    # so it only takes effect if this migration runs non-atomically — confirm.
    with schema_editor.connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=OFF")

    # (table, JSON column) pairs that may contain invalid JSON.
    # Table existence is checked per-entry below, so entries for tables that
    # may be absent (e.g. crawls_crawlschedule) are safe to list directly.
    tables_to_clear = [
        ('crawls_seed', 'config'),
        ('crawls_crawl', 'config'),
        ('crawls_crawlschedule', 'config'),
        ('machine_machine', 'stats'),
        ('machine_machine', 'config'),
    ]

    for table_name, field_name in tables_to_clear:
        try:
            with schema_editor.connection.cursor() as cursor:
                # Check if table exists first. Names come from the hardcoded
                # list above, so f-string interpolation is injection-safe.
                cursor.execute(f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}'")
                if not cursor.fetchone():
                    print(f"  Skipping {table_name}.{field_name}: table does not exist")
                    continue
                # Set all non-NULL values to an empty JSON object
                cursor.execute(f"UPDATE {table_name} SET {field_name} = '{{}}' WHERE {field_name} IS NOT NULL")
                print(f"  Cleared {field_name} in {table_name}: {cursor.rowcount} rows")
        except Exception as e:
            # Best-effort: report and keep going with the remaining tables
            print(f"  Skipping {table_name}.{field_name}: {e}")

    # Re-enable foreign key checks
    with schema_editor.connection.cursor() as cursor:
        cursor.execute("PRAGMA foreign_keys=ON")


class Migration(migrations.Migration):

    dependencies = [
        ('core', '0023_new_schema'),
        ('crawls', '0001_initial'),
        ('machine', '0001_squashed'),
    ]

    operations = [
        migrations.RunPython(clear_config_fields, reverse_code=migrations.RunPython.noop),
    ]

View File

@@ -0,0 +1,28 @@
# Disable foreign key checks before 0025 to prevent CHECK constraint validation errors
from django.db import migrations
def disable_fk_checks(apps, schema_editor):
"""Temporarily disable foreign key checks."""
with schema_editor.connection.cursor() as cursor:
cursor.execute("PRAGMA foreign_keys=OFF")
print(" Disabled foreign key checks")
def enable_fk_checks(apps, schema_editor):
"""Re-enable foreign key checks."""
with schema_editor.connection.cursor() as cursor:
cursor.execute("PRAGMA foreign_keys=ON")
print(" Enabled foreign key checks")
class Migration(migrations.Migration):
dependencies = [
('core', '0024_b_clear_config_fields'),
]
operations = [
migrations.RunPython(disable_fk_checks, reverse_code=enable_fk_checks),
]

View File

@@ -0,0 +1,93 @@
# Fix crawls_crawl config field to avoid CHECK constraint errors during table rebuilds
from django.db import migrations
def fix_crawls_config(apps, schema_editor):
"""
Rebuild crawls_crawl table to fix CHECK constraints and make seed_id nullable.
Only runs for UPGRADES from 0.8.x (when crawls.0001_initial didn't exist yet).
For fresh installs, crawls.0001_initial creates the correct schema.
"""
with schema_editor.connection.cursor() as cursor:
# Check if this is an upgrade from old 0.8.x or a fresh install
# In fresh installs, crawls.0001_initial was applied, creating seed FK
# In upgrades, the table was created by old migrations before 0001_initial existed
cursor.execute("""
SELECT COUNT(*) FROM django_migrations
WHERE app='crawls' AND name='0001_initial'
""")
has_crawls_0001 = cursor.fetchone()[0] > 0
if has_crawls_0001:
# Fresh install - crawls.0001_initial already created the correct schema
# Just clear config to avoid CHECK constraint issues
print(" Fresh install detected - clearing config field only")
try:
cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')
except Exception as e:
print(f" Skipping config clear: {e}")
return
# Upgrade from 0.8.x - rebuild table to make seed_id nullable and remove CHECK constraint
print(" Upgrading from 0.8.x - rebuilding crawls_crawl table")
cursor.execute("PRAGMA foreign_keys=OFF")
# Backup
cursor.execute("CREATE TABLE crawls_crawl_backup AS SELECT * FROM crawls_crawl")
# Recreate without config CHECK constraint, with nullable seed_id
cursor.execute("DROP TABLE crawls_crawl")
cursor.execute("""
CREATE TABLE "crawls_crawl" (
"num_uses_failed" integer unsigned NOT NULL CHECK ("num_uses_failed" >= 0),
"num_uses_succeeded" integer unsigned NOT NULL CHECK ("num_uses_succeeded" >= 0),
"id" char(32) NOT NULL PRIMARY KEY,
"created_at" datetime NOT NULL,
"modified_at" datetime NOT NULL,
"urls" text NOT NULL,
"config" text,
"max_depth" smallint unsigned NOT NULL CHECK ("max_depth" >= 0),
"tags_str" varchar(1024) NOT NULL,
"persona_id" char(32) NULL,
"label" varchar(64) NOT NULL,
"notes" text NOT NULL,
"output_dir" varchar(512) NOT NULL,
"status" varchar(15) NOT NULL,
"retry_at" datetime NULL,
"created_by_id" integer NOT NULL REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED,
"seed_id" char(32) NULL DEFAULT NULL,
"schedule_id" char(32) NULL REFERENCES "crawls_crawlschedule" ("id") DEFERRABLE INITIALLY DEFERRED
)
""")
# Restore data
cursor.execute("""
INSERT INTO "crawls_crawl" (
"num_uses_failed", "num_uses_succeeded", "id", "created_at", "modified_at",
"urls", "config", "max_depth", "tags_str", "persona_id", "label", "notes",
"output_dir", "status", "retry_at", "created_by_id", "seed_id", "schedule_id"
)
SELECT
"num_uses_failed", "num_uses_succeeded", "id", "created_at", "modified_at",
"urls", "config", "max_depth", "tags_str", "persona_id", "label", "notes",
"output_dir", "status", "retry_at", "created_by_id", "seed_id", "schedule_id"
FROM crawls_crawl_backup
""")
cursor.execute("DROP TABLE crawls_crawl_backup")
# NULL out config to avoid any invalid JSON
cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')
class Migration(migrations.Migration):
dependencies = [
('core', '0024_c_disable_fk_checks'),
('crawls', '0001_initial'),
]
operations = [
migrations.RunPython(fix_crawls_config, reverse_code=migrations.RunPython.noop),
]

View File

@@ -8,9 +8,7 @@ import django.db.models.deletion
class Migration(migrations.Migration): class Migration(migrations.Migration):
dependencies = [ dependencies = [
('core', '0023_new_schema'), ('core', '0024_d_fix_crawls_config'),
('crawls', '0001_initial'),
('machine', '0001_squashed'),
] ]
operations = [ operations = [

View File

@@ -10,6 +10,13 @@ from django.db import migrations, models
def populate_archiveresult_uuids(apps, schema_editor): def populate_archiveresult_uuids(apps, schema_editor):
"""Generate unique UUIDs for ArchiveResults that don't have one.""" """Generate unique UUIDs for ArchiveResults that don't have one."""
# Check if uuid column exists before trying to populate it
with schema_editor.connection.cursor() as cursor:
cursor.execute("PRAGMA table_info(core_archiveresult)")
columns = [row[1] for row in cursor.fetchall()]
if 'uuid' not in columns:
return # uuid column doesn't exist, skip this data migration
ArchiveResult = apps.get_model('core', 'ArchiveResult') ArchiveResult = apps.get_model('core', 'ArchiveResult')
for result in ArchiveResult.objects.filter(uuid__isnull=True): for result in ArchiveResult.objects.filter(uuid__isnull=True):
result.uuid = uuid_compat.uuid7() result.uuid = uuid_compat.uuid7()
@@ -21,6 +28,22 @@ def reverse_populate_uuids(apps, schema_editor):
pass pass
def remove_output_dir_if_exists(apps, schema_editor):
    """Drop the legacy output_dir column from both tables, if present.

    Raw PRAGMA inspection is used instead of migrations.RemoveField so the
    operation is a no-op on databases where the column was never created.
    """
    with schema_editor.connection.cursor() as cursor:
        for table in ("core_archiveresult", "core_snapshot"):
            # PRAGMA table_info rows are (cid, name, type, notnull, dflt, pk);
            # column name is at index 1.
            cursor.execute(f"PRAGMA table_info({table})")
            column_names = {row[1] for row in cursor.fetchall()}
            if 'output_dir' in column_names:
                cursor.execute(f"ALTER TABLE {table} DROP COLUMN output_dir")
class Migration(migrations.Migration): class Migration(migrations.Migration):
dependencies = [ dependencies = [
@@ -33,82 +56,90 @@ class Migration(migrations.Migration):
migrations.RunPython(populate_archiveresult_uuids, reverse_populate_uuids), migrations.RunPython(populate_archiveresult_uuids, reverse_populate_uuids),
# Remove output_dir fields (not needed, computed from snapshot) # Remove output_dir fields (not needed, computed from snapshot)
migrations.RemoveField( migrations.RunPython(remove_output_dir_if_exists, reverse_code=migrations.RunPython.noop),
model_name='archiveresult',
name='output_dir', # Update Django's migration state to match 0.9.x schema
), # Database already has correct types from 0.8.x, just update state
migrations.RemoveField( migrations.SeparateDatabaseAndState(
model_name='snapshot', state_operations=[
name='output_dir', # Archiveresult field alterations
migrations.AlterField(
model_name='archiveresult',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='archiveresult',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(db_index=True, max_length=32),
),
# Convert id from AutoField to UUIDField (database already has UUID CHAR(32))
migrations.AlterField(
model_name='archiveresult',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
),
# Snapshot field alterations
migrations.AlterField(
model_name='snapshot',
name='bookmarked_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='snapshot',
name='downloaded_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
],
database_operations=[
# No actual database changes needed - schema is already correct from 0.8.x
],
), ),
# Archiveresult field alterations # SnapshotTag and Tag alterations - state only, DB already correct
migrations.AlterField( migrations.SeparateDatabaseAndState(
model_name='archiveresult', state_operations=[
name='created_at', migrations.AlterField(
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now), model_name='snapshottag',
), name='id',
migrations.AlterField( field=models.AutoField(primary_key=True, serialize=False),
model_name='archiveresult', ),
name='created_by', migrations.AlterField(
field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL), model_name='tag',
), name='created_by',
migrations.AlterField( field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
model_name='archiveresult', ),
name='extractor', migrations.AlterUniqueTogether(
field=models.CharField(db_index=True, max_length=32), name='snapshottag',
), unique_together={('snapshot', 'tag')},
migrations.AlterField( ),
model_name='archiveresult', ],
name='id', database_operations=[],
field=models.AutoField(editable=False, primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
),
# Snapshot field alterations
migrations.AlterField(
model_name='snapshot',
name='bookmarked_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_by',
field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='snapshot',
name='downloaded_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
# SnapshotTag and Tag alterations
migrations.AlterField(
model_name='snapshottag',
name='id',
field=models.AutoField(primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='tag',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterUniqueTogether(
name='snapshottag',
unique_together={('snapshot', 'tag')},
), ),
] ]

View File

@@ -13,68 +13,79 @@ class Migration(migrations.Migration):
] ]
operations = [ operations = [
# Add new output fields (keep old 'output' temporarily for migration) # Add new output fields using SeparateDatabaseAndState to avoid table rebuilds
migrations.AddField( migrations.SeparateDatabaseAndState(
model_name='archiveresult', state_operations=[
name='output_str', migrations.AddField(
field=models.TextField( model_name='archiveresult',
blank=True, name='output_str',
default='', field=models.TextField(
help_text='Human-readable output summary (e.g., "Downloaded 5 files")' blank=True,
), default='',
), help_text='Human-readable output summary (e.g., "Downloaded 5 files")'
),
migrations.AddField( ),
model_name='archiveresult', migrations.AddField(
name='output_json', model_name='archiveresult',
field=models.JSONField( name='output_json',
null=True, field=models.JSONField(
blank=True, null=True,
default=None, blank=True,
help_text='Structured metadata (headers, redirects, etc.) - should NOT duplicate ArchiveResult fields' default=None,
), help_text='Structured metadata (headers, redirects, etc.) - should NOT duplicate ArchiveResult fields'
), ),
),
migrations.AddField( migrations.AddField(
model_name='archiveresult', model_name='archiveresult',
name='output_files', name='output_files',
field=models.JSONField( field=models.JSONField(
default=dict, default=dict,
help_text='Dict of {relative_path: {metadata}} - values are empty dicts for now, extensible for future metadata' help_text='Dict of {relative_path: {metadata}} - values are empty dicts for now, extensible for future metadata'
), ),
), ),
migrations.AddField(
migrations.AddField( model_name='archiveresult',
model_name='archiveresult', name='output_size',
name='output_size', field=models.BigIntegerField(
field=models.BigIntegerField( default=0,
default=0, help_text='Total recursive size in bytes of all output files'
help_text='Total recursive size in bytes of all output files' ),
), ),
), migrations.AddField(
model_name='archiveresult',
migrations.AddField( name='output_mimetypes',
model_name='archiveresult', field=models.CharField(
name='output_mimetypes', max_length=512,
field=models.CharField( blank=True,
max_length=512, default='',
blank=True, help_text='CSV of mimetypes sorted by size descending'
default='', ),
help_text='CSV of mimetypes sorted by size descending' ),
), migrations.AddField(
), model_name='archiveresult',
name='binary',
# Add binary FK (optional) field=models.ForeignKey(
migrations.AddField( 'machine.Binary',
model_name='archiveresult', on_delete=models.SET_NULL,
name='binary', null=True,
field=models.ForeignKey( blank=True,
'machine.Binary', related_name='archiveresults',
on_delete=models.SET_NULL, help_text='Primary binary used by this hook (optional)'
null=True, ),
blank=True, ),
related_name='archiveresults', ],
help_text='Primary binary used by this hook (optional)' database_operations=[
), migrations.RunSQL(
sql="""
ALTER TABLE core_archiveresult ADD COLUMN output_str TEXT DEFAULT '';
ALTER TABLE core_archiveresult ADD COLUMN output_json TEXT;
ALTER TABLE core_archiveresult ADD COLUMN output_files TEXT DEFAULT '{}';
ALTER TABLE core_archiveresult ADD COLUMN output_size BIGINT DEFAULT 0;
ALTER TABLE core_archiveresult ADD COLUMN output_mimetypes VARCHAR(512) DEFAULT '';
ALTER TABLE core_archiveresult ADD COLUMN binary_id CHAR(32) REFERENCES machine_binary(id);
""",
reverse_sql=migrations.RunSQL.noop,
),
],
), ),
] ]

View File

@@ -12,27 +12,46 @@ def migrate_output_field(apps, schema_editor):
Logic: Logic:
- If output contains JSON {...}, move to output_json - If output contains JSON {...}, move to output_json
- Otherwise, move to output_str - Otherwise, move to output_str
Use raw SQL to avoid CHECK constraint issues during migration.
""" """
ArchiveResult = apps.get_model('core', 'ArchiveResult') # Use raw SQL to migrate data without triggering CHECK constraints
with schema_editor.connection.cursor() as cursor:
# Get all archive results
cursor.execute("""
SELECT id, output FROM core_archiveresult
""")
for ar in ArchiveResult.objects.all().iterator(): for row in cursor.fetchall():
old_output = ar.output or '' ar_id, old_output = row
old_output = old_output or ''
# Case 1: JSON output # Case 1: JSON output
if old_output.strip().startswith('{'): if old_output.strip().startswith('{'):
try: try:
parsed = json.loads(old_output) # Validate it's actual JSON
ar.output_json = parsed parsed = json.loads(old_output)
ar.output_str = '' # Update with JSON - cast to JSON to satisfy CHECK constraint
except json.JSONDecodeError: json_str = json.dumps(parsed)
# Not valid JSON, treat as string cursor.execute("""
ar.output_str = old_output UPDATE core_archiveresult
SET output_str = '', output_json = json(?)
# Case 2: File path or plain string WHERE id = ?
else: """, (json_str, ar_id))
ar.output_str = old_output except json.JSONDecodeError:
# Not valid JSON, treat as string
ar.save(update_fields=['output_str', 'output_json']) cursor.execute("""
UPDATE core_archiveresult
SET output_str = ?, output_json = NULL
WHERE id = ?
""", (old_output, ar_id))
# Case 2: File path or plain string
else:
cursor.execute("""
UPDATE core_archiveresult
SET output_str = ?, output_json = NULL
WHERE id = ?
""", (old_output, ar_id))
def reverse_migrate(apps, schema_editor): def reverse_migrate(apps, schema_editor):

View File

@@ -16,43 +16,62 @@ class Migration(migrations.Migration):
] ]
operations = [ operations = [
migrations.AlterField( # Update Django's state only - database already has correct schema from 0029
model_name='archiveresult', migrations.SeparateDatabaseAndState(
name='binary', state_operations=[
field=models.ForeignKey(blank=True, help_text='Primary binary used by this hook', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='archiveresults', to='machine.binary'), migrations.AlterField(
model_name='archiveresult',
name='binary',
field=models.ForeignKey(blank=True, help_text='Primary binary used by this hook', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='archiveresults', to='machine.binary'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
),
migrations.AlterField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_str',
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
),
migrations.AlterField(
model_name='archiveresult',
name='uuid',
field=models.UUIDField(blank=True, db_index=True, default=uuid_compat.uuid7, null=True),
),
],
database_operations=[
# No database changes needed - columns already exist with correct types
],
), ),
migrations.AlterField( # Add unique constraint without table rebuild
model_name='archiveresult', migrations.SeparateDatabaseAndState(
name='output_files', state_operations=[
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'), migrations.AddConstraint(
), model_name='snapshot',
migrations.AlterField( constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
model_name='archiveresult', ),
name='output_json', ],
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True), database_operations=[
), migrations.RunSQL(
migrations.AlterField( sql="CREATE UNIQUE INDEX IF NOT EXISTS unique_timestamp ON core_snapshot (timestamp);",
model_name='archiveresult', reverse_sql="DROP INDEX IF EXISTS unique_timestamp;",
name='output_mimetypes', ),
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512), ],
),
migrations.AlterField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_str',
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
),
migrations.AlterField(
model_name='archiveresult',
name='uuid',
field=models.UUIDField(blank=True, db_index=True, default=uuid_compat.uuid7, null=True),
),
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
), ),
] ]

View File

@@ -10,20 +10,35 @@ class Migration(migrations.Migration):
] ]
operations = [ operations = [
migrations.RenameField( # Use SeparateDatabaseAndState to avoid table rebuilds that would re-add CHECK constraints
model_name='archiveresult', migrations.SeparateDatabaseAndState(
old_name='extractor', state_operations=[
new_name='plugin', migrations.RenameField(
), model_name='archiveresult',
migrations.AddField( old_name='extractor',
model_name='archiveresult', new_name='plugin',
name='hook_name', ),
field=models.CharField( migrations.AddField(
blank=True, model_name='archiveresult',
default='', name='hook_name',
max_length=255, field=models.CharField(
db_index=True, blank=True,
help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)' default='',
), max_length=255,
db_index=True,
help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)'
),
),
],
database_operations=[
migrations.RunSQL(
sql="""
ALTER TABLE core_archiveresult RENAME COLUMN extractor TO plugin;
ALTER TABLE core_archiveresult ADD COLUMN hook_name VARCHAR(255) DEFAULT '' NOT NULL;
CREATE INDEX IF NOT EXISTS core_archiveresult_hook_name_idx ON core_archiveresult (hook_name);
""",
reverse_sql=migrations.RunSQL.noop,
),
],
), ),
] ]

View File

@@ -11,13 +11,27 @@ class Migration(migrations.Migration):
] ]
operations = [ operations = [
migrations.AddField( # Use SeparateDatabaseAndState to avoid table rebuild that would fail on config NOT NULL constraint
model_name='snapshot', migrations.SeparateDatabaseAndState(
name='current_step', state_operations=[
field=models.PositiveSmallIntegerField( migrations.AddField(
default=0, model_name='snapshot',
db_index=True, name='current_step',
help_text='Current hook step being executed (0-9). Used for sequential hook execution.' field=models.PositiveSmallIntegerField(
), default=0,
db_index=True,
help_text='Current hook step being executed (0-9). Used for sequential hook execution.'
),
),
],
database_operations=[
migrations.RunSQL(
sql="""
ALTER TABLE core_snapshot ADD COLUMN current_step SMALLINT UNSIGNED DEFAULT 0 NOT NULL;
CREATE INDEX IF NOT EXISTS core_snapshot_current_step_idx ON core_snapshot (current_step);
""",
reverse_sql=migrations.RunSQL.noop,
),
],
), ),
] ]

View File

@@ -54,7 +54,7 @@ class Migration(migrations.Migration):
dependencies = [ dependencies = [
('core', '0034_snapshot_current_step'), ('core', '0034_snapshot_current_step'),
('crawls', '0004_alter_crawl_output_dir'), ('crawls', '0005_drop_seed_id_column'),
] ]
operations = [ operations = [
@@ -64,16 +64,24 @@ class Migration(migrations.Migration):
reverse_code=migrations.RunPython.noop, reverse_code=migrations.RunPython.noop,
), ),
# Step 2: Make crawl non-nullable # Step 2 & 3: Update Django's state only - leave created_by_id column in database (unused but harmless)
migrations.AlterField( migrations.SeparateDatabaseAndState(
model_name='snapshot', state_operations=[
name='crawl', # Make crawl non-nullable
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'), migrations.AlterField(
), model_name='snapshot',
name='crawl',
# Step 3: Remove created_by field field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
migrations.RemoveField( ),
model_name='snapshot', # Remove created_by field from Django's state
name='created_by', migrations.RemoveField(
model_name='snapshot',
name='created_by',
),
],
database_operations=[
# No database changes - crawl_id already exists and NOT NULL constraint will be enforced by model
# created_by_id column remains in database but is unused
],
), ),
] ]

View File

@@ -10,10 +10,18 @@ class Migration(migrations.Migration):
] ]
operations = [ operations = [
# Remove created_by field from ArchiveResult # Remove created_by field from ArchiveResult (state only)
# No data migration needed - created_by can be accessed via snapshot.crawl.created_by # No data migration needed - created_by can be accessed via snapshot.crawl.created_by
migrations.RemoveField( # Leave created_by_id column in database (unused but harmless, avoids table rebuild)
model_name='archiveresult', migrations.SeparateDatabaseAndState(
name='created_by', state_operations=[
migrations.RemoveField(
model_name='archiveresult',
name='created_by',
),
],
database_operations=[
# No database changes - leave created_by_id column in place to avoid table rebuild
],
), ),
] ]

View File

@@ -0,0 +1,44 @@
# Generated by Django 6.0 on 2025-12-29 06:45
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0036_remove_archiveresult_created_by'),
]
operations = [
# Update Django's state only - database columns remain for backwards compat
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.RemoveField(
model_name='archiveresult',
name='output_dir',
),
migrations.RemoveField(
model_name='snapshot',
name='output_dir',
),
migrations.AlterField(
model_name='archiveresult',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
),
],
database_operations=[
# No database changes - columns remain in place to avoid table rebuilds
],
),
]

View File

@@ -0,0 +1,84 @@
# Add missing columns to ArchiveResult and remove created_by_id from Snapshot
from django.db import migrations, models, connection
import django.utils.timezone
def add_columns_if_not_exist(apps, schema_editor):
"""Add columns to ArchiveResult only if they don't already exist."""
with connection.cursor() as cursor:
# Get existing columns
cursor.execute("PRAGMA table_info(core_archiveresult)")
existing_columns = {row[1] for row in cursor.fetchall()}
# Add num_uses_failed if it doesn't exist
if 'num_uses_failed' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_failed integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_failed >= 0)")
# Add num_uses_succeeded if it doesn't exist
if 'num_uses_succeeded' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_succeeded integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_succeeded >= 0)")
# Add config if it doesn't exist
if 'config' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN config text NULL")
# Add retry_at if it doesn't exist
if 'retry_at' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN retry_at datetime NULL")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at)")
class Migration(migrations.Migration):
dependencies = [
('core', '0037_remove_archiveresult_output_dir_and_more'),
]
operations = [
# Add missing columns to ArchiveResult
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddField(
model_name='archiveresult',
name='num_uses_failed',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_succeeded',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AddField(
model_name='archiveresult',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
),
],
database_operations=[
migrations.RunPython(add_columns_if_not_exist, reverse_code=migrations.RunPython.noop),
],
),
# Drop created_by_id from Snapshot (database only, already removed from model in 0035)
migrations.SeparateDatabaseAndState(
state_operations=[
# No state changes - field already removed in 0035
],
database_operations=[
migrations.RunSQL(
sql="""
-- Drop index first, then column
DROP INDEX IF EXISTS core_snapshot_created_by_id_6dbd6149;
ALTER TABLE core_snapshot DROP COLUMN created_by_id;
""",
reverse_sql=migrations.RunSQL.noop,
),
],
),
]

View File

@@ -0,0 +1,30 @@
# Fix num_uses_failed and num_uses_succeeded string values to integers
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('core', '0038_fix_missing_columns'),
]
operations = [
# Fix string values that got inserted as literals instead of integers
migrations.RunSQL(
sql="""
UPDATE core_snapshot
SET num_uses_failed = 0
WHERE typeof(num_uses_failed) = 'text' OR num_uses_failed = 'num_uses_failed';
UPDATE core_snapshot
SET num_uses_succeeded = 0
WHERE typeof(num_uses_succeeded) = 'text' OR num_uses_succeeded = 'num_uses_succeeded';
UPDATE core_snapshot
SET depth = 0
WHERE typeof(depth) = 'text' OR depth = 'depth';
""",
reverse_sql=migrations.RunSQL.noop,
),
]

View File

@@ -911,7 +911,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
) )
merged = 0 merged = 0
for dup in duplicates.iterator(): for dup in duplicates.iterator(chunk_size=500):
snapshots = list( snapshots = list(
cls.objects cls.objects
.filter(url=dup['url'], timestamp=dup['timestamp']) .filter(url=dup['url'], timestamp=dup['timestamp'])

File diff suppressed because it is too large Load Diff

View File

@@ -91,7 +91,11 @@ def plugin_thumbnail(context, result) -> str:
'output_path': output_path, 'output_path': output_path,
'plugin': plugin, 'plugin': plugin,
}) })
return mark_safe(tpl.render(ctx)) rendered = tpl.render(ctx)
# Only return non-empty content (strip whitespace to check)
if rendered.strip():
return mark_safe(rendered)
return ''
except Exception: except Exception:
return '' return ''
@@ -119,7 +123,11 @@ def plugin_embed(context, result) -> str:
'output_path': output_path, 'output_path': output_path,
'plugin': plugin, 'plugin': plugin,
}) })
return mark_safe(tpl.render(ctx)) rendered = tpl.render(ctx)
# Only return non-empty content (strip whitespace to check)
if rendered.strip():
return mark_safe(rendered)
return ''
except Exception: except Exception:
return '' return ''
@@ -147,7 +155,11 @@ def plugin_fullscreen(context, result) -> str:
'output_path': output_path, 'output_path': output_path,
'plugin': plugin, 'plugin': plugin,
}) })
return mark_safe(tpl.render(ctx)) rendered = tpl.render(ctx)
# Only return non-empty content (strip whitespace to check)
if rendered.strip():
return mark_safe(rendered)
return ''
except Exception: except Exception:
return '' return ''

View File

@@ -539,7 +539,7 @@ from django.http import JsonResponse
def live_progress_view(request): def live_progress_view(request):
"""Simple JSON endpoint for live progress status - used by admin progress monitor.""" """Simple JSON endpoint for live progress status - used by admin progress monitor."""
try: try:
from workers.orchestrator import Orchestrator from archivebox.workers.orchestrator import Orchestrator
from archivebox.crawls.models import Crawl from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot, ArchiveResult from archivebox.core.models import Snapshot, ArchiveResult
from django.db.models import Case, When, Value, IntegerField from django.db.models import Case, When, Value, IntegerField

View File

@@ -4,3 +4,8 @@ from django.apps import AppConfig
class CrawlsConfig(AppConfig): class CrawlsConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField" default_auto_field = "django.db.models.BigAutoField"
name = "archivebox.crawls" name = "archivebox.crawls"
label = "crawls"
def ready(self):
"""Import models to register state machines with the registry"""
from archivebox.crawls.models import CrawlMachine # noqa: F401

View File

@@ -17,39 +17,62 @@ class Migration(migrations.Migration):
] ]
operations = [ operations = [
# Remove the seed foreign key from Crawl # Remove the seed foreign key from Crawl (no-op if already removed by core/0024_d)
migrations.RemoveField( migrations.RunPython(
model_name='crawl', code=lambda apps, schema_editor: None,
name='seed', reverse_code=migrations.RunPython.noop,
), ),
# Delete the Seed model entirely # Delete the Seed model entirely (already done)
migrations.DeleteModel( migrations.RunPython(
name='Seed', code=lambda apps, schema_editor: None,
reverse_code=migrations.RunPython.noop,
), ),
# Update fields to new schema # Drop seed_id column if it exists, then update Django's migration state
migrations.AlterField( migrations.SeparateDatabaseAndState(
model_name='crawl', state_operations=[
name='created_by', # Update fields to new schema
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL), migrations.AlterField(
), model_name='crawl',
migrations.AlterField( name='created_by',
model_name='crawl', field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
name='id', ),
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True), migrations.AlterField(
), model_name='crawl',
migrations.AlterField( name='id',
model_name='crawl', field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
name='urls', ),
field=models.TextField(help_text='Newline-separated list of URLs to crawl'), migrations.AlterField(
), model_name='crawl',
migrations.AlterField( name='urls',
model_name='crawlschedule', field=models.TextField(help_text='Newline-separated list of URLs to crawl'),
name='created_by', ),
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL), migrations.AlterField(
), model_name='crawlschedule',
migrations.AlterField( name='created_by',
model_name='crawlschedule', field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
name='id', ),
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True), migrations.AlterField(
model_name='crawlschedule',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
],
database_operations=[
# Drop seed table and NULL out seed_id FK values
migrations.RunSQL(
sql="""
PRAGMA foreign_keys=OFF;
-- NULL out seed_id values in crawls_crawl
UPDATE crawls_crawl SET seed_id = NULL;
-- Drop seed table if it exists
DROP TABLE IF EXISTS crawls_seed;
PRAGMA foreign_keys=ON;
""",
reverse_sql=migrations.RunSQL.noop,
),
],
), ),
] ]

View File

@@ -8,12 +8,21 @@ class Migration(migrations.Migration):
dependencies = [ dependencies = [
('crawls', '0002_drop_seed_model'), ('crawls', '0002_drop_seed_model'),
('core', '0024_d_fix_crawls_config'), # Depends on config fix
] ]
operations = [ operations = [
migrations.AlterField( # Update Django's state only to avoid table rebuild that would re-apply old constraints
model_name='crawl', migrations.SeparateDatabaseAndState(
name='output_dir', state_operations=[
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/test_archivebox_migrations/archive')), migrations.AlterField(
model_name='crawl',
name='output_dir',
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/test_archivebox_migrations/archive')),
),
],
database_operations=[
# No database changes - output_dir type change is cosmetic for Django admin
],
), ),
] ]

View File

@@ -11,9 +11,17 @@ class Migration(migrations.Migration):
] ]
operations = [ operations = [
migrations.AlterField( # Update Django's state only to avoid table rebuild that would re-apply old constraints
model_name='crawl', migrations.SeparateDatabaseAndState(
name='output_dir', state_operations=[
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/archivebox-makemigrations/archive')), migrations.AlterField(
model_name='crawl',
name='output_dir',
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/private/tmp/archivebox-makemigrations/archive')),
),
],
database_operations=[
# No database changes - output_dir type change is cosmetic for Django admin
],
), ),
] ]

View File

@@ -0,0 +1,28 @@
# Drop seed_id column from Django's state (leave in database to avoid FK issues)
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('crawls', '0004_alter_crawl_output_dir'),
]
operations = [
# Update Django's state only - leave seed_id column in database (unused but harmless)
# This avoids FK mismatch errors with crawls_crawlschedule
migrations.SeparateDatabaseAndState(
state_operations=[
# Remove seed field from Django's migration state
migrations.RemoveField(
model_name='crawl',
name='seed',
),
],
database_operations=[
# No database changes - seed_id column remains to avoid FK rebuild issues
# crawls_seed table can be manually dropped by DBA if needed
],
),
]

View File

@@ -0,0 +1,35 @@
# Generated by Django 6.0 on 2025-12-29 06:45
import pathlib
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('crawls', '0005_drop_seed_id_column'),
]
operations = [
# Update Django's state only - database already correct
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AlterField(
model_name='crawl',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AlterField(
model_name='crawl',
name='output_dir',
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/Users/squash/Local/Code/archiveboxes/archivebox-nue/data/archive')),
),
migrations.DeleteModel(
name='Seed',
),
],
database_operations=[
# No database changes - Seed table already dropped in 0005
],
),
]

View File

@@ -65,7 +65,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
modified_at = models.DateTimeField(auto_now=True) modified_at = models.DateTimeField(auto_now=True)
urls = models.TextField(blank=False, null=False, help_text='Newline-separated list of URLs to crawl') urls = models.TextField(blank=False, null=False, help_text='Newline-separated list of URLs to crawl')
config = models.JSONField(default=dict) config = models.JSONField(default=dict, null=True, blank=True)
max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)]) max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
tags_str = models.CharField(max_length=1024, blank=True, null=False, default='') tags_str = models.CharField(max_length=1024, blank=True, null=False, default='')
persona_id = models.UUIDField(null=True, blank=True) persona_id = models.UUIDField(null=True, blank=True)
@@ -77,7 +77,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
status = ModelWithStateMachine.StatusField(choices=ModelWithStateMachine.StatusChoices, default=ModelWithStateMachine.StatusChoices.QUEUED) status = ModelWithStateMachine.StatusField(choices=ModelWithStateMachine.StatusChoices, default=ModelWithStateMachine.StatusChoices.QUEUED)
retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now) retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
state_machine_name = 'crawls.models.CrawlMachine' state_machine_name = 'archivebox.crawls.models.CrawlMachine'
retry_at_field_name = 'retry_at' retry_at_field_name = 'retry_at'
state_field_name = 'status' state_field_name = 'status'
StatusChoices = ModelWithStateMachine.StatusChoices StatusChoices = ModelWithStateMachine.StatusChoices
@@ -190,7 +190,6 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
'status': Snapshot.INITIAL_STATE, 'status': Snapshot.INITIAL_STATE,
'retry_at': timezone.now(), 'retry_at': timezone.now(),
'timestamp': str(timezone.now().timestamp()), 'timestamp': str(timezone.now().timestamp()),
'created_by_id': self.created_by_id,
'depth': 0, 'depth': 0,
}, },
) )
@@ -290,7 +289,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
'timestamp': timestamp or str(timezone.now().timestamp()), 'timestamp': timestamp or str(timezone.now().timestamp()),
'status': Snapshot.INITIAL_STATE, 'status': Snapshot.INITIAL_STATE,
'retry_at': timezone.now(), 'retry_at': timezone.now(),
'created_by_id': self.created_by_id, # Note: created_by removed in 0.9.0 - Snapshot inherits from Crawl
} }
) )

View File

@@ -7,8 +7,13 @@ class MachineConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField' default_auto_field = 'django.db.models.BigAutoField'
name = 'archivebox.machine' name = 'archivebox.machine'
label = 'machine' # Explicit label for migrations
verbose_name = 'Machine Info' verbose_name = 'Machine Info'
def ready(self):
"""Import models to register state machines with the registry"""
from archivebox.machine import models # noqa: F401
def register_admin(admin_site): def register_admin(admin_site):
from archivebox.machine.admin import register_admin from archivebox.machine.admin import register_admin

View File

@@ -85,6 +85,12 @@ class Migration(migrations.Migration):
('version', models.CharField(blank=True, default=None, max_length=32)), ('version', models.CharField(blank=True, default=None, max_length=32)),
('sha256', models.CharField(blank=True, default=None, max_length=64)), ('sha256', models.CharField(blank=True, default=None, max_length=64)),
('machine', models.ForeignKey(blank=True, default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')), ('machine', models.ForeignKey(blank=True, default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
# Fields added in migration 0005 (included here for fresh installs)
('binproviders', models.CharField(blank=True, default='env', max_length=127)),
('output_dir', models.CharField(blank=True, default='', max_length=255)),
('overrides', models.JSONField(blank=True, default=dict)),
('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True)),
('status', models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16)),
# dependency FK removed - Dependency model deleted # dependency FK removed - Dependency model deleted
], ],
options={ options={

View File

@@ -0,0 +1,104 @@
# Generated by Django 6.0 on 2025-12-29 06:45
import django.db.models.deletion
import django.utils.timezone
from archivebox.uuid_compat import uuid7
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('machine', '0004_drop_dependency_table'),
]
operations = [
# Update Django's state only - database already has correct schema
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddField(
model_name='binary',
name='binproviders',
field=models.CharField(blank=True, default='env', help_text='Comma-separated list of allowed providers: apt,brew,pip,npm,env', max_length=127),
),
migrations.AddField(
model_name='binary',
name='output_dir',
field=models.CharField(blank=True, default='', help_text='Directory where installation hook logs are stored', max_length=255),
),
migrations.AddField(
model_name='binary',
name='overrides',
field=models.JSONField(blank=True, default=dict, help_text="Provider-specific overrides: {'apt': {'packages': ['pkg']}, ...}"),
),
migrations.AddField(
model_name='binary',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, help_text='When to retry this binary installation', null=True),
),
migrations.AddField(
model_name='binary',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=16),
),
migrations.AlterField(
model_name='binary',
name='abspath',
field=models.CharField(blank=True, default='', max_length=255),
),
migrations.AlterField(
model_name='binary',
name='binprovider',
field=models.CharField(blank=True, default='', help_text='Provider that successfully installed this binary', max_length=31),
),
migrations.AlterField(
model_name='binary',
name='id',
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='binary',
name='machine',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='machine.machine'),
),
migrations.AlterField(
model_name='binary',
name='name',
field=models.CharField(blank=True, db_index=True, default='', max_length=63),
),
migrations.AlterField(
model_name='binary',
name='sha256',
field=models.CharField(blank=True, default='', max_length=64),
),
migrations.AlterField(
model_name='binary',
name='version',
field=models.CharField(blank=True, default='', max_length=32),
),
migrations.AlterField(
model_name='machine',
name='config',
field=models.JSONField(blank=True, default=dict, help_text='Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)', null=True),
),
migrations.AlterField(
model_name='machine',
name='id',
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='machine',
name='stats',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AlterField(
model_name='networkinterface',
name='id',
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
],
database_operations=[
# No database changes - schema already correct from previous migrations
],
),
]

View File

@@ -44,8 +44,8 @@ class Machine(ModelWithHealthStats):
os_platform = models.CharField(max_length=63, default=None, null=False) os_platform = models.CharField(max_length=63, default=None, null=False)
os_release = models.CharField(max_length=63, default=None, null=False) os_release = models.CharField(max_length=63, default=None, null=False)
os_kernel = models.CharField(max_length=255, default=None, null=False) os_kernel = models.CharField(max_length=255, default=None, null=False)
stats = models.JSONField(default=dict, null=False) stats = models.JSONField(default=dict, null=True, blank=True)
config = models.JSONField(default=dict, null=False, blank=True, config = models.JSONField(default=dict, null=True, blank=True,
help_text="Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)") help_text="Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)")
num_uses_failed = models.PositiveIntegerField(default=0) num_uses_failed = models.PositiveIntegerField(default=0)
num_uses_succeeded = models.PositiveIntegerField(default=0) num_uses_succeeded = models.PositiveIntegerField(default=0)
@@ -213,7 +213,7 @@ class Binary(ModelWithHealthStats):
num_uses_failed = models.PositiveIntegerField(default=0) num_uses_failed = models.PositiveIntegerField(default=0)
num_uses_succeeded = models.PositiveIntegerField(default=0) num_uses_succeeded = models.PositiveIntegerField(default=0)
state_machine_name: str = 'machine.models.BinaryMachine' state_machine_name: str = 'archivebox.machine.models.BinaryMachine'
objects: BinaryManager = BinaryManager() objects: BinaryManager = BinaryManager()

View File

@@ -4,3 +4,4 @@ from django.apps import AppConfig
class SessionsConfig(AppConfig): class SessionsConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField" default_auto_field = "django.db.models.BigAutoField"
name = "archivebox.personas" name = "archivebox.personas"
label = "personas"

View File

@@ -21,7 +21,7 @@
# # COOKIES_TXT_FILE: '/path/to/cookies.txt', # # COOKIES_TXT_FILE: '/path/to/cookies.txt',
# # CHROME_USER_DATA_DIR: '/path/to/chrome/user/data/dir', # # CHROME_USER_DATA_DIR: '/path/to/chrome/user/data/dir',
# # CHECK_SSL_VALIDITY: False, # # CHECK_SSL_VALIDITY: False,
# # SAVE_ARCHIVE_DOT_ORG: True, # # SAVE_ARCHIVEDOTORG: True,
# # CHROME_BINARY: 'chromium' # # CHROME_BINARY: 'chromium'
# # ... # # ...
# # } # # }

View File

@@ -63,7 +63,7 @@ def test_ripgrep_hook_detects_binary_from_path():
def test_ripgrep_hook_skips_when_backend_not_ripgrep(): def test_ripgrep_hook_skips_when_backend_not_ripgrep():
"""Test that ripgrep hook exits silently when search backend is not ripgrep.""" """Test that ripgrep hook exits silently when search backend is not ripgrep."""
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py' hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
env = os.environ.copy() env = os.environ.copy()
env['SEARCH_BACKEND_ENGINE'] = 'sqlite' # Different backend env['SEARCH_BACKEND_ENGINE'] = 'sqlite' # Different backend
@@ -82,7 +82,7 @@ def test_ripgrep_hook_skips_when_backend_not_ripgrep():
def test_ripgrep_hook_handles_absolute_path(): def test_ripgrep_hook_handles_absolute_path():
"""Test that ripgrep hook works when RIPGREP_BINARY is an absolute path.""" """Test that ripgrep hook works when RIPGREP_BINARY is an absolute path."""
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py' hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
rg_path = shutil.which('rg') rg_path = shutil.which('rg')
if not rg_path: if not rg_path:
@@ -222,7 +222,7 @@ def test_ripgrep_only_detected_when_backend_enabled():
if not shutil.which('rg'): if not shutil.which('rg'):
pytest.skip("ripgrep not installed") pytest.skip("ripgrep not installed")
hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py' hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py'
# Test 1: With ripgrep backend - should output Binary record # Test 1: With ripgrep backend - should output Binary record
env1 = os.environ.copy() env1 = os.environ.copy()

View File

@@ -360,9 +360,11 @@
<div class="row header-bottom-frames"> <div class="row header-bottom-frames">
{% for result_info in archiveresults %} {% for result_info in archiveresults %}
{% if result_info.result %} {% if result_info.result %}
{% plugin_thumbnail result_info.result as thumbnail_html %}
{% if thumbnail_html %}
<div class="col-lg-2"> <div class="col-lg-2">
<div class="card{% if forloop.first %} selected-card{% endif %}"> <div class="card{% if forloop.first %} selected-card{% endif %}">
{% plugin_thumbnail result_info.result %} {{ thumbnail_html }}
<div class="card-body"> <div class="card-body">
<a href="{{ result_info.path }}" title="Open in new tab..." target="_blank" rel="noopener"> <a href="{{ result_info.path }}" title="Open in new tab..." target="_blank" rel="noopener">
<p class="card-text"><code>{{ result_info.path }}</code></p> <p class="card-text"><code>{{ result_info.path }}</code></p>
@@ -373,6 +375,7 @@
</div> </div>
</div> </div>
</div> </div>
{% endif %}
{% endif %} {% endif %}
{% endfor %} {% endfor %}
@@ -395,7 +398,7 @@
</div> </div>
</div> </div>
</header> </header>
<iframe sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{singlefile_path}}" name="preview"></iframe> <iframe sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{best_preview_path}}" name="preview"></iframe>
<script> <script>
/*! jQuery v3.2.1 -ajax,-ajax/jsonp,-ajax/load,-ajax/parseXML,-ajax/script,-ajax/var/location,-ajax/var/nonce,-ajax/var/rquery,-ajax/xhr,-manipulation/_evalUrl,-event/ajax,-effects,-effects/Tween,-effects/animatedSelector | (c) JS Foundation and other contributors | jquery.org/license */ /*! jQuery v3.2.1 -ajax,-ajax/jsonp,-ajax/load,-ajax/parseXML,-ajax/script,-ajax/var/location,-ajax/var/nonce,-ajax/var/rquery,-ajax/xhr,-manipulation/_evalUrl,-event/ajax,-effects,-effects/Tween,-effects/animatedSelector | (c) JS Foundation and other contributors | jquery.org/license */

View File

@@ -429,19 +429,6 @@ class TestInstallHookOutput(unittest.TestCase):
self.assertEqual(data['name'], 'wget') self.assertEqual(data['name'], 'wget')
self.assertTrue(data['abspath'].startswith('/')) self.assertTrue(data['abspath'].startswith('/'))
def test_install_hook_outputs_dependency(self):
"""Install hook should output Dependency JSONL when binary not found."""
hook_output = json.dumps({
'type': 'Dependency',
'bin_name': 'wget',
'bin_providers': 'apt,brew,env',
})
data = json.loads(hook_output)
self.assertEqual(data['type'], 'Dependency')
self.assertEqual(data['bin_name'], 'wget')
self.assertIn('apt', data['bin_providers'])
def test_install_hook_outputs_machine_config(self): def test_install_hook_outputs_machine_config(self):
"""Install hook should output Machine config update JSONL.""" """Install hook should output Machine config update JSONL."""
hook_output = json.dumps({ hook_output = json.dumps({

View File

@@ -459,7 +459,7 @@ class TestFilesystemMigration08to09(unittest.TestCase):
'SAVE_MERCURY': 'True', 'SAVE_MERCURY': 'True',
'SAVE_PDF': 'True', 'SAVE_PDF': 'True',
'SAVE_MEDIA': 'True', 'SAVE_MEDIA': 'True',
'SAVE_ARCHIVE_DOT_ORG': 'True', 'SAVE_ARCHIVEDOTORG': 'True',
'SAVE_HEADERS': 'True', 'SAVE_HEADERS': 'True',
'SAVE_HTMLTOTEXT': 'True', 'SAVE_HTMLTOTEXT': 'True',
'SAVE_GIT': 'True', 'SAVE_GIT': 'True',

View File

@@ -949,19 +949,30 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
('core', '0072_rename_added_snapshot_bookmarked_at_and_more'), ('core', '0072_rename_added_snapshot_bookmarked_at_and_more'),
('core', '0073_rename_created_archiveresult_created_at_and_more'), ('core', '0073_rename_created_archiveresult_created_at_and_more'),
('core', '0074_alter_snapshot_downloaded_at'), ('core', '0074_alter_snapshot_downloaded_at'),
('core', '0023_new_schema'), # For 0.8.x: DO NOT record 0023_new_schema - it replaces 0023-0074 for fresh installs
# We already recorded 0023-0074 above, so Django will know the state
# For 0.8.x: Record original machine migrations (before squashing)
# DO NOT record 0001_squashed here - it replaces 0001-0004 for fresh installs
('machine', '0001_initial'), ('machine', '0001_initial'),
('machine', '0002_alter_machine_stats_installedbinary'), ('machine', '0002_alter_machine_stats_installedbinary'),
('machine', '0003_alter_installedbinary_options_and_more'), ('machine', '0003_alter_installedbinary_options_and_more'),
('machine', '0004_alter_installedbinary_abspath_and_more'), ('machine', '0004_alter_installedbinary_abspath_and_more'),
('machine', '0001_squashed'), # Then the new migrations after squashing
('machine', '0002_rename_custom_cmds_to_overrides'), ('machine', '0002_rename_custom_cmds_to_overrides'),
('machine', '0003_alter_dependency_id_alter_installedbinary_dependency_and_more'), ('machine', '0003_alter_dependency_id_alter_installedbinary_dependency_and_more'),
('machine', '0004_drop_dependency_table'), ('machine', '0004_drop_dependency_table'),
# Crawls must come before core.0024 because 0024_b depends on it
('crawls', '0001_initial'),
# Core 0024 migrations chain (in dependency order)
('core', '0024_b_clear_config_fields'),
('core', '0024_c_disable_fk_checks'),
('core', '0024_d_fix_crawls_config'),
('core', '0024_snapshot_crawl'), ('core', '0024_snapshot_crawl'),
('core', '0024_f_add_snapshot_config'),
('core', '0025_allow_duplicate_urls_per_crawl'), ('core', '0025_allow_duplicate_urls_per_crawl'),
# For 0.8.x: Record original api migration (before squashing)
# DO NOT record 0001_squashed here - it replaces 0001 for fresh installs
('api', '0001_initial'), ('api', '0001_initial'),
('api', '0001_squashed'),
('api', '0002_alter_apitoken_options'), ('api', '0002_alter_apitoken_options'),
('api', '0003_rename_user_apitoken_created_by_apitoken_abid_and_more'), ('api', '0003_rename_user_apitoken_created_by_apitoken_abid_and_more'),
('api', '0004_alter_apitoken_id_alter_apitoken_uuid'), ('api', '0004_alter_apitoken_id_alter_apitoken_uuid'),
@@ -970,11 +981,9 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
('api', '0007_alter_apitoken_created_by'), ('api', '0007_alter_apitoken_created_by'),
('api', '0008_alter_apitoken_created_alter_apitoken_created_by_and_more'), ('api', '0008_alter_apitoken_created_alter_apitoken_created_by_and_more'),
('api', '0009_rename_created_apitoken_created_at_and_more'), ('api', '0009_rename_created_apitoken_created_at_and_more'),
('crawls', '0001_initial'), # Note: crawls.0001_initial moved earlier (before core.0024) due to dependencies
('crawls', '0002_drop_seed_model'), # Stop here - 0.8.x ends at core.0025, crawls.0001, and we want to TEST the later migrations
('crawls', '0003_alter_crawl_output_dir'), # Do NOT record 0026+ as they need to be tested during migration
('crawls', '0004_alter_crawl_output_dir'),
('core', '0035_snapshot_crawl_non_nullable_remove_created_by'),
] ]
for app, name in migrations: for app, name in migrations:
@@ -1000,7 +1009,7 @@ def run_archivebox(data_dir: Path, args: list, timeout: int = 60, env: dict = No
base_env['USE_COLOR'] = 'False' base_env['USE_COLOR'] = 'False'
base_env['SHOW_PROGRESS'] = 'False' base_env['SHOW_PROGRESS'] = 'False'
# Disable ALL extractors for faster tests (can be overridden by env parameter) # Disable ALL extractors for faster tests (can be overridden by env parameter)
base_env['SAVE_ARCHIVE_DOT_ORG'] = 'False' base_env['SAVE_ARCHIVEDOTORG'] = 'False'
base_env['SAVE_TITLE'] = 'False' base_env['SAVE_TITLE'] = 'False'
base_env['SAVE_FAVICON'] = 'False' base_env['SAVE_FAVICON'] = 'False'
base_env['SAVE_WGET'] = 'False' base_env['SAVE_WGET'] = 'False'

View File

@@ -4,4 +4,5 @@ from django.apps import AppConfig
class WorkersConfig(AppConfig): class WorkersConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField' default_auto_field = 'django.db.models.BigAutoField'
name = 'archivebox.workers' name = 'archivebox.workers'
label = 'workers'

View File

@@ -2,7 +2,7 @@
# mkdir -p ~/archivebox/data && cd ~/archivebox # mkdir -p ~/archivebox/data && cd ~/archivebox
# curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml # curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml
# docker compose run archivebox version # docker compose run archivebox version
# docker compose run archivebox config --set SAVE_ARCHIVE_DOT_ORG=False # docker compose run archivebox config --set SAVE_ARCHIVEDOTORG=False
# docker compose run archivebox add --depth=1 'https://news.ycombinator.com' # docker compose run archivebox add --depth=1 'https://news.ycombinator.com'
# docker compose run -T archivebox add < bookmarks.txt # docker compose run -T archivebox add < bookmarks.txt
# docker compose up -d && open 'https://localhost:8000' # docker compose up -d && open 'https://localhost:8000'
@@ -35,7 +35,7 @@ services:
# - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files # - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out # - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs) # - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
# - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting all URLs to Archive.org when archiving # - SAVE_ARCHIVEDOTORG=True # set to False to disable submitting all URLs to Archive.org when archiving
# - USER_AGENT="..." # set a custom USER_AGENT to avoid being blocked as a bot # - USER_AGENT="..." # set a custom USER_AGENT to avoid being blocked as a bot
# ... # ...
# For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration # For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration

View File

@@ -85,9 +85,9 @@ dependencies = [
### Binary/Package Management ### Binary/Package Management
"abx-pkg>=0.1.0", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm "abx-pkg>=0.1.0", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
"gallery-dl>=1.31.1", "gallery-dl>=1.31.1",
### UUID7 backport for Python <3.14 ### UUID7 backport for Python <3.14
"uuid7>=0.1.0; python_version < '3.14'", # for: uuid7 support on Python 3.13 (provides uuid_extensions module) "uuid7>=0.1.0; python_version < '3.14'", # for: uuid7 support on Python 3.13 (provides uuid_extensions module)
"pytest-django>=4.11.1",
] ]
[project.optional-dependencies] [project.optional-dependencies]
@@ -183,6 +183,7 @@ ignore = ["E731", "E303", "E266", "E241", "E222"]
[tool.pytest.ini_options] [tool.pytest.ini_options]
testpaths = [ "tests" ] testpaths = [ "tests" ]
DJANGO_SETTINGS_MODULE = "archivebox.core.settings"
[tool.mypy] [tool.mypy]
mypy_path = "archivebox,archivebox/typings" mypy_path = "archivebox,archivebox/typings"

View File

@@ -24,7 +24,7 @@ def disable_extractors_dict():
"SAVE_HEADERS": "false", "SAVE_HEADERS": "false",
"USE_GIT": "false", "USE_GIT": "false",
"SAVE_MEDIA": "false", "SAVE_MEDIA": "false",
"SAVE_ARCHIVE_DOT_ORG": "false", "SAVE_ARCHIVEDOTORG": "false",
"SAVE_TITLE": "false", "SAVE_TITLE": "false",
"SAVE_FAVICON": "false", "SAVE_FAVICON": "false",
}) })

View File

@@ -33,7 +33,7 @@ def test_background_hooks_dont_block_parser_extractors(tmp_path, process):
"SAVE_HEADERS": "false", "SAVE_HEADERS": "false",
"USE_GIT": "false", "USE_GIT": "false",
"SAVE_MEDIA": "false", "SAVE_MEDIA": "false",
"SAVE_ARCHIVE_DOT_ORG": "false", "SAVE_ARCHIVEDOTORG": "false",
"SAVE_TITLE": "false", "SAVE_TITLE": "false",
"SAVE_FAVICON": "false", "SAVE_FAVICON": "false",
# Enable chrome session (required for background hooks to start) # Enable chrome session (required for background hooks to start)
@@ -133,7 +133,7 @@ def test_parser_extractors_emit_snapshot_jsonl(tmp_path, process):
"SAVE_HEADERS": "false", "SAVE_HEADERS": "false",
"USE_GIT": "false", "USE_GIT": "false",
"SAVE_MEDIA": "false", "SAVE_MEDIA": "false",
"SAVE_ARCHIVE_DOT_ORG": "false", "SAVE_ARCHIVEDOTORG": "false",
"SAVE_TITLE": "false", "SAVE_TITLE": "false",
"SAVE_FAVICON": "false", "SAVE_FAVICON": "false",
"USE_CHROME": "false", "USE_CHROME": "false",

14
uv.lock generated
View File

@@ -88,6 +88,7 @@ dependencies = [
{ name = "py-machineid", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "py-machineid", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pydantic-settings", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pydantic-settings", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pytest-django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-benedict", extra = ["io", "parse"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-benedict", extra = ["io", "parse"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-crontab", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-crontab", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-statemachine", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-statemachine", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -186,6 +187,7 @@ requires-dist = [
{ name = "py-machineid", specifier = ">=0.6.0" }, { name = "py-machineid", specifier = ">=0.6.0" },
{ name = "pydantic", specifier = ">=2.8.0" }, { name = "pydantic", specifier = ">=2.8.0" },
{ name = "pydantic-settings", specifier = ">=2.5.2" }, { name = "pydantic-settings", specifier = ">=2.5.2" },
{ name = "pytest-django", specifier = ">=4.11.1" },
{ name = "python-benedict", extras = ["io", "parse"], specifier = ">=0.33.2" }, { name = "python-benedict", extras = ["io", "parse"], specifier = ">=0.33.2" },
{ name = "python-crontab", specifier = ">=3.2.0" }, { name = "python-crontab", specifier = ">=3.2.0" },
{ name = "python-ldap", marker = "extra == 'ldap'", specifier = ">=3.4.3" }, { name = "python-ldap", marker = "extra == 'ldap'", specifier = ">=3.4.3" },
@@ -1848,6 +1850,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
] ]
[[package]]
name = "pytest-django"
version = "4.11.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b1/fb/55d580352db26eb3d59ad50c64321ddfe228d3d8ac107db05387a2fadf3a/pytest_django-4.11.1.tar.gz", hash = "sha256:a949141a1ee103cb0e7a20f1451d355f83f5e4a5d07bdd4dcfdd1fd0ff227991", size = 86202, upload-time = "2025-04-03T18:56:09.338Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/be/ac/bd0608d229ec808e51a21044f3f2f27b9a37e7a0ebaca7247882e67876af/pytest_django-4.11.1-py3-none-any.whl", hash = "sha256:1b63773f648aa3d8541000c26929c1ea63934be1cfa674c76436966d73fe6a10", size = 25281, upload-time = "2025-04-03T18:56:07.678Z" },
]
[[package]] [[package]]
name = "python-benedict" name = "python-benedict"
version = "0.35.0" version = "0.35.0"