much better tests and add page ui

Nick Sweeting
2025-12-29 04:02:11 -08:00
parent 9487f8a0de
commit 30c60eef76
93 changed files with 2998 additions and 2712 deletions

@@ -1,494 +0,0 @@
# Generated by Django 5.0.6 on 2024-12-25
# Transforms schema from 0022 to new simplified schema (ABID system removed)
from uuid import uuid4
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
def get_or_create_system_user_pk(apps, schema_editor):
"""Get or create system user for migrations."""
User = apps.get_model('auth', 'User')
user, _ = User.objects.get_or_create(
username='system',
defaults={'is_active': False, 'password': '!'}
)
return user.pk
def populate_created_by_snapshot(apps, schema_editor):
"""Populate created_by for existing snapshots."""
User = apps.get_model('auth', 'User')
Snapshot = apps.get_model('core', 'Snapshot')
system_user, _ = User.objects.get_or_create(
username='system',
defaults={'is_active': False, 'password': '!'}
)
Snapshot.objects.filter(created_by__isnull=True).update(created_by=system_user)
def populate_created_by_archiveresult(apps, schema_editor):
"""Populate created_by for existing archive results."""
User = apps.get_model('auth', 'User')
ArchiveResult = apps.get_model('core', 'ArchiveResult')
system_user, _ = User.objects.get_or_create(
username='system',
defaults={'is_active': False, 'password': '!'}
)
ArchiveResult.objects.filter(created_by__isnull=True).update(created_by=system_user)
def populate_created_by_tag(apps, schema_editor):
"""Populate created_by for existing tags."""
User = apps.get_model('auth', 'User')
Tag = apps.get_model('core', 'Tag')
system_user, _ = User.objects.get_or_create(
username='system',
defaults={'is_active': False, 'password': '!'}
)
Tag.objects.filter(created_by__isnull=True).update(created_by=system_user)
def generate_uuid_for_archiveresults(apps, schema_editor):
"""Generate UUIDs for archive results that don't have them."""
ArchiveResult = apps.get_model('core', 'ArchiveResult')
for ar in ArchiveResult.objects.filter(uuid__isnull=True).iterator(chunk_size=500):
ar.uuid = uuid4()
ar.save(update_fields=['uuid'])
def generate_uuid_for_tags(apps, schema_editor):
"""Generate UUIDs for tags that don't have them."""
Tag = apps.get_model('core', 'Tag')
for tag in Tag.objects.filter(uuid__isnull=True).iterator(chunk_size=500):
tag.uuid = uuid4()
tag.save(update_fields=['uuid'])
def copy_bookmarked_at_from_added(apps, schema_editor):
"""Copy added timestamp to bookmarked_at."""
Snapshot = apps.get_model('core', 'Snapshot')
Snapshot.objects.filter(bookmarked_at__isnull=True).update(
bookmarked_at=models.F('added')
)
def copy_created_at_from_added(apps, schema_editor):
"""Copy added timestamp to created_at for snapshots."""
Snapshot = apps.get_model('core', 'Snapshot')
Snapshot.objects.filter(created_at__isnull=True).update(
created_at=models.F('added')
)
def copy_created_at_from_start_ts(apps, schema_editor):
"""Copy start_ts to created_at for archive results."""
ArchiveResult = apps.get_model('core', 'ArchiveResult')
ArchiveResult.objects.filter(created_at__isnull=True).update(
created_at=models.F('start_ts')
)
class Migration(migrations.Migration):
"""
This migration transforms the schema from the main branch (0022) to the new
simplified schema without the ABID system.
For dev branch users who had ABID migrations (0023-0074), this replaces them
with a clean transformation.
"""
replaces = [
('core', '0023_alter_archiveresult_options_archiveresult_abid_and_more'),
('core', '0024_auto_20240513_1143'),
('core', '0025_alter_archiveresult_uuid'),
('core', '0026_archiveresult_created_archiveresult_created_by_and_more'),
('core', '0027_update_snapshot_ids'),
('core', '0028_alter_archiveresult_uuid'),
('core', '0029_alter_archiveresult_id'),
('core', '0030_alter_archiveresult_uuid'),
('core', '0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more'),
('core', '0032_alter_archiveresult_id'),
('core', '0033_rename_id_archiveresult_old_id'),
('core', '0034_alter_archiveresult_old_id_alter_archiveresult_uuid'),
('core', '0035_remove_archiveresult_uuid_archiveresult_id'),
('core', '0036_alter_archiveresult_id_alter_archiveresult_old_id'),
('core', '0037_rename_id_snapshot_old_id'),
('core', '0038_rename_uuid_snapshot_id'),
('core', '0039_rename_snapshot_archiveresult_snapshot_old'),
('core', '0040_archiveresult_snapshot'),
('core', '0041_alter_archiveresult_snapshot_and_more'),
('core', '0042_remove_archiveresult_snapshot_old'),
('core', '0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
('core', '0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more'),
('core', '0045_alter_snapshot_old_id'),
('core', '0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
('core', '0047_alter_snapshottag_unique_together_and_more'),
('core', '0048_alter_archiveresult_snapshot_and_more'),
('core', '0049_rename_snapshot_snapshottag_snapshot_old_and_more'),
('core', '0050_alter_snapshottag_snapshot_old'),
('core', '0051_snapshottag_snapshot_alter_snapshottag_snapshot_old'),
('core', '0052_alter_snapshottag_unique_together_and_more'),
('core', '0053_remove_snapshottag_snapshot_old'),
('core', '0054_alter_snapshot_timestamp'),
('core', '0055_alter_tag_slug'),
('core', '0056_remove_tag_uuid'),
('core', '0057_rename_id_tag_old_id'),
('core', '0058_alter_tag_old_id'),
('core', '0059_tag_id'),
('core', '0060_alter_tag_id'),
('core', '0061_rename_tag_snapshottag_old_tag_and_more'),
('core', '0062_alter_snapshottag_old_tag'),
('core', '0063_snapshottag_tag_alter_snapshottag_old_tag'),
('core', '0064_alter_snapshottag_unique_together_and_more'),
('core', '0065_remove_snapshottag_old_tag'),
('core', '0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id'),
('core', '0067_alter_snapshottag_tag'),
('core', '0068_alter_archiveresult_options'),
('core', '0069_alter_archiveresult_created_alter_snapshot_added_and_more'),
('core', '0070_alter_archiveresult_created_by_alter_snapshot_added_and_more'),
('core', '0071_remove_archiveresult_old_id_remove_snapshot_old_id_and_more'),
('core', '0072_rename_added_snapshot_bookmarked_at_and_more'),
('core', '0073_rename_created_archiveresult_created_at_and_more'),
('core', '0074_alter_snapshot_downloaded_at'),
]
dependencies = [
('core', '0022_auto_20231023_2008'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
# === SNAPSHOT CHANGES ===
# Add health stats fields to Snapshot
migrations.AddField(
model_name='snapshot',
name='num_uses_failed',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='snapshot',
name='num_uses_succeeded',
field=models.PositiveIntegerField(default=0),
),
# Add new fields to Snapshot
migrations.AddField(
model_name='snapshot',
name='created_by',
field=models.ForeignKey(
default=None, null=True, blank=True,
on_delete=django.db.models.deletion.CASCADE,
related_name='snapshot_set',
to=settings.AUTH_USER_MODEL,
),
),
migrations.AddField(
model_name='snapshot',
name='created_at',
field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
),
migrations.AddField(
model_name='snapshot',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
migrations.AddField(
model_name='snapshot',
name='bookmarked_at',
field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
),
migrations.AddField(
model_name='snapshot',
name='downloaded_at',
field=models.DateTimeField(default=None, null=True, blank=True, db_index=True),
),
migrations.AddField(
model_name='snapshot',
name='depth',
field=models.PositiveSmallIntegerField(default=0, db_index=True),
),
migrations.AddField(
model_name='snapshot',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('sealed', 'Sealed')], default='queued', max_length=15, db_index=True),
),
migrations.AddField(
model_name='snapshot',
name='retry_at',
field=models.DateTimeField(default=django.utils.timezone.now, null=True, blank=True, db_index=True),
),
migrations.AddField(
model_name='snapshot',
name='config',
field=models.JSONField(default=dict, blank=False),
),
migrations.AddField(
model_name='snapshot',
name='notes',
field=models.TextField(blank=True, default=''),
),
migrations.AddField(
model_name='snapshot',
name='output_dir',
field=models.CharField(max_length=256, default=None, null=True, blank=True),
),
# Copy data from old fields to new
migrations.RunPython(copy_bookmarked_at_from_added, migrations.RunPython.noop),
migrations.RunPython(copy_created_at_from_added, migrations.RunPython.noop),
migrations.RunPython(populate_created_by_snapshot, migrations.RunPython.noop),
# Make created_by non-nullable after population
migrations.AlterField(
model_name='snapshot',
name='created_by',
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name='snapshot_set',
to=settings.AUTH_USER_MODEL,
db_index=True,
),
),
# Update timestamp field constraints
migrations.AlterField(
model_name='snapshot',
name='timestamp',
field=models.CharField(max_length=32, unique=True, db_index=True, editable=False),
),
# Update title field size
migrations.AlterField(
model_name='snapshot',
name='title',
field=models.CharField(max_length=512, null=True, blank=True, db_index=True),
),
# Remove old 'added' and 'updated' fields
migrations.RemoveField(model_name='snapshot', name='added'),
migrations.RemoveField(model_name='snapshot', name='updated'),
# Register SnapshotTag through model (table already exists from 0006's ManyToManyField)
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.CreateModel(
name='SnapshotTag',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('snapshot', models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot')),
('tag', models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag')),
],
options={
'db_table': 'core_snapshot_tags',
},
),
],
database_operations=[], # Table already exists from 0006
),
# === TAG CHANGES ===
# Tag keeps AutoField (integer) id for migration compatibility
# Add tracking fields to Tag
migrations.AddField(
model_name='tag',
name='created_by',
field=models.ForeignKey(
default=None, null=True, blank=True,
on_delete=django.db.models.deletion.CASCADE,
related_name='tag_set',
to=settings.AUTH_USER_MODEL,
),
),
migrations.AddField(
model_name='tag',
name='created_at',
field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
),
migrations.AddField(
model_name='tag',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
# Populate created_by for tags
migrations.RunPython(populate_created_by_tag, migrations.RunPython.noop),
# Update slug field
migrations.AlterField(
model_name='tag',
name='slug',
field=models.SlugField(unique=True, max_length=100, editable=False),
),
# === ARCHIVERESULT CHANGES ===
# Add health stats fields to ArchiveResult
migrations.AddField(
model_name='archiveresult',
name='num_uses_failed',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_succeeded',
field=models.PositiveIntegerField(default=0),
),
# Add uuid field for new ID
migrations.AddField(
model_name='archiveresult',
name='uuid',
field=models.UUIDField(default=uuid4, null=True, blank=True),
),
migrations.AddField(
model_name='archiveresult',
name='created_by',
field=models.ForeignKey(
default=None, null=True, blank=True,
on_delete=django.db.models.deletion.CASCADE,
related_name='archiveresult_set',
to=settings.AUTH_USER_MODEL,
),
),
migrations.AddField(
model_name='archiveresult',
name='created_at',
field=models.DateTimeField(default=django.utils.timezone.now, db_index=True, null=True),
),
migrations.AddField(
model_name='archiveresult',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
migrations.AddField(
model_name='archiveresult',
name='retry_at',
field=models.DateTimeField(default=django.utils.timezone.now, null=True, blank=True, db_index=True),
),
migrations.AddField(
model_name='archiveresult',
name='notes',
field=models.TextField(blank=True, default=''),
),
migrations.AddField(
model_name='archiveresult',
name='output_dir',
field=models.CharField(max_length=256, default=None, null=True, blank=True),
),
migrations.AddField(
model_name='archiveresult',
name='config',
field=models.JSONField(default=dict, blank=False),
),
# Populate UUIDs and data for archive results
migrations.RunPython(generate_uuid_for_archiveresults, migrations.RunPython.noop),
migrations.RunPython(copy_created_at_from_start_ts, migrations.RunPython.noop),
migrations.RunPython(populate_created_by_archiveresult, migrations.RunPython.noop),
# Make created_by non-nullable
migrations.AlterField(
model_name='archiveresult',
name='created_by',
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name='archiveresult_set',
to=settings.AUTH_USER_MODEL,
db_index=True,
),
),
# Update extractor choices
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(
choices=[
('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'),
('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'),
('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'),
('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'),
('title', 'title'), ('wget', 'wget'),
],
max_length=32, db_index=True,
),
),
# Update status field
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(
choices=[
('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'),
('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped'),
],
max_length=16, default='queued', db_index=True,
),
),
# Update output field size
migrations.AlterField(
model_name='archiveresult',
name='output',
field=models.CharField(max_length=1024, default=None, null=True, blank=True),
),
# Update cmd_version field size
migrations.AlterField(
model_name='archiveresult',
name='cmd_version',
field=models.CharField(max_length=128, default=None, null=True, blank=True),
),
# Make start_ts and end_ts nullable
migrations.AlterField(
model_name='archiveresult',
name='start_ts',
field=models.DateTimeField(default=None, null=True, blank=True),
),
migrations.AlterField(
model_name='archiveresult',
name='end_ts',
field=models.DateTimeField(default=None, null=True, blank=True),
),
# Make pwd nullable
migrations.AlterField(
model_name='archiveresult',
name='pwd',
field=models.CharField(max_length=256, default=None, null=True, blank=True),
),
# Make cmd nullable
migrations.AlterField(
model_name='archiveresult',
name='cmd',
field=models.JSONField(default=None, null=True, blank=True),
),
# Update model options
migrations.AlterModelOptions(
name='archiveresult',
options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'},
),
migrations.AlterModelOptions(
name='snapshot',
options={'verbose_name': 'Snapshot', 'verbose_name_plural': 'Snapshots'},
),
migrations.AlterModelOptions(
name='tag',
options={'verbose_name': 'Tag', 'verbose_name_plural': 'Tags'},
),
]
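
Because this migration declares a `replaces` list, Django treats it as a squash: on databases where all of 0023-0074 are already recorded as applied, it is marked applied without executing, while fresh databases run only this file. A minimal sketch of how to confirm what Django has recorded, assuming a `manage.py shell` session for this project (`MigrationRecorder` is Django's internal bookkeeping model over the django_migrations table):

# Sketch: list which 'core' migrations Django has recorded as applied.
from django.db.migrations.recorder import MigrationRecorder

applied = MigrationRecorder.Migration.objects.filter(app='core').values_list('name', flat=True)
print(sorted(applied))  # the squash shows up here once Django considers it applied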

@@ -0,0 +1,190 @@
# Generated by hand on 2025-12-29
# Upgrades core app from v0.7.2 (migration 0022) to v0.9.0 using raw SQL
# Handles both fresh installs and upgrades from v0.7.2
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('core', '0022_auto_20231023_2008'),
('crawls', '0001_initial'),
('machine', '0001_initial'),
('auth', '0012_alter_user_first_name_max_length'),
]
operations = [
migrations.RunSQL(
# Forward SQL
sql="""
-- ============================================================================
-- PART 1: Rename extractor → plugin in core_archiveresult
-- ============================================================================
-- ALTER TABLE ... RENAME COLUMN is only available in SQLite 3.25+, and the rename
-- coincides with many new v0.9.0 columns anyway, so we rebuild the table in one pass
CREATE TABLE IF NOT EXISTS core_archiveresult_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
uuid TEXT,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
snapshot_id TEXT NOT NULL,
plugin VARCHAR(32) NOT NULL DEFAULT '',
hook_name VARCHAR(255) NOT NULL DEFAULT '',
cmd TEXT,
pwd VARCHAR(256),
cmd_version VARCHAR(128),
start_ts DATETIME,
end_ts DATETIME,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
output_files TEXT NOT NULL DEFAULT '{}',
output_json TEXT,
output_str TEXT NOT NULL DEFAULT '',
output_size INTEGER NOT NULL DEFAULT 0,
output_mimetypes VARCHAR(512) NOT NULL DEFAULT '',
config TEXT,
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
binary_id TEXT,
iface_id TEXT,
process_id TEXT,
FOREIGN KEY (snapshot_id) REFERENCES core_snapshot(id) ON DELETE CASCADE,
FOREIGN KEY (binary_id) REFERENCES machine_binary(id) ON DELETE SET NULL,
FOREIGN KEY (iface_id) REFERENCES machine_networkinterface(id) ON DELETE SET NULL,
FOREIGN KEY (process_id) REFERENCES machine_process(id) ON DELETE RESTRICT
);
-- Copy data from the old table (the 0022 dependency guarantees core_archiveresult exists)
INSERT OR IGNORE INTO core_archiveresult_new (
id, uuid, created_at, modified_at, snapshot_id, plugin,
cmd, pwd, cmd_version, start_ts, end_ts, status, output_str
)
SELECT
id, uuid,
COALESCE(start_ts, CURRENT_TIMESTAMP) as created_at,
COALESCE(end_ts, start_ts, CURRENT_TIMESTAMP) as modified_at,
snapshot_id,
COALESCE(extractor, '') as plugin,
cmd, pwd, cmd_version,
start_ts, end_ts, status,
COALESCE(output, '') as output_str
FROM core_archiveresult
WHERE EXISTS (SELECT 1 FROM sqlite_master WHERE type='table' AND name='core_archiveresult');
DROP TABLE IF EXISTS core_archiveresult;
ALTER TABLE core_archiveresult_new RENAME TO core_archiveresult;
CREATE INDEX IF NOT EXISTS core_archiveresult_snapshot_id_idx ON core_archiveresult(snapshot_id);
CREATE INDEX IF NOT EXISTS core_archiveresult_plugin_idx ON core_archiveresult(plugin);
CREATE INDEX IF NOT EXISTS core_archiveresult_status_idx ON core_archiveresult(status);
CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at);
CREATE INDEX IF NOT EXISTS core_archiveresult_created_at_idx ON core_archiveresult(created_at);
CREATE INDEX IF NOT EXISTS core_archiveresult_uuid_idx ON core_archiveresult(uuid);
-- ============================================================================
-- PART 2: Upgrade core_snapshot table
-- ============================================================================
CREATE TABLE IF NOT EXISTS core_snapshot_new (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
url TEXT NOT NULL,
timestamp VARCHAR(32) NOT NULL UNIQUE,
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
crawl_id TEXT,
parent_snapshot_id TEXT,
title VARCHAR(512),
downloaded_at DATETIME,
depth INTEGER NOT NULL DEFAULT 0,
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
config TEXT NOT NULL DEFAULT '{}',
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
current_step INTEGER NOT NULL DEFAULT 0,
FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
);
-- Copy data from old table if it exists
-- Map v0.7.2 fields: added → bookmarked_at/created_at, updated → modified_at
INSERT OR IGNORE INTO core_snapshot_new (
id, url, timestamp, title, bookmarked_at, created_at, modified_at
)
SELECT
id, url, timestamp, title,
COALESCE(added, CURRENT_TIMESTAMP) as bookmarked_at,
COALESCE(added, CURRENT_TIMESTAMP) as created_at,
COALESCE(updated, added, CURRENT_TIMESTAMP) as modified_at
FROM core_snapshot
WHERE EXISTS (SELECT 1 FROM sqlite_master WHERE type='table' AND name='core_snapshot');
DROP TABLE IF EXISTS core_snapshot;
ALTER TABLE core_snapshot_new RENAME TO core_snapshot;
CREATE INDEX IF NOT EXISTS core_snapshot_url_idx ON core_snapshot(url);
CREATE INDEX IF NOT EXISTS core_snapshot_timestamp_idx ON core_snapshot(timestamp);
CREATE INDEX IF NOT EXISTS core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at);
CREATE INDEX IF NOT EXISTS core_snapshot_crawl_id_idx ON core_snapshot(crawl_id);
CREATE INDEX IF NOT EXISTS core_snapshot_status_idx ON core_snapshot(status);
CREATE INDEX IF NOT EXISTS core_snapshot_retry_at_idx ON core_snapshot(retry_at);
CREATE INDEX IF NOT EXISTS core_snapshot_created_at_idx ON core_snapshot(created_at);
CREATE UNIQUE INDEX IF NOT EXISTS core_snapshot_url_crawl_unique ON core_snapshot(url, crawl_id);
-- ============================================================================
-- PART 3: Upgrade core_tag table
-- ============================================================================
CREATE TABLE IF NOT EXISTS core_tag_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
name VARCHAR(100) NOT NULL UNIQUE,
slug VARCHAR(100) NOT NULL UNIQUE,
created_by_id INTEGER,
FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE
);
-- Copy data from old table if it exists
INSERT OR IGNORE INTO core_tag_new (id, name, slug)
SELECT id, name, slug
FROM core_tag
WHERE EXISTS (SELECT 1 FROM sqlite_master WHERE type='table' AND name='core_tag');
DROP TABLE IF EXISTS core_tag;
ALTER TABLE core_tag_new RENAME TO core_tag;
CREATE INDEX IF NOT EXISTS core_tag_created_at_idx ON core_tag(created_at);
CREATE INDEX IF NOT EXISTS core_tag_created_by_id_idx ON core_tag(created_by_id);
-- core_snapshot_tags table already exists in v0.7.2, no changes needed
""",
# Reverse SQL (best effort - data loss may occur)
reverse_sql="""
-- This is a best-effort rollback - data in new fields will be lost
SELECT 'Migration 0023 cannot be fully reversed - new fields will be lost';
"""
),
]
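
Since this file rebuilds three tables with hand-written SQL, a cheap follow-up is to let SQLite verify the result. A minimal sketch; the `data/index.sqlite3` path is an assumption about where the index lives, not something this commit defines:

# Sketch: post-rebuild sanity checks on the SQLite index.
import sqlite3

con = sqlite3.connect('data/index.sqlite3')  # path is an assumption
print(con.execute('PRAGMA foreign_key_check').fetchall())  # [] means no FK violations
print(con.execute('PRAGMA integrity_check').fetchone())    # ('ok',) means the file is healthy
con.close()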

@@ -0,0 +1,118 @@
# Generated by hand on 2025-12-29
# Creates a default crawl for v0.7.2 migrated snapshots and makes crawl_id NOT NULL
from django.db import migrations
def create_default_crawl_and_assign_snapshots(apps, schema_editor):
"""
Create a default crawl for migrated snapshots and assign all snapshots without a crawl to it.
Uses raw SQL because the app registry isn't fully populated during migrations.
"""
from django.db import connection
import uuid as uuid_lib
from datetime import datetime
cursor = connection.cursor()
# Check if there are any snapshots without a crawl
cursor.execute("SELECT COUNT(*) FROM core_snapshot WHERE crawl_id IS NULL")
snapshots_without_crawl = cursor.fetchone()[0]
if snapshots_without_crawl == 0:
print('✓ Fresh install or all snapshots already have crawls')
return
# Get or create system user (pk=1)
cursor.execute("SELECT id FROM auth_user WHERE id = 1")
if not cursor.fetchone():
cursor.execute("""
INSERT INTO auth_user (id, password, is_superuser, username, first_name, last_name, email, is_staff, is_active, date_joined)
VALUES (1, '!', 1, 'system', '', '', '', 1, 1, ?)
""", [datetime.now().isoformat()])
# Create a default crawl for migrated snapshots
crawl_id = str(uuid_lib.uuid4())
now = datetime.now().isoformat()
cursor.execute("""
INSERT INTO crawls_crawl (
id, created_at, modified_at, num_uses_succeeded, num_uses_failed,
urls, max_depth, tags_str, label, notes, output_dir,
status, retry_at, created_by_id, schedule_id, config, persona_id
) VALUES (?, ?, ?, 0, 0, '', 0, '', 'Migrated from v0.7.2',
'Auto-created crawl for snapshots migrated from v0.7.2', '',
'sealed', ?, 1, NULL, '{}', NULL)
""", [crawl_id, now, now, now])
# Assign all snapshots without a crawl to the default crawl
cursor.execute("UPDATE core_snapshot SET crawl_id = ? WHERE crawl_id IS NULL", [crawl_id])
print(f'✓ Assigned {snapshots_without_crawl} snapshots to default crawl {crawl_id}')
class Migration(migrations.Migration):
dependencies = [
('core', '0023_upgrade_to_0_9_0'),
('crawls', '0001_initial'),
('auth', '0012_alter_user_first_name_max_length'),
]
operations = [
migrations.RunPython(
create_default_crawl_and_assign_snapshots,
reverse_code=migrations.RunPython.noop,
),
# Now make crawl_id NOT NULL
migrations.RunSQL(
sql="""
-- Rebuild snapshot table with NOT NULL crawl_id
CREATE TABLE core_snapshot_final (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
url TEXT NOT NULL,
timestamp VARCHAR(32) NOT NULL UNIQUE,
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
crawl_id TEXT NOT NULL,
parent_snapshot_id TEXT,
title VARCHAR(512),
downloaded_at DATETIME,
depth INTEGER NOT NULL DEFAULT 0,
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
config TEXT NOT NULL DEFAULT '{}',
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
current_step INTEGER NOT NULL DEFAULT 0,
FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
);
INSERT INTO core_snapshot_final SELECT * FROM core_snapshot;
DROP TABLE core_snapshot;
ALTER TABLE core_snapshot_final RENAME TO core_snapshot;
CREATE INDEX core_snapshot_url_idx ON core_snapshot(url);
CREATE INDEX core_snapshot_timestamp_idx ON core_snapshot(timestamp);
CREATE INDEX core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at);
CREATE INDEX core_snapshot_crawl_id_idx ON core_snapshot(crawl_id);
CREATE INDEX core_snapshot_status_idx ON core_snapshot(status);
CREATE INDEX core_snapshot_retry_at_idx ON core_snapshot(retry_at);
CREATE INDEX core_snapshot_created_at_idx ON core_snapshot(created_at);
CREATE UNIQUE INDEX core_snapshot_url_crawl_unique ON core_snapshot(url, crawl_id);
""",
reverse_sql=migrations.RunSQL.noop,
),
]
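
A quick way to confirm the backfill worked is to count orphaned snapshots before the NOT NULL rebuild runs; after create_default_crawl_and_assign_snapshots, this should be zero. A minimal sketch under the same database-path assumption as above:

# Sketch: no snapshot should be left without a crawl once the backfill has run.
import sqlite3

con = sqlite3.connect('data/index.sqlite3')  # path is an assumption
orphans = con.execute('SELECT COUNT(*) FROM core_snapshot WHERE crawl_id IS NULL').fetchone()[0]
assert orphans == 0, f'{orphans} snapshots still have no crawl'
con.close()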

@@ -1,57 +0,0 @@
# Data migration to clear config fields that may contain invalid JSON
# This runs before 0025 to prevent CHECK constraint failures
from django.db import migrations
def clear_config_fields(apps, schema_editor):
"""Clear all config fields in related tables to avoid JSON validation errors."""
# Disable foreign key checks temporarily to allow updates
with schema_editor.connection.cursor() as cursor:
cursor.execute("PRAGMA foreign_keys=OFF")
tables_to_clear = [
('crawls_seed', 'config'),
('crawls_crawl', 'config'),
('crawls_crawlschedule', 'config'),  # table existence is checked in the loop below
('machine_machine', 'stats'),
('machine_machine', 'config'),
]
for table_name, field_name in tables_to_clear:
try:
with schema_editor.connection.cursor() as cursor:
# Check if table exists first
cursor.execute(f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}'")
if not cursor.fetchone():
print(f" Skipping {table_name}.{field_name}: table does not exist")
continue
# Set all to empty JSON object
cursor.execute(f"UPDATE {table_name} SET {field_name} = '{{}}' WHERE {field_name} IS NOT NULL")
print(f" Cleared {field_name} in {table_name}: {cursor.rowcount} rows")
except Exception as e:
print(f" Skipping {table_name}.{field_name}: {e}")
# Re-enable foreign key checks
with schema_editor.connection.cursor() as cursor:
cursor.execute("PRAGMA foreign_keys=ON")
class Migration(migrations.Migration):
dependencies = [
('core', '0023_new_schema'),
('crawls', '0001_initial'),
('machine', '0001_squashed'),
]
operations = [
migrations.RunPython(clear_config_fields, reverse_code=migrations.RunPython.noop),
]

@@ -1,28 +0,0 @@
# Disable foreign key checks before 0025 to prevent CHECK constraint validation errors
from django.db import migrations
def disable_fk_checks(apps, schema_editor):
"""Temporarily disable foreign key checks."""
with schema_editor.connection.cursor() as cursor:
cursor.execute("PRAGMA foreign_keys=OFF")
print(" Disabled foreign key checks")
def enable_fk_checks(apps, schema_editor):
"""Re-enable foreign key checks."""
with schema_editor.connection.cursor() as cursor:
cursor.execute("PRAGMA foreign_keys=ON")
print(" Enabled foreign key checks")
class Migration(migrations.Migration):
dependencies = [
('core', '0024_b_clear_config_fields'),
]
operations = [
migrations.RunPython(disable_fk_checks, reverse_code=enable_fk_checks),
]
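
One caveat: SQLite silently ignores changes to PRAGMA foreign_keys while a transaction is open, and Django wraps each migration in a transaction on SQLite by default, so the toggle above may be a no-op unless the migration opts out of atomicity. A sketch of the opt-out, reusing the helpers defined above (an assumption about intent, not part of this commit):

# Sketch: PRAGMA foreign_keys cannot change while a transaction is open,
# so a migration toggling it must opt out of Django's per-migration transaction.
from django.db import migrations

class Migration(migrations.Migration):
    atomic = False  # run outside a transaction so the PRAGMA actually applies
    dependencies = [('core', '0024_b_clear_config_fields')]
    operations = [migrations.RunPython(disable_fk_checks, reverse_code=enable_fk_checks)]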

@@ -1,93 +0,0 @@
# Fix crawls_crawl config field to avoid CHECK constraint errors during table rebuilds
from django.db import migrations
def fix_crawls_config(apps, schema_editor):
"""
Rebuild crawls_crawl table to fix CHECK constraints and make seed_id nullable.
Only runs for UPGRADES from 0.8.x (when crawls.0001_initial didn't exist yet).
For fresh installs, crawls.0001_initial creates the correct schema.
"""
with schema_editor.connection.cursor() as cursor:
# Check if this is an upgrade from old 0.8.x or a fresh install
# In fresh installs, crawls.0001_initial was applied, creating seed FK
# In upgrades, the table was created by old migrations before 0001_initial existed
cursor.execute("""
SELECT COUNT(*) FROM django_migrations
WHERE app='crawls' AND name='0001_initial'
""")
has_crawls_0001 = cursor.fetchone()[0] > 0
if has_crawls_0001:
# Fresh install - crawls.0001_initial already created the correct schema
# Just clear config to avoid CHECK constraint issues
print(" Fresh install detected - clearing config field only")
try:
cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')
except Exception as e:
print(f" Skipping config clear: {e}")
return
# Upgrade from 0.8.x - rebuild table to make seed_id nullable and remove CHECK constraint
print(" Upgrading from 0.8.x - rebuilding crawls_crawl table")
cursor.execute("PRAGMA foreign_keys=OFF")
# Backup
cursor.execute("CREATE TABLE crawls_crawl_backup AS SELECT * FROM crawls_crawl")
# Recreate without config CHECK constraint, with nullable seed_id
cursor.execute("DROP TABLE crawls_crawl")
cursor.execute("""
CREATE TABLE "crawls_crawl" (
"num_uses_failed" integer unsigned NOT NULL CHECK ("num_uses_failed" >= 0),
"num_uses_succeeded" integer unsigned NOT NULL CHECK ("num_uses_succeeded" >= 0),
"id" char(32) NOT NULL PRIMARY KEY,
"created_at" datetime NOT NULL,
"modified_at" datetime NOT NULL,
"urls" text NOT NULL,
"config" text,
"max_depth" smallint unsigned NOT NULL CHECK ("max_depth" >= 0),
"tags_str" varchar(1024) NOT NULL,
"persona_id" char(32) NULL,
"label" varchar(64) NOT NULL,
"notes" text NOT NULL,
"output_dir" varchar(512) NOT NULL,
"status" varchar(15) NOT NULL,
"retry_at" datetime NULL,
"created_by_id" integer NOT NULL REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED,
"seed_id" char(32) NULL DEFAULT NULL,
"schedule_id" char(32) NULL REFERENCES "crawls_crawlschedule" ("id") DEFERRABLE INITIALLY DEFERRED
)
""")
# Restore data
cursor.execute("""
INSERT INTO "crawls_crawl" (
"num_uses_failed", "num_uses_succeeded", "id", "created_at", "modified_at",
"urls", "config", "max_depth", "tags_str", "persona_id", "label", "notes",
"output_dir", "status", "retry_at", "created_by_id", "seed_id", "schedule_id"
)
SELECT
"num_uses_failed", "num_uses_succeeded", "id", "created_at", "modified_at",
"urls", "config", "max_depth", "tags_str", "persona_id", "label", "notes",
"output_dir", "status", "retry_at", "created_by_id", "seed_id", "schedule_id"
FROM crawls_crawl_backup
""")
cursor.execute("DROP TABLE crawls_crawl_backup")
# NULL out config to avoid any invalid JSON
cursor.execute('UPDATE "crawls_crawl" SET "config" = NULL')
class Migration(migrations.Migration):
dependencies = [
('core', '0024_c_disable_fk_checks'),
('crawls', '0001_initial'),
]
operations = [
migrations.RunPython(fix_crawls_config, reverse_code=migrations.RunPython.noop),
]

@@ -1,38 +0,0 @@
# Generated by Django 5.0.6 on 2024-12-25
# Adds crawl FK and iface FK after crawls and machine apps are created
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('core', '0024_d_fix_crawls_config'),
]
operations = [
# Add crawl FK to Snapshot
migrations.AddField(
model_name='snapshot',
name='crawl',
field=models.ForeignKey(
default=None, null=True, blank=True,
on_delete=django.db.models.deletion.CASCADE,
related_name='snapshot_set',
to='crawls.crawl',
db_index=True,
),
),
# Add network interface FK to ArchiveResult
migrations.AddField(
model_name='archiveresult',
name='iface',
field=models.ForeignKey(
null=True, blank=True,
on_delete=django.db.models.deletion.SET_NULL,
to='machine.networkinterface',
),
),
]

@@ -1,22 +0,0 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0024_snapshot_crawl'),
]
operations = [
# Remove the unique constraint on url
migrations.AlterField(
model_name='snapshot',
name='url',
field=models.URLField(db_index=True, unique=False),
),
# Add unique constraint on (url, crawl) combination
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=['url', 'crawl'], name='unique_url_per_crawl'),
),
]
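
Worth noting: while crawl remains nullable at this point in the history, the constraint is looser than it looks, because SQL unique indexes treat NULLs as distinct. Two snapshots with the same URL and no crawl would both be accepted, as this throwaway illustration shows (a toy table, not this schema):

# Sketch: duplicate (url, NULL) rows pass a UNIQUE (url, crawl_id) constraint.
import sqlite3

con = sqlite3.connect(':memory:')
con.execute('CREATE TABLE t (url TEXT, crawl_id TEXT, UNIQUE (url, crawl_id))')
con.execute("INSERT INTO t VALUES ('https://example.com', NULL)")
con.execute("INSERT INTO t VALUES ('https://example.com', NULL)")  # no IntegrityError
print(con.execute('SELECT COUNT(*) FROM t').fetchone())  # (2,)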

@@ -1,145 +0,0 @@
# Generated by Django 6.0 on 2025-12-25 09:34
import archivebox.base_models.models
import django.db.models.deletion
import django.utils.timezone
from archivebox import uuid_compat
from django.conf import settings
from django.db import migrations, models
def populate_archiveresult_uuids(apps, schema_editor):
"""Generate unique UUIDs for ArchiveResults that don't have one."""
# Check if uuid column exists before trying to populate it
with schema_editor.connection.cursor() as cursor:
cursor.execute("PRAGMA table_info(core_archiveresult)")
columns = [row[1] for row in cursor.fetchall()]
if 'uuid' not in columns:
return # uuid column doesn't exist, skip this data migration
ArchiveResult = apps.get_model('core', 'ArchiveResult')
for result in ArchiveResult.objects.filter(uuid__isnull=True):
result.uuid = uuid_compat.uuid7()
result.save(update_fields=['uuid'])
def reverse_populate_uuids(apps, schema_editor):
"""Reverse migration - do nothing, UUIDs can stay."""
pass
def remove_output_dir_if_exists(apps, schema_editor):
"""Remove output_dir columns if they exist."""
with schema_editor.connection.cursor() as cursor:
# Check and remove from core_archiveresult
cursor.execute("PRAGMA table_info(core_archiveresult)")
columns = [row[1] for row in cursor.fetchall()]
if 'output_dir' in columns:
cursor.execute("ALTER TABLE core_archiveresult DROP COLUMN output_dir")
# Check and remove from core_snapshot
cursor.execute("PRAGMA table_info(core_snapshot)")
columns = [row[1] for row in cursor.fetchall()]
if 'output_dir' in columns:
cursor.execute("ALTER TABLE core_snapshot DROP COLUMN output_dir")
class Migration(migrations.Migration):
dependencies = [
('core', '0025_allow_duplicate_urls_per_crawl'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
# FIRST: Populate UUIDs for existing NULL rows BEFORE any schema changes
migrations.RunPython(populate_archiveresult_uuids, reverse_populate_uuids),
# Remove output_dir fields (not needed, computed from snapshot)
migrations.RunPython(remove_output_dir_if_exists, reverse_code=migrations.RunPython.noop),
# Update Django's migration state to match 0.9.x schema
# Database already has correct types from 0.8.x, just update state
migrations.SeparateDatabaseAndState(
state_operations=[
# Archiveresult field alterations
migrations.AlterField(
model_name='archiveresult',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='archiveresult',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(db_index=True, max_length=32),
),
# Convert id from AutoField to UUIDField (database already has UUID CHAR(32))
migrations.AlterField(
model_name='archiveresult',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
),
# Snapshot field alterations
migrations.AlterField(
model_name='snapshot',
name='bookmarked_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='snapshot',
name='downloaded_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='id',
field=models.UUIDField(default=uuid_compat.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
],
database_operations=[
# No actual database changes needed - schema is already correct from 0.8.x
],
),
# SnapshotTag and Tag alterations - state only, DB already correct
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AlterField(
model_name='snapshottag',
name='id',
field=models.AutoField(primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='tag',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterUniqueTogether(
name='snapshottag',
unique_together={('snapshot', 'tag')},
),
],
database_operations=[],
),
]
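
Because every schema operation above is state-only, a useful invariant afterwards is that the state Django derives from the migration history still matches the models. A minimal sketch of the check, using the documented makemigrations flags (exits non-zero if new migrations would be generated):

# Sketch: verify the state-only operations above left no model/state drift.
import subprocess

subprocess.run(['python', 'manage.py', 'makemigrations', 'core', '--check', '--dry-run'], check=True)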

@@ -1,29 +0,0 @@
# Generated by Django 6.0 on 2025-12-27 01:40
import archivebox.base_models.models
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0026_remove_archiveresult_output_dir_and_more'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.AlterField(
model_name='archiveresult',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='snapshot',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
),
# Note: Cannot alter M2M tags field via migration (Django limitation)
# The related_name change is handled by the model definition itself
]

@@ -1,47 +0,0 @@
# Generated by Claude Code on 2025-12-27
from django.db import migrations, models
def set_existing_snapshots_to_old_version(apps, schema_editor):
"""Set existing snapshots to 0.8.0 since they use the old filesystem layout."""
Snapshot = apps.get_model('core', 'Snapshot')
# Set all existing snapshots to 0.8.0 (the previous version's layout)
Snapshot.objects.all().update(fs_version='0.8.0')
def reverse_migration(apps, schema_editor):
"""Reverse migration - do nothing."""
pass
class Migration(migrations.Migration):
dependencies = [
('core', '0027_alter_archiveresult_created_by_and_more'),
]
operations = [
# Add field with temporary default to allow NULL initially
migrations.AddField(
model_name='snapshot',
name='fs_version',
field=models.CharField(
max_length=10,
default='0.8.0', # Temporary default for adding the column
help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().'
),
),
# Set existing snapshots to old version
migrations.RunPython(set_existing_snapshots_to_old_version, reverse_migration),
# Update default to current version for new snapshots going forward
migrations.AlterField(
model_name='snapshot',
name='fs_version',
field=models.CharField(
max_length=10,
default='0.9.0', # Hardcoded for this migration - new migration when version bumps
help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().'
),
),
]

@@ -1,91 +0,0 @@
# Generated by Django for hook architecture support
# Phase 1: Add new ArchiveResult fields for hook output
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('core', '0028_snapshot_fs_version'),
('machine', '0002_rename_custom_cmds_to_overrides'),
]
operations = [
# Add new output fields using SeparateDatabaseAndState to avoid table rebuilds
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddField(
model_name='archiveresult',
name='output_str',
field=models.TextField(
blank=True,
default='',
help_text='Human-readable output summary (e.g., "Downloaded 5 files")'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(
null=True,
blank=True,
default=None,
help_text='Structured metadata (headers, redirects, etc.) - should NOT duplicate ArchiveResult fields'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(
default=dict,
help_text='Dict of {relative_path: {metadata}} - values are empty dicts for now, extensible for future metadata'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(
default=0,
help_text='Total recursive size in bytes of all output files'
),
),
migrations.AddField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(
max_length=512,
blank=True,
default='',
help_text='CSV of mimetypes sorted by size descending'
),
),
migrations.AddField(
model_name='archiveresult',
name='binary',
field=models.ForeignKey(
'machine.Binary',
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name='archiveresults',
help_text='Primary binary used by this hook (optional)'
),
),
],
database_operations=[
migrations.RunSQL(
sql="""
ALTER TABLE core_archiveresult ADD COLUMN output_str TEXT DEFAULT '';
ALTER TABLE core_archiveresult ADD COLUMN output_json TEXT;
ALTER TABLE core_archiveresult ADD COLUMN output_files TEXT DEFAULT '{}';
ALTER TABLE core_archiveresult ADD COLUMN output_size BIGINT DEFAULT 0;
ALTER TABLE core_archiveresult ADD COLUMN output_mimetypes VARCHAR(512) DEFAULT '';
ALTER TABLE core_archiveresult ADD COLUMN binary_id CHAR(32) REFERENCES machine_binary(id);
""",
reverse_sql=migrations.RunSQL.noop,
),
],
),
]
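
The database_operations half above uses plain ADD COLUMN statements because SQLite implements those as metadata-only changes, unlike most other ALTERs, which would force exactly the table rebuild this commit works to avoid. A small sketch to confirm the columns landed, under the same database-path assumption as earlier:

# Sketch: confirm the new hook-output columns exist after migrating.
import sqlite3

con = sqlite3.connect('data/index.sqlite3')  # path is an assumption
cols = {row[1] for row in con.execute('PRAGMA table_info(core_archiveresult)')}
print({'output_str', 'output_json', 'output_files'} <= cols)  # True after migrating
con.close()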

@@ -1,83 +0,0 @@
# Generated by Django for hook architecture support
# Phase 1: Migrate existing 'output' field to new split fields
from django.db import migrations
import json
def migrate_output_field(apps, schema_editor):
"""
Migrate existing 'output' field to new split fields.
Logic:
- If output contains JSON {...}, move to output_json
- Otherwise, move to output_str
Use raw SQL to avoid CHECK constraint issues during migration.
"""
# Use raw SQL to migrate data without triggering CHECK constraints
with schema_editor.connection.cursor() as cursor:
# Get all archive results
cursor.execute("""
SELECT id, output FROM core_archiveresult
""")
for row in cursor.fetchall():
ar_id, old_output = row
old_output = old_output or ''
# Case 1: JSON output
if old_output.strip().startswith('{'):
try:
# Validate it's actual JSON
parsed = json.loads(old_output)
# Update with JSON - cast to JSON to satisfy CHECK constraint
json_str = json.dumps(parsed)
cursor.execute("""
UPDATE core_archiveresult
SET output_str = '', output_json = json(?)
WHERE id = ?
""", (json_str, ar_id))
except json.JSONDecodeError:
# Not valid JSON, treat as string
cursor.execute("""
UPDATE core_archiveresult
SET output_str = ?, output_json = NULL
WHERE id = ?
""", (old_output, ar_id))
# Case 2: File path or plain string
else:
cursor.execute("""
UPDATE core_archiveresult
SET output_str = ?, output_json = NULL
WHERE id = ?
""", (old_output, ar_id))
def reverse_migrate(apps, schema_editor):
"""Reverse migration - copy output_str back to output."""
ArchiveResult = apps.get_model('core', 'ArchiveResult')
for ar in ArchiveResult.objects.all().iterator():
if ar.output_json:
ar.output = json.dumps(ar.output_json)
else:
ar.output = ar.output_str or ''
ar.save(update_fields=['output'])
class Migration(migrations.Migration):
dependencies = [
('core', '0029_archiveresult_hook_fields'),
]
operations = [
migrations.RunPython(migrate_output_field, reverse_migrate),
# Now safe to remove old 'output' field
migrations.RemoveField(
model_name='archiveresult',
name='output',
),
]
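
The split heuristic above only routes values that parse as JSON objects into output_json; everything else, including bare file paths, stays in output_str. A pure-Python illustration of the same decision rule (a standalone mirror of migrate_output_field, for clarity only):

# Sketch: the same JSON-vs-string routing used by migrate_output_field.
import json

def route_output(old_output):
    old_output = old_output or ''
    if old_output.strip().startswith('{'):
        try:
            return ('output_json', json.loads(old_output))
        except json.JSONDecodeError:
            pass  # not valid JSON, fall through to plain string
    return ('output_str', old_output)

print(route_output('{"status": 200}'))    # ('output_json', {'status': 200})
print(route_output('archive/warc/x.gz'))  # ('output_str', 'archive/warc/x.gz')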

@@ -1,27 +0,0 @@
# Generated by Django 6.0 on 2025-12-27
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0030_migrate_output_field'),
]
operations = [
migrations.AddField(
model_name='snapshot',
name='parent_snapshot',
field=models.ForeignKey(
blank=True,
db_index=True,
help_text='Parent snapshot that discovered this URL (for recursive crawling)',
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name='child_snapshots',
to='core.snapshot'
),
),
]

@@ -1,77 +0,0 @@
# Generated by Django 6.0 on 2025-12-28 05:12
import django.db.models.deletion
from archivebox import uuid_compat
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0031_snapshot_parent_snapshot'),
('crawls', '0004_alter_crawl_output_dir'),
('machine', '0004_drop_dependency_table'), # Changed from 0003 - wait until Dependency is dropped
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
# Update Django's state only - database already has correct schema from 0029
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AlterField(
model_name='archiveresult',
name='binary',
field=models.ForeignKey(blank=True, help_text='Primary binary used by this hook', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='archiveresults', to='machine.binary'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
),
migrations.AlterField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_str',
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
),
migrations.AlterField(
model_name='archiveresult',
name='uuid',
field=models.UUIDField(blank=True, db_index=True, default=uuid_compat.uuid7, null=True),
),
],
database_operations=[
# No database changes needed - columns already exist with correct types
],
),
# Add unique constraint without table rebuild
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
),
],
database_operations=[
migrations.RunSQL(
sql="CREATE UNIQUE INDEX IF NOT EXISTS unique_timestamp ON core_snapshot (timestamp);",
reverse_sql="DROP INDEX IF EXISTS unique_timestamp;",
),
],
),
]

@@ -1,44 +0,0 @@
# Generated by Django 6.0 on 2025-12-28
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0032_alter_archiveresult_binary_and_more'),
]
operations = [
# Use SeparateDatabaseAndState to avoid table rebuilds that would re-add CHECK constraints
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.RenameField(
model_name='archiveresult',
old_name='extractor',
new_name='plugin',
),
migrations.AddField(
model_name='archiveresult',
name='hook_name',
field=models.CharField(
blank=True,
default='',
max_length=255,
db_index=True,
help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)'
),
),
],
database_operations=[
migrations.RunSQL(
sql="""
ALTER TABLE core_archiveresult RENAME COLUMN extractor TO plugin;
ALTER TABLE core_archiveresult ADD COLUMN hook_name VARCHAR(255) DEFAULT '' NOT NULL;
CREATE INDEX IF NOT EXISTS core_archiveresult_hook_name_idx ON core_archiveresult (hook_name);
""",
reverse_sql=migrations.RunSQL.noop,
),
],
),
]

@@ -1,37 +0,0 @@
# Generated by Django 6.0 on 2025-12-28
# Add Snapshot.current_step field for hook step-based execution
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0033_rename_extractor_add_hook_name'),
]
operations = [
# Use SeparateDatabaseAndState to avoid table rebuild that would fail on config NOT NULL constraint
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddField(
model_name='snapshot',
name='current_step',
field=models.PositiveSmallIntegerField(
default=0,
db_index=True,
help_text='Current hook step being executed (0-9). Used for sequential hook execution.'
),
),
],
database_operations=[
migrations.RunSQL(
sql="""
ALTER TABLE core_snapshot ADD COLUMN current_step SMALLINT UNSIGNED DEFAULT 0 NOT NULL;
CREATE INDEX IF NOT EXISTS core_snapshot_current_step_idx ON core_snapshot (current_step);
""",
reverse_sql=migrations.RunSQL.noop,
),
],
),
]

@@ -1,87 +0,0 @@
# Generated migration
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
def create_catchall_crawls_and_assign_snapshots(apps, schema_editor):
"""
Create one catchall Crawl per user for all snapshots without a crawl.
Assign those snapshots to their user's catchall crawl.
"""
Snapshot = apps.get_model('core', 'Snapshot')
Crawl = apps.get_model('crawls', 'Crawl')
User = apps.get_model(settings.AUTH_USER_MODEL)
# Get all snapshots without a crawl
snapshots_without_crawl = Snapshot.objects.filter(crawl__isnull=True)
if not snapshots_without_crawl.exists():
return
# Group by created_by_id
snapshots_by_user = {}
for snapshot in snapshots_without_crawl:
user_id = snapshot.created_by_id
if user_id not in snapshots_by_user:
snapshots_by_user[user_id] = []
snapshots_by_user[user_id].append(snapshot)
# Create one catchall crawl per user and assign snapshots
for user_id, snapshots in snapshots_by_user.items():
try:
user = User.objects.get(pk=user_id)
username = user.username
except User.DoesNotExist:
username = 'unknown'
# Create catchall crawl for this user
crawl = Crawl.objects.create(
urls=f'# Catchall crawl for {len(snapshots)} snapshots without a crawl',
max_depth=0,
label=f'[migration] catchall for user {username}',
created_by_id=user_id,
)
# Assign all snapshots to this crawl
for snapshot in snapshots:
snapshot.crawl = crawl
snapshot.save(update_fields=['crawl'])
class Migration(migrations.Migration):
dependencies = [
('core', '0034_snapshot_current_step'),
('crawls', '0005_drop_seed_id_column'),
]
operations = [
# Step 1: Assign all snapshots without a crawl to catchall crawls
migrations.RunPython(
create_catchall_crawls_and_assign_snapshots,
reverse_code=migrations.RunPython.noop,
),
# Step 2 & 3: Update Django's state only - leave created_by_id column in database (unused but harmless)
migrations.SeparateDatabaseAndState(
state_operations=[
# Make crawl non-nullable
migrations.AlterField(
model_name='snapshot',
name='crawl',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
),
# Remove created_by field from Django's state
migrations.RemoveField(
model_name='snapshot',
name='created_by',
),
],
database_operations=[
# No database changes - crawl_id already exists and NOT NULL constraint will be enforced by model
# created_by_id column remains in database but is unused
],
),
]

@@ -1,27 +0,0 @@
# Generated migration
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('core', '0035_snapshot_crawl_non_nullable_remove_created_by'),
]
operations = [
# Remove created_by field from ArchiveResult (state only)
# No data migration needed - created_by can be accessed via snapshot.crawl.created_by
# Leave created_by_id column in database (unused but harmless, avoids table rebuild)
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.RemoveField(
model_name='archiveresult',
name='created_by',
),
],
database_operations=[
# No database changes - leave created_by_id column in place to avoid table rebuild
],
),
]

@@ -1,44 +0,0 @@
# Generated by Django 6.0 on 2025-12-29 06:45
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0036_remove_archiveresult_created_by'),
]
operations = [
# Update Django's state only - database columns remain for backwards compat
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.RemoveField(
model_name='archiveresult',
name='output_dir',
),
migrations.RemoveField(
model_name='snapshot',
name='output_dir',
),
migrations.AlterField(
model_name='archiveresult',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
),
],
database_operations=[
# No database changes - columns remain in place to avoid table rebuilds
],
),
]

@@ -1,84 +0,0 @@
# Add missing columns to ArchiveResult and remove created_by_id from Snapshot
from django.db import migrations, models, connection
import django.utils.timezone
def add_columns_if_not_exist(apps, schema_editor):
"""Add columns to ArchiveResult only if they don't already exist."""
with connection.cursor() as cursor:
# Get existing columns
cursor.execute("PRAGMA table_info(core_archiveresult)")
existing_columns = {row[1] for row in cursor.fetchall()}
# Add num_uses_failed if it doesn't exist
if 'num_uses_failed' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_failed integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_failed >= 0)")
# Add num_uses_succeeded if it doesn't exist
if 'num_uses_succeeded' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN num_uses_succeeded integer unsigned NOT NULL DEFAULT 0 CHECK (num_uses_succeeded >= 0)")
# Add config if it doesn't exist
if 'config' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN config text NULL")
# Add retry_at if it doesn't exist
if 'retry_at' not in existing_columns:
cursor.execute("ALTER TABLE core_archiveresult ADD COLUMN retry_at datetime NULL")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at)")
class Migration(migrations.Migration):
dependencies = [
('core', '0037_remove_archiveresult_output_dir_and_more'),
]
operations = [
# Add missing columns to ArchiveResult
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddField(
model_name='archiveresult',
name='num_uses_failed',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_succeeded',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AddField(
model_name='archiveresult',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
),
],
database_operations=[
migrations.RunPython(add_columns_if_not_exist, reverse_code=migrations.RunPython.noop),
],
),
# Drop created_by_id from Snapshot (database only, already removed from model in 0035)
migrations.SeparateDatabaseAndState(
state_operations=[
# No state changes - field already removed in 0035
],
database_operations=[
migrations.RunSQL(
sql="""
-- Drop index first, then column
DROP INDEX IF EXISTS core_snapshot_created_by_id_6dbd6149;
ALTER TABLE core_snapshot DROP COLUMN created_by_id;
""",
reverse_sql=migrations.RunSQL.noop,
),
],
),
]
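
Note that ALTER TABLE ... DROP COLUMN only exists in SQLite 3.35+ (and the RENAME COLUMN used earlier needs 3.25+), so this migration implicitly assumes a reasonably recent runtime. A minimal guard sketch:

# Sketch: fail loudly on SQLite builds too old for ALTER TABLE ... DROP COLUMN.
import sqlite3

assert sqlite3.sqlite_version_info >= (3, 35, 0), (
    f'SQLite {sqlite3.sqlite_version} is too old for DROP COLUMN')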

@@ -1,30 +0,0 @@
# Reset num_uses_failed, num_uses_succeeded, and depth values that were stored as text instead of integers
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('core', '0038_fix_missing_columns'),
]
operations = [
# Fix string values that got inserted as literals instead of integers
migrations.RunSQL(
sql="""
UPDATE core_snapshot
SET num_uses_failed = 0
WHERE typeof(num_uses_failed) = 'text' OR num_uses_failed = 'num_uses_failed';
UPDATE core_snapshot
SET num_uses_succeeded = 0
WHERE typeof(num_uses_succeeded) = 'text' OR num_uses_succeeded = 'num_uses_succeeded';
UPDATE core_snapshot
SET depth = 0
WHERE typeof(depth) = 'text' OR depth = 'depth';
""",
reverse_sql=migrations.RunSQL.noop,
),
]
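
To check whether any mistyped values survive this cleanup, SQLite's typeof() can group rows by storage class; after the migration only 'integer' should remain. A minimal sketch, under the same database-path assumption as earlier:

# Sketch: group a counter column by SQLite storage class to spot stragglers.
import sqlite3

con = sqlite3.connect('data/index.sqlite3')  # path is an assumption
rows = con.execute(
    'SELECT typeof(num_uses_failed), COUNT(*) FROM core_snapshot GROUP BY 1'
).fetchall()
print(rows)  # expect only ('integer', N) after this migration
con.close()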