cleanup migrations, json, jsonl

This commit is contained in:
Nick Sweeting
2025-12-31 15:36:13 -08:00
parent 0930911a15
commit a04e4a7345
21 changed files with 993 additions and 1418 deletions

View File

@@ -252,8 +252,8 @@ class ArchiveResultInline(admin.TabularInline):
class ArchiveResultAdmin(BaseModelAdmin):
list_display = ('id', 'created_at', 'snapshot_info', 'tags_str', 'status', 'plugin_with_icon', 'cmd_str', 'output_str')
sort_fields = ('id', 'created_at', 'plugin', 'status')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon', 'process')
search_fields = ('id', 'snapshot__url', 'plugin', 'output_str', 'cmd_version', 'cmd', 'snapshot__timestamp', 'process__cmd')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon')
search_fields = ('id', 'snapshot__url', 'plugin', 'output_str', 'cmd_version', 'cmd', 'snapshot__timestamp')
autocomplete_fields = ['snapshot']
fieldsets = (
@@ -270,7 +270,7 @@ class ArchiveResultAdmin(BaseModelAdmin):
'classes': ('card',),
}),
('Command', {
'fields': ('process', 'cmd', 'cmd_str', 'cmd_version', 'pwd'),
'fields': ('cmd', 'cmd_str', 'cmd_version', 'pwd'),
'classes': ('card',),
}),
('Output', {

View File

@@ -1,299 +1,250 @@
# Generated by hand on 2025-12-29
# Upgrades core app from v0.7.2 (migration 0022) or v0.8.6rc0 (migration 0076) to v0.9.0 using raw SQL
# Upgrades core app from v0.7.2/v0.8.6rc0 (migration 0022) to v0.9.0 using raw SQL
# Handles both fresh installs and upgrades from v0.7.2/v0.8.6rc0
from django.db import migrations
from django.db import migrations, models, connection
def upgrade_from_v072_or_v086(apps, schema_editor):
"""
Upgrade core tables from either v0.7.2 or v0.8.6rc0 to v0.9.0.
Handles differences in schema between versions.
"""
with schema_editor.connection.cursor() as cursor:
# Check if uuid column exists (v0.7.2 has it, v0.8.6rc0 doesn't)
def get_table_columns(table_name):
"""Get list of column names for a table."""
cursor = connection.cursor()
cursor.execute(f"PRAGMA table_info({table_name})")
return {row[1] for row in cursor.fetchall()}
def upgrade_core_tables(apps, schema_editor):
"""Upgrade core tables from v0.7.2 or v0.8.6rc0 to v0.9.0."""
cursor = connection.cursor()
# Check if core_archiveresult table exists
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'")
if not cursor.fetchone():
# Fresh install - no migration needed, tables will be created by later migrations
return
# Detect which version we're migrating from
archiveresult_cols = get_table_columns('core_archiveresult')
has_uuid = 'uuid' in archiveresult_cols
has_abid = 'abid' in archiveresult_cols
# ============================================================================
# PART 1: Upgrade core_archiveresult table
# ============================================================================
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_archiveresult_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
uuid TEXT,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
snapshot_id TEXT NOT NULL,
plugin VARCHAR(32) NOT NULL DEFAULT '',
hook_name VARCHAR(255) NOT NULL DEFAULT '',
cmd TEXT,
pwd VARCHAR(256),
cmd_version VARCHAR(128),
start_ts DATETIME,
end_ts DATETIME,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
output_files TEXT NOT NULL DEFAULT '{}',
output_json TEXT,
output_str TEXT NOT NULL DEFAULT '',
output_size INTEGER NOT NULL DEFAULT 0,
output_mimetypes VARCHAR(512) NOT NULL DEFAULT '',
config TEXT,
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
FOREIGN KEY (snapshot_id) REFERENCES core_snapshot(id) ON DELETE CASCADE
);
""")
if has_uuid and not has_abid:
# Migrating from v0.7.2 (has uuid, minimal fields)
print('Migrating ArchiveResult from v0.7.2 schema...')
cursor.execute("""
SELECT COUNT(*) FROM pragma_table_info('core_archiveresult') WHERE name='uuid'
""")
has_uuid = cursor.fetchone()[0] > 0
# Check if id is INTEGER (v0.7.2) or TEXT/char (v0.8.6rc0)
cursor.execute("""
SELECT type FROM pragma_table_info('core_archiveresult') WHERE name='id'
""")
id_type = cursor.fetchone()[0] if cursor.rowcount else 'INTEGER'
is_v072 = 'INT' in id_type.upper()
# ============================================================================
# PART 1: Upgrade core_archiveresult table
# ============================================================================
# Create new table with v0.9.0 schema
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_archiveresult_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
uuid TEXT,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
snapshot_id TEXT NOT NULL,
plugin VARCHAR(32) NOT NULL DEFAULT '',
hook_name VARCHAR(255) NOT NULL DEFAULT '',
cmd TEXT,
pwd VARCHAR(256),
cmd_version VARCHAR(128),
start_ts DATETIME,
end_ts DATETIME,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
output_files TEXT NOT NULL DEFAULT '{}',
output_json TEXT,
output_str TEXT NOT NULL DEFAULT '',
output_size INTEGER NOT NULL DEFAULT 0,
output_mimetypes VARCHAR(512) NOT NULL DEFAULT '',
config TEXT,
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
binary_id TEXT,
iface_id TEXT,
process_id TEXT,
FOREIGN KEY (snapshot_id) REFERENCES core_snapshot(id) ON DELETE CASCADE,
FOREIGN KEY (binary_id) REFERENCES machine_binary(id) ON DELETE SET NULL,
FOREIGN KEY (iface_id) REFERENCES machine_networkinterface(id) ON DELETE SET NULL,
FOREIGN KEY (process_id) REFERENCES machine_process(id) ON DELETE RESTRICT
INSERT OR IGNORE INTO core_archiveresult_new (
id, uuid, created_at, modified_at, snapshot_id, plugin,
cmd, pwd, cmd_version, start_ts, end_ts, status, output_str
)
SELECT
id, uuid,
COALESCE(start_ts, CURRENT_TIMESTAMP) as created_at,
COALESCE(end_ts, start_ts, CURRENT_TIMESTAMP) as modified_at,
snapshot_id,
COALESCE(extractor, '') as plugin,
cmd, pwd, cmd_version,
start_ts, end_ts, status,
COALESCE(output, '') as output_str
FROM core_archiveresult;
""")
# Copy data based on source version
if is_v072:
# Coming from v0.7.2: has INTEGER id, has uuid column, has extractor
print(" Migrating from v0.7.2 schema...")
cursor.execute("""
INSERT OR IGNORE INTO core_archiveresult_new (
uuid, created_at, modified_at, snapshot_id, plugin,
cmd, pwd, cmd_version, start_ts, end_ts, status, output_str
)
SELECT
uuid,
COALESCE(start_ts, CURRENT_TIMESTAMP) as created_at,
COALESCE(end_ts, start_ts, CURRENT_TIMESTAMP) as modified_at,
snapshot_id,
COALESCE(extractor, '') as plugin,
cmd, pwd, cmd_version,
start_ts, end_ts, status,
COALESCE(output, '') as output_str
FROM core_archiveresult
""")
else:
# Coming from v0.8.6rc0: has TEXT id, no uuid column, has abid
print(" Migrating from v0.8.6rc0 schema...")
cursor.execute("""
INSERT OR IGNORE INTO core_archiveresult_new (
uuid, created_at, modified_at, snapshot_id, plugin,
cmd, pwd, cmd_version, start_ts, end_ts, status, retry_at, output_str
)
SELECT
id as uuid,
created_at,
modified_at,
snapshot_id,
COALESCE(extractor, '') as plugin,
cmd, pwd, cmd_version,
start_ts, end_ts, status, retry_at,
COALESCE(output, '') as output_str
FROM core_archiveresult
""")
# Replace old table
cursor.execute("DROP TABLE IF EXISTS core_archiveresult")
cursor.execute("ALTER TABLE core_archiveresult_new RENAME TO core_archiveresult")
# Create indexes
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_snapshot_id_idx ON core_archiveresult(snapshot_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_plugin_idx ON core_archiveresult(plugin)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_status_idx ON core_archiveresult(status)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_created_at_idx ON core_archiveresult(created_at)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_uuid_idx ON core_archiveresult(uuid)")
# ============================================================================
# PART 2: Upgrade core_snapshot table
# ============================================================================
# Check snapshot schema version
elif has_abid and not has_uuid:
# Migrating from v0.8.6rc0 (has abid, full fields)
print('Migrating ArchiveResult from v0.8.6rc0 schema...')
cursor.execute("""
SELECT COUNT(*) FROM pragma_table_info('core_snapshot') WHERE name='crawl_id'
""")
has_crawl_id = cursor.fetchone()[0] > 0
# Create new table
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_snapshot_new (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
downloaded_at DATETIME,
url TEXT NOT NULL,
timestamp TEXT NOT NULL,
title TEXT,
crawl_id TEXT,
depth INTEGER NOT NULL DEFAULT 0,
parent_snapshot_id TEXT,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
current_step INTEGER NOT NULL DEFAULT 0,
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
config TEXT,
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0
-- Note: crawl_id foreign key will be added in 0024 after assigning crawl_ids
-- FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
-- FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
INSERT OR IGNORE INTO core_archiveresult_new (
id, uuid, created_at, modified_at, snapshot_id, plugin,
cmd, pwd, cmd_version, start_ts, end_ts, status, retry_at, output_str
)
SELECT
id, abid as uuid,
created_at, modified_at,
snapshot_id,
COALESCE(extractor, '') as plugin,
cmd, pwd, cmd_version,
start_ts, end_ts, status, retry_at,
COALESCE(output, '') as output_str
FROM core_archiveresult;
""")
else:
print(f'Warning: Unexpected schema - has_uuid={has_uuid}, has_abid={has_abid}')
# Copy snapshot data
if has_crawl_id:
# v0.8.6rc0 schema - already has created_at, modified_at, bookmarked_at
cursor.execute("DROP TABLE IF EXISTS core_archiveresult;")
cursor.execute("ALTER TABLE core_archiveresult_new RENAME TO core_archiveresult;")
# Create indexes
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_snapshot_id_idx ON core_archiveresult(snapshot_id);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_plugin_idx ON core_archiveresult(plugin);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_status_idx ON core_archiveresult(status);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_created_at_idx ON core_archiveresult(created_at);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_uuid_idx ON core_archiveresult(uuid);")
# ============================================================================
# PART 2: Upgrade core_snapshot table
# ============================================================================
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_snapshot_new (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
url TEXT NOT NULL,
timestamp VARCHAR(32) NOT NULL UNIQUE,
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
crawl_id TEXT,
parent_snapshot_id TEXT,
title VARCHAR(512),
downloaded_at DATETIME,
depth INTEGER NOT NULL DEFAULT 0,
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
config TEXT NOT NULL DEFAULT '{}',
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
current_step INTEGER NOT NULL DEFAULT 0,
FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
);
""")
# Check if core_snapshot exists (it should)
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot'")
if cursor.fetchone():
# Detect which version we're migrating from
snapshot_cols = get_table_columns('core_snapshot')
has_added = 'added' in snapshot_cols
has_bookmarked_at = 'bookmarked_at' in snapshot_cols
if has_added and not has_bookmarked_at:
# Migrating from v0.7.2 (has added/updated, no bookmarked_at/created_at/modified_at)
print('Migrating Snapshot from v0.7.2 schema...')
cursor.execute("""
INSERT OR IGNORE INTO core_snapshot_new (
id, created_at, modified_at, bookmarked_at, downloaded_at, url, timestamp,
crawl_id, status, retry_at
id, url, timestamp, title, bookmarked_at, created_at, modified_at
)
SELECT
id,
created_at,
modified_at,
bookmarked_at,
downloaded_at,
url, timestamp,
NULLIF(crawl_id, ''),
COALESCE(status, 'queued'),
retry_at
FROM core_snapshot
id, url, timestamp, title,
COALESCE(added, CURRENT_TIMESTAMP) as bookmarked_at,
COALESCE(added, CURRENT_TIMESTAMP) as created_at,
COALESCE(updated, added, CURRENT_TIMESTAMP) as modified_at
FROM core_snapshot;
""")
elif has_bookmarked_at and not has_added:
# Migrating from v0.8.6rc0 (already has bookmarked_at/created_at/modified_at)
print('Migrating Snapshot from v0.8.6rc0 schema...')
# Check what fields exist
has_status = 'status' in snapshot_cols
has_retry_at = 'retry_at' in snapshot_cols
has_crawl_id = 'crawl_id' in snapshot_cols
# Build column list based on what exists
cols = ['id', 'url', 'timestamp', 'title', 'bookmarked_at', 'created_at', 'modified_at', 'downloaded_at']
if has_crawl_id:
cols.append('crawl_id')
if has_status:
cols.append('status')
if has_retry_at:
cols.append('retry_at')
cursor.execute(f"""
INSERT OR IGNORE INTO core_snapshot_new ({', '.join(cols)})
SELECT {', '.join(cols)}
FROM core_snapshot;
""")
else:
# v0.7.2 schema - will get crawl_id assigned by later migration (0024)
cursor.execute("""
INSERT OR IGNORE INTO core_snapshot_new (
id, created_at, modified_at, bookmarked_at, url, timestamp, crawl_id
)
SELECT
id,
COALESCE(added, CURRENT_TIMESTAMP),
COALESCE(updated, added, CURRENT_TIMESTAMP),
COALESCE(added, CURRENT_TIMESTAMP),
url, timestamp,
NULL as crawl_id
FROM core_snapshot
""")
print(f'Warning: Unexpected Snapshot schema - has_added={has_added}, has_bookmarked_at={has_bookmarked_at}')
# Replace old table
cursor.execute("DROP TABLE IF EXISTS core_snapshot")
cursor.execute("ALTER TABLE core_snapshot_new RENAME TO core_snapshot")
cursor.execute("DROP TABLE IF EXISTS core_snapshot;")
cursor.execute("ALTER TABLE core_snapshot_new RENAME TO core_snapshot;")
# Create indexes
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_crawl_id_idx ON core_snapshot(crawl_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_url_idx ON core_snapshot(url)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_status_idx ON core_snapshot(status)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_retry_at_idx ON core_snapshot(retry_at)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_created_at_idx ON core_snapshot(created_at)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at)")
# Create indexes
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_url_idx ON core_snapshot(url);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_timestamp_idx ON core_snapshot(timestamp);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_crawl_id_idx ON core_snapshot(crawl_id);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_status_idx ON core_snapshot(status);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_retry_at_idx ON core_snapshot(retry_at);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_created_at_idx ON core_snapshot(created_at);")
cursor.execute("CREATE UNIQUE INDEX IF NOT EXISTS core_snapshot_url_crawl_unique ON core_snapshot(url, crawl_id);")
# ============================================================================
# PART 3: Upgrade core_tag table
# ============================================================================
# ============================================================================
# PART 3: Upgrade core_tag table
# ============================================================================
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_tag_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
# Check if tag id is INTEGER (v0.7.2) or TEXT (v0.8.6rc0)
name VARCHAR(100) NOT NULL UNIQUE,
slug VARCHAR(100) NOT NULL UNIQUE,
created_by_id INTEGER,
FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE
);
""")
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_tag'")
if cursor.fetchone():
cursor.execute("""
SELECT type FROM pragma_table_info('core_tag') WHERE name='id'
""")
tag_id_type = cursor.fetchone()[0] if cursor.rowcount else 'INTEGER'
tag_id_is_int = 'INT' in tag_id_type.upper()
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_tag_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
created_by_id INTEGER,
name VARCHAR(100) NOT NULL UNIQUE,
slug VARCHAR(100) NOT NULL UNIQUE,
FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE SET NULL
)
INSERT OR IGNORE INTO core_tag_new (id, name, slug)
SELECT id, name, slug
FROM core_tag;
""")
if tag_id_is_int:
# v0.7.2: Direct copy (INTEGER to INTEGER)
cursor.execute("""
INSERT OR IGNORE INTO core_tag_new (id, name, slug)
SELECT id, name, slug FROM core_tag
""")
else:
# v0.8.6rc0: Need to remap TEXT ids to new INTEGER ids
cursor.execute("SELECT id, name, slug FROM core_tag")
old_tags = cursor.fetchall()
tag_id_mapping = {} # old_text_id -> new_int_id
cursor.execute("DROP TABLE IF EXISTS core_tag;")
cursor.execute("ALTER TABLE core_tag_new RENAME TO core_tag;")
for old_id, name, slug in old_tags:
cursor.execute("""
INSERT OR IGNORE INTO core_tag_new (name, slug)
VALUES (?, ?)
""", [name, slug])
cursor.execute("SELECT id FROM core_tag_new WHERE slug = ?", [slug])
new_id = cursor.fetchone()[0]
tag_id_mapping[old_id] = new_id
# Create indexes
cursor.execute("CREATE INDEX IF NOT EXISTS core_tag_created_at_idx ON core_tag(created_at);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_tag_created_by_id_idx ON core_tag(created_by_id);")
cursor.execute("DROP TABLE IF EXISTS core_tag")
cursor.execute("ALTER TABLE core_tag_new RENAME TO core_tag")
# Recreate M2M table
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_snapshot_tags_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
snapshot_id TEXT NOT NULL,
tag_id INTEGER NOT NULL,
FOREIGN KEY (snapshot_id) REFERENCES core_snapshot(id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES core_tag(id) ON DELETE CASCADE,
UNIQUE(snapshot_id, tag_id)
)
""")
if tag_id_is_int:
# Direct copy for v0.7.2
cursor.execute("""
INSERT OR IGNORE INTO core_snapshot_tags_new (snapshot_id, tag_id)
SELECT snapshot_id, tag_id FROM core_snapshot_tags
""")
else:
# v0.8.6rc0: Use mapping to convert old TEXT ids to new INTEGER ids
cursor.execute("SELECT snapshot_id, tag_id FROM core_snapshot_tags")
m2m_entries = cursor.fetchall()
for snapshot_id, old_tag_id in m2m_entries:
new_tag_id = tag_id_mapping.get(old_tag_id)
if new_tag_id:
cursor.execute("""
INSERT OR IGNORE INTO core_snapshot_tags_new (snapshot_id, tag_id)
VALUES (?, ?)
""", [snapshot_id, new_tag_id])
cursor.execute("DROP TABLE IF EXISTS core_snapshot_tags")
cursor.execute("ALTER TABLE core_snapshot_tags_new RENAME TO core_snapshot_tags")
print('✓ Core tables upgraded to v0.9.0')
class Migration(migrations.Migration):
@@ -301,10 +252,49 @@ class Migration(migrations.Migration):
dependencies = [
('core', '0022_auto_20231023_2008'),
('crawls', '0001_initial'),
('machine', '0001_initial'),
('auth', '0012_alter_user_first_name_max_length'),
]
operations = [
migrations.RunPython(upgrade_from_v072_or_v086, reverse_code=migrations.RunPython.noop),
migrations.SeparateDatabaseAndState(
database_operations=[
migrations.RunPython(
upgrade_core_tables,
reverse_code=migrations.RunPython.noop,
),
],
state_operations=[
# Remove old ArchiveResult fields
migrations.RemoveField(model_name='archiveresult', name='extractor'),
migrations.RemoveField(model_name='archiveresult', name='output'),
# Remove old Snapshot fields
migrations.RemoveField(model_name='snapshot', name='added'),
migrations.RemoveField(model_name='snapshot', name='updated'),
# SnapshotTag table already exists from v0.7.2, just declare it in state
migrations.CreateModel(
name='SnapshotTag',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('snapshot', models.ForeignKey(to='core.Snapshot', db_column='snapshot_id', on_delete=models.CASCADE)),
('tag', models.ForeignKey(to='core.Tag', db_column='tag_id', on_delete=models.CASCADE)),
],
options={
'db_table': 'core_snapshot_tags',
'unique_together': {('snapshot', 'tag')},
},
),
# Declare that Snapshot.tags M2M already uses through=SnapshotTag (from v0.7.2)
migrations.AlterField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(
'Tag',
blank=True,
related_name='snapshot_set',
through='SnapshotTag',
through_fields=('snapshot', 'tag'),
),
),
],
),
]

View File

@@ -1,7 +1,7 @@
# Generated by hand on 2025-12-29
# Creates a default crawl for v0.7.2 migrated snapshots and makes crawl_id NOT NULL
from django.db import migrations
from django.db import migrations, models
import uuid
@@ -56,8 +56,7 @@ class Migration(migrations.Migration):
dependencies = [
('core', '0023_upgrade_to_0_9_0'),
('crawls', '0002_upgrade_to_0_9_0'),
('machine', '0001_initial'),
('crawls', '0001_initial'),
('auth', '0012_alter_user_first_name_max_length'),
]
@@ -66,65 +65,80 @@ class Migration(migrations.Migration):
create_default_crawl_and_assign_snapshots,
reverse_code=migrations.RunPython.noop,
),
# Now make crawl_id NOT NULL
migrations.RunSQL(
sql="""
-- Rebuild snapshot table with NOT NULL crawl_id
CREATE TABLE core_snapshot_final (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
migrations.SeparateDatabaseAndState(
database_operations=[
# Now make crawl_id NOT NULL
migrations.RunSQL(
sql="""
-- Rebuild snapshot table with NOT NULL crawl_id
CREATE TABLE core_snapshot_final (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
url TEXT NOT NULL,
timestamp VARCHAR(32) NOT NULL UNIQUE,
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
url TEXT NOT NULL,
timestamp VARCHAR(32) NOT NULL UNIQUE,
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
crawl_id TEXT NOT NULL,
parent_snapshot_id TEXT,
crawl_id TEXT NOT NULL,
parent_snapshot_id TEXT,
title VARCHAR(512),
downloaded_at DATETIME,
depth INTEGER NOT NULL DEFAULT 0,
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
title VARCHAR(512),
downloaded_at DATETIME,
depth INTEGER NOT NULL DEFAULT 0,
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
config TEXT NOT NULL DEFAULT '{}',
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
config TEXT NOT NULL DEFAULT '{}',
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
current_step INTEGER NOT NULL DEFAULT 0,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
current_step INTEGER NOT NULL DEFAULT 0,
FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
);
FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
);
INSERT INTO core_snapshot_final (
id, created_at, modified_at, url, timestamp, bookmarked_at,
crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
config, notes, num_uses_succeeded, num_uses_failed,
status, retry_at, current_step
)
SELECT
id, created_at, modified_at, url, timestamp, bookmarked_at,
crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
COALESCE(config, '{}'), COALESCE(notes, ''), num_uses_succeeded, num_uses_failed,
status, retry_at, current_step
FROM core_snapshot;
INSERT INTO core_snapshot_final (
id, created_at, modified_at, url, timestamp, bookmarked_at,
crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
config, notes, num_uses_succeeded, num_uses_failed,
status, retry_at, current_step
)
SELECT
id, created_at, modified_at, url, timestamp, bookmarked_at,
crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
COALESCE(config, '{}'), COALESCE(notes, ''), num_uses_succeeded, num_uses_failed,
status, retry_at, current_step
FROM core_snapshot;
DROP TABLE core_snapshot;
ALTER TABLE core_snapshot_final RENAME TO core_snapshot;
DROP TABLE core_snapshot;
ALTER TABLE core_snapshot_final RENAME TO core_snapshot;
CREATE INDEX core_snapshot_url_idx ON core_snapshot(url);
CREATE INDEX core_snapshot_timestamp_idx ON core_snapshot(timestamp);
CREATE INDEX core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at);
CREATE INDEX core_snapshot_crawl_id_idx ON core_snapshot(crawl_id);
CREATE INDEX core_snapshot_status_idx ON core_snapshot(status);
CREATE INDEX core_snapshot_retry_at_idx ON core_snapshot(retry_at);
CREATE INDEX core_snapshot_created_at_idx ON core_snapshot(created_at);
CREATE UNIQUE INDEX core_snapshot_url_crawl_unique ON core_snapshot(url, crawl_id);
""",
reverse_sql=migrations.RunSQL.noop,
CREATE INDEX core_snapshot_url_idx ON core_snapshot(url);
CREATE INDEX core_snapshot_timestamp_idx ON core_snapshot(timestamp);
CREATE INDEX core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at);
CREATE INDEX core_snapshot_crawl_id_idx ON core_snapshot(crawl_id);
CREATE INDEX core_snapshot_status_idx ON core_snapshot(status);
CREATE INDEX core_snapshot_retry_at_idx ON core_snapshot(retry_at);
CREATE INDEX core_snapshot_created_at_idx ON core_snapshot(created_at);
CREATE UNIQUE INDEX core_snapshot_url_crawl_unique ON core_snapshot(url, crawl_id);
""",
reverse_sql=migrations.RunSQL.noop,
),
],
state_operations=[
migrations.AddField(
model_name='snapshot',
name='crawl',
field=models.ForeignKey(
on_delete=models.deletion.CASCADE,
to='crawls.crawl',
help_text='Crawl that created this snapshot'
),
),
],
),
]

View File

@@ -0,0 +1,258 @@
# Generated by Django 6.0 on 2025-12-31 23:09
import archivebox.base_models.models
import django.db.models.deletion
import django.utils.timezone
import uuid
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0024_assign_default_crawl'),
('crawls', '0001_initial'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.AlterModelOptions(
name='archiveresult',
options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'},
),
migrations.AlterModelOptions(
name='snapshot',
options={'verbose_name': 'Snapshot', 'verbose_name_plural': 'Snapshots'},
),
migrations.RemoveField(
model_name='archiveresult',
name='cmd',
),
migrations.RemoveField(
model_name='archiveresult',
name='cmd_version',
),
migrations.RemoveField(
model_name='archiveresult',
name='pwd',
),
migrations.AddField(
model_name='archiveresult',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AddField(
model_name='archiveresult',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='archiveresult',
name='hook_name',
field=models.CharField(blank=True, db_index=True, default='', help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)', max_length=255),
),
migrations.AddField(
model_name='archiveresult',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
migrations.AddField(
model_name='archiveresult',
name='notes',
field=models.TextField(blank=True, default=''),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_failed',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_succeeded',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
),
migrations.AddField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
),
migrations.AddField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
),
migrations.AddField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
),
migrations.AddField(
model_name='archiveresult',
name='output_str',
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
),
migrations.AddField(
model_name='archiveresult',
name='plugin',
field=models.CharField(db_index=True, default='', max_length=32),
),
migrations.AddField(
model_name='archiveresult',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
),
migrations.AddField(
model_name='snapshot',
name='bookmarked_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='snapshot',
name='config',
field=models.JSONField(default=dict),
),
migrations.AddField(
model_name='snapshot',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='snapshot',
name='current_step',
field=models.PositiveSmallIntegerField(db_index=True, default=0, help_text='Current hook step being executed (0-9). Used for sequential hook execution.'),
),
migrations.AddField(
model_name='snapshot',
name='depth',
field=models.PositiveSmallIntegerField(db_index=True, default=0),
),
migrations.AddField(
model_name='snapshot',
name='downloaded_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
),
migrations.AddField(
model_name='snapshot',
name='fs_version',
field=models.CharField(default='0.9.0', help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().', max_length=10),
),
migrations.AddField(
model_name='snapshot',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
migrations.AddField(
model_name='snapshot',
name='notes',
field=models.TextField(blank=True, default=''),
),
migrations.AddField(
model_name='snapshot',
name='num_uses_failed',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='snapshot',
name='num_uses_succeeded',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='snapshot',
name='parent_snapshot',
field=models.ForeignKey(blank=True, help_text='Parent snapshot that discovered this URL (for recursive crawling)', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='child_snapshots', to='core.snapshot'),
),
migrations.AddField(
model_name='snapshot',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
),
migrations.AddField(
model_name='snapshot',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('sealed', 'Sealed')], db_index=True, default='queued', max_length=15),
),
migrations.AddField(
model_name='tag',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, null=True),
),
migrations.AddField(
model_name='tag',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
),
migrations.AddField(
model_name='tag',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
migrations.AlterField(
model_name='archiveresult',
name='end_ts',
field=models.DateTimeField(blank=True, default=None, null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='id',
field=models.AutoField(editable=False, primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='archiveresult',
name='start_ts',
field=models.DateTimeField(blank=True, default=None, null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
),
migrations.AlterField(
model_name='archiveresult',
name='uuid',
field=models.UUIDField(blank=True, db_index=True, default=uuid.uuid7, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='crawl',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
),
migrations.AlterField(
model_name='snapshot',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
),
migrations.AlterField(
model_name='snapshot',
name='timestamp',
field=models.CharField(db_index=True, editable=False, max_length=32, unique=True),
),
migrations.AlterField(
model_name='snapshot',
name='url',
field=models.URLField(db_index=True),
),
migrations.AlterField(
model_name='tag',
name='slug',
field=models.SlugField(editable=False, max_length=100, unique=True),
),
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=('url', 'crawl'), name='unique_url_per_crawl'),
),
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
),
]

View File

@@ -1,484 +0,0 @@
# Generated by hand on 2025-12-29
# Cleans up extra columns from raw SQL migrations and ensures schema matches models
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
from django.conf import settings
import archivebox.base_models.models
def cleanup_extra_columns(apps, schema_editor):
    """
    Create Process records from old cmd/pwd/cmd_version columns and remove those columns.
    This preserves the execution details by moving them to the Process model.

    Steps (all raw SQL — historical models are not usable mid-migration, and the
    pragma_table_info / INSERT OR IGNORE / rename-rebuild idioms are SQLite-specific):
      1. Detect the legacy ``cmd`` column on core_archiveresult (present when
         upgrading from v0.7.2 / v0.8.6rc0; absent on fresh installs).
      2. Ensure one machine_machine row exists, creating a minimal one whose
         column list matches whichever schema version is on disk.
      3. Insert one machine_process row per ArchiveResult and point
         core_archiveresult.process_id at it.
      4. Rebuild core_archiveresult without the legacy columns and recreate
         its indexes.
    """
    with schema_editor.connection.cursor() as cursor:
        # Check if cmd column exists (means we came from v0.7.2/v0.8.6rc0)
        cursor.execute("SELECT COUNT(*) FROM pragma_table_info('core_archiveresult') WHERE name='cmd'")
        has_cmd = cursor.fetchone()[0] > 0

        if has_cmd:
            print(" Migrating cmd/pwd/cmd_version data to Process records...")

            # For each ArchiveResult, create a Process record with cmd/pwd data
            # Note: cmd_version from old schema is not preserved (it's now derived from Binary)
            cursor.execute("""
                SELECT id, cmd, pwd, binary_id, iface_id, start_ts, end_ts, status
                FROM core_archiveresult
            """)
            archive_results = cursor.fetchall()

            from archivebox.uuid_compat import uuid7
            from archivebox.base_models.models import get_or_create_system_user_pk

            # Get or create a Machine record
            # NOTE: relies on Django's SQLite cursor returning the underlying
            # cursor from execute(), allowing the chained .fetchone().
            result = cursor.execute("SELECT id FROM machine_machine LIMIT 1").fetchone()
            if result:
                machine_id = result[0]
                print(f" Using existing Machine: {machine_id}")
            else:
                # Create a minimal Machine record with raw SQL (can't use model during migration)
                print(" Creating Machine record for Process migration...")
                import platform
                import socket

                # Generate minimal machine data without using the model
                machine_id = str(uuid7())
                guid = f"{socket.gethostname()}-{platform.machine()}"
                hostname = socket.gethostname()

                # Check schema version: which optional columns does machine_machine have?
                cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='config'")
                has_config = cursor.fetchone()[0] > 0
                cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='abid'")
                has_abid = cursor.fetchone()[0] > 0
                cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='num_uses_succeeded'")
                has_num_uses = cursor.fetchone()[0] > 0

                # Insert directly with SQL (use INSERT OR IGNORE in case it already exists)
                if has_config:
                    # v0.9.0+ schema
                    cursor.execute("""
                        INSERT OR IGNORE INTO machine_machine (
                            id, created_at, modified_at,
                            guid, hostname, hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
                            os_arch, os_family, os_platform, os_release, os_kernel,
                            stats, config
                        ) VALUES (?, datetime('now'), datetime('now'), ?, ?, 0, 0, '', '', '', ?, ?, ?, ?, '', '{}', '{}')
                    """, (
                        machine_id, guid, hostname,
                        platform.machine(), platform.system(), platform.platform(), platform.release()
                    ))
                elif has_abid and has_num_uses:
                    # v0.8.6rc0 schema (has abid and num_uses columns)
                    cursor.execute("""
                        INSERT OR IGNORE INTO machine_machine (
                            id, abid, created_at, modified_at,
                            guid, hostname, hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
                            os_arch, os_family, os_platform, os_release, os_kernel,
                            stats, num_uses_failed, num_uses_succeeded
                        ) VALUES (?, '', datetime('now'), datetime('now'), ?, ?, 0, 0, '', '', '', ?, ?, ?, ?, '', '{}', 0, 0)
                    """, (
                        machine_id, guid, hostname,
                        platform.machine(), platform.system(), platform.platform(), platform.release()
                    ))
                else:
                    # v0.7.2 or other schema
                    cursor.execute("""
                        INSERT OR IGNORE INTO machine_machine (
                            id, created_at, modified_at,
                            guid, hostname, hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
                            os_arch, os_family, os_platform, os_release, os_kernel,
                            stats
                        ) VALUES (?, datetime('now'), datetime('now'), ?, ?, 0, 0, '', '', '', ?, ?, ?, ?, '', '{}')
                    """, (
                        machine_id, guid, hostname,
                        platform.machine(), platform.system(), platform.platform(), platform.release()
                    ))

                # Re-query to get the actual id (in case INSERT OR IGNORE skipped it)
                result = cursor.execute("SELECT id FROM machine_machine LIMIT 1").fetchone()
                if result:
                    machine_id = result[0]
                    print(f" ✓ Using/Created Machine: {machine_id}")
                else:
                    # INSERT OR IGNORE failed - try again without IGNORE to see the error
                    raise Exception("Failed to create Machine record - machine_machine table is empty after INSERT")

            for ar_id, cmd, pwd, binary_id, iface_id, start_ts, end_ts, status in archive_results:
                # Create Process record mirroring the legacy execution metadata.
                # stdout/stderr/pid/exit_code were never stored pre-0.9.0, so
                # they are left empty/NULL here.
                process_id = str(uuid7())
                cursor.execute("""
                    INSERT INTO machine_process (
                        id, created_at, modified_at,
                        machine_id, binary_id, iface_id,
                        pwd, cmd, env, timeout,
                        pid, exit_code, stdout, stderr,
                        started_at, ended_at, url, status, retry_at
                    ) VALUES (?, datetime('now'), datetime('now'), ?, ?, ?, ?, ?, '{}', 120, NULL, NULL, '', '', ?, ?, '', ?, NULL)
                """, (process_id, machine_id, binary_id, iface_id, pwd or '', cmd or '[]', start_ts, end_ts, status or 'queued'))

                # Update ArchiveResult to point to new Process
                cursor.execute("UPDATE core_archiveresult SET process_id = ? WHERE id = ?", (process_id, ar_id))

            print(f" ✓ Created {len(archive_results)} Process records from ArchiveResult data")

            # Now rebuild table without the extra columns (SQLite cannot DROP
            # multiple columns in place, so: create final table, copy, swap).
            print(" Rebuilding core_archiveresult table...")
            cursor.execute("""
                CREATE TABLE core_archiveresult_final (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    uuid TEXT,
                    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                    modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                    snapshot_id TEXT NOT NULL,
                    plugin VARCHAR(32) NOT NULL DEFAULT '',
                    hook_name VARCHAR(255) NOT NULL DEFAULT '',
                    start_ts DATETIME,
                    end_ts DATETIME,
                    status VARCHAR(15) NOT NULL DEFAULT 'queued',
                    retry_at DATETIME,
                    output_files TEXT NOT NULL DEFAULT '{}',
                    output_json TEXT,
                    output_str TEXT NOT NULL DEFAULT '',
                    output_size INTEGER NOT NULL DEFAULT 0,
                    output_mimetypes VARCHAR(512) NOT NULL DEFAULT '',
                    config TEXT,
                    notes TEXT NOT NULL DEFAULT '',
                    num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                    num_uses_failed INTEGER NOT NULL DEFAULT 0,
                    process_id TEXT NOT NULL,
                    FOREIGN KEY (snapshot_id) REFERENCES core_snapshot(id) ON DELETE CASCADE,
                    FOREIGN KEY (process_id) REFERENCES machine_process(id) ON DELETE RESTRICT
                )
            """)

            # Copy data (cmd, pwd, etc. are now in Process records)
            cursor.execute("""
                INSERT INTO core_archiveresult_final SELECT
                    id, uuid, created_at, modified_at,
                    snapshot_id, plugin, hook_name,
                    start_ts, end_ts, status, retry_at,
                    output_files, output_json, output_str, output_size, output_mimetypes,
                    config, notes, num_uses_succeeded, num_uses_failed,
                    process_id
                FROM core_archiveresult
            """)

            # Replace table
            cursor.execute("DROP TABLE core_archiveresult")
            cursor.execute("ALTER TABLE core_archiveresult_final RENAME TO core_archiveresult")

            # Recreate indexes (dropped along with the old table)
            cursor.execute("CREATE INDEX core_archiveresult_snapshot_id_idx ON core_archiveresult(snapshot_id)")
            cursor.execute("CREATE INDEX core_archiveresult_plugin_idx ON core_archiveresult(plugin)")
            cursor.execute("CREATE INDEX core_archiveresult_status_idx ON core_archiveresult(status)")
            cursor.execute("CREATE INDEX core_archiveresult_retry_at_idx ON core_archiveresult(retry_at)")
            cursor.execute("CREATE INDEX core_archiveresult_created_at_idx ON core_archiveresult(created_at)")
            cursor.execute("CREATE INDEX core_archiveresult_uuid_idx ON core_archiveresult(uuid)")

            print(" ✓ Cleaned up core_archiveresult schema")
class Migration(migrations.Migration):
    """
    core 0025: clean up the schema left behind by the raw-SQL upgrade migrations.

    Uses SeparateDatabaseAndState because the raw SQL already changed the
    database: ``database_operations`` runs only the data move + table rebuild
    (cleanup_extra_columns), while ``state_operations`` declares every field
    add/remove/alter so Django's in-memory migration state matches what is
    actually on disk — no DDL is emitted for the state side.
    """

    dependencies = [
        ('core', '0024_assign_default_crawl'),
        ('machine', '0005_add_process_table'),
        ('crawls', '0002_upgrade_to_0_9_0'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.SeparateDatabaseAndState(
            database_operations=[
                migrations.RunPython(
                    cleanup_extra_columns,
                    reverse_code=migrations.RunPython.noop,
                ),
            ],
            state_operations=[
                # Tell Django about all the fields that exist after raw SQL migrations

                # ArchiveResult model options
                migrations.AlterModelOptions(
                    name='archiveresult',
                    options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'},
                ),

                # Remove old fields (data already copied to machine.Process)
                migrations.RemoveField(model_name='archiveresult', name='cmd'),
                migrations.RemoveField(model_name='archiveresult', name='pwd'),
                migrations.RemoveField(model_name='archiveresult', name='cmd_version'),
                migrations.RemoveField(model_name='archiveresult', name='extractor'),
                migrations.RemoveField(model_name='archiveresult', name='output'),
                migrations.RemoveField(model_name='snapshot', name='added'),
                migrations.RemoveField(model_name='snapshot', name='updated'),

                # Add new ArchiveResult fields
                migrations.AddField(
                    model_name='archiveresult',
                    name='plugin',
                    field=models.CharField(blank=True, default='', max_length=32),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='hook_name',
                    field=models.CharField(blank=True, default='', max_length=255),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='output_str',
                    field=models.TextField(blank=True, default=''),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='output_json',
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='output_files',
                    field=models.JSONField(blank=True, default=dict),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='output_size',
                    field=models.PositiveIntegerField(default=0),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='output_mimetypes',
                    field=models.CharField(blank=True, default='', max_length=512),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='config',
                    field=models.JSONField(blank=True, default=dict, null=True),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='notes',
                    field=models.TextField(blank=True, default=''),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='num_uses_succeeded',
                    field=models.PositiveIntegerField(default=0),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='num_uses_failed',
                    field=models.PositiveIntegerField(default=0),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='retry_at',
                    field=models.DateTimeField(blank=True, db_index=True, default=None, null=True),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='created_at',
                    field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
                ),
                migrations.AddField(
                    model_name='archiveresult',
                    name='modified_at',
                    field=models.DateTimeField(auto_now=True),
                ),
                # null=True here; tightened to required in a later migration
                migrations.AddField(
                    model_name='archiveresult',
                    name='process',
                    field=models.OneToOneField(null=True, on_delete=django.db.models.deletion.PROTECT, related_name='archiveresult', to='machine.process'),
                ),

                # Update Snapshot model
                migrations.AlterModelOptions(
                    name='snapshot',
                    options={'verbose_name': 'Snapshot', 'verbose_name_plural': 'Snapshots'},
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='created_at',
                    field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='modified_at',
                    field=models.DateTimeField(auto_now=True),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='bookmarked_at',
                    field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='downloaded_at',
                    field=models.DateTimeField(blank=True, null=True),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='crawl',
                    field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='crawls.crawl'),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='depth',
                    field=models.PositiveSmallIntegerField(default=0),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='parent_snapshot',
                    field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='child_snapshots', to='core.snapshot'),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='status',
                    field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('sealed', 'Sealed')], db_index=True, default='queued', max_length=15),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='retry_at',
                    field=models.DateTimeField(blank=True, db_index=True, default=None, null=True),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='current_step',
                    field=models.PositiveSmallIntegerField(default=0),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='fs_version',
                    field=models.CharField(default='0.9.0', max_length=10),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='config',
                    field=models.JSONField(blank=True, default=dict),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='notes',
                    field=models.TextField(blank=True, default=''),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='num_uses_succeeded',
                    field=models.PositiveIntegerField(default=0),
                ),
                migrations.AddField(
                    model_name='snapshot',
                    name='num_uses_failed',
                    field=models.PositiveIntegerField(default=0),
                ),

                # Update Tag model
                migrations.AlterModelOptions(
                    name='tag',
                    options={'verbose_name': 'Tag', 'verbose_name_plural': 'Tags'},
                ),
                migrations.AddField(
                    model_name='tag',
                    name='created_at',
                    field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, null=True),
                ),
                migrations.AddField(
                    model_name='tag',
                    name='modified_at',
                    field=models.DateTimeField(auto_now=True),
                ),
                migrations.AddField(
                    model_name='tag',
                    name='created_by',
                    field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
                ),

                # Alter field types
                migrations.AlterField(
                    model_name='archiveresult',
                    name='id',
                    field=models.AutoField(primary_key=True, serialize=False, verbose_name='ID'),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='uuid',
                    field=models.UUIDField(blank=True, db_index=True, editable=False, null=True, unique=True),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='end_ts',
                    field=models.DateTimeField(blank=True, default=None, null=True),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='start_ts',
                    field=models.DateTimeField(blank=True, default=None, null=True),
                ),
                migrations.AlterField(
                    model_name='archiveresult',
                    name='status',
                    field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=15),
                ),
                migrations.AlterField(
                    model_name='snapshot',
                    name='id',
                    field=models.CharField(editable=False, max_length=32, primary_key=True, serialize=False, unique=True),
                ),
                migrations.AlterField(
                    model_name='snapshot',
                    name='timestamp',
                    field=models.CharField(db_index=True, max_length=32, unique=True),
                ),
                migrations.AlterField(
                    model_name='snapshot',
                    name='url',
                    field=models.URLField(max_length=2048),
                ),
                migrations.AlterField(
                    model_name='tag',
                    name='slug',
                    field=models.SlugField(editable=False, max_length=100, unique=True),
                ),

                # Create M2M model for snapshot tags
                migrations.CreateModel(
                    name='SnapshotTag',
                    fields=[
                        ('id', models.AutoField(primary_key=True, serialize=False, verbose_name='ID')),
                        ('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.snapshot')),
                        ('tag', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.tag')),
                    ],
                    options={
                        'db_table': 'core_snapshot_tags',
                    },
                ),
                migrations.AlterUniqueTogether(
                    name='snapshottag',
                    unique_together={('snapshot', 'tag')},
                ),

                # Update tags field on Snapshot to use the through model
                migrations.AlterField(
                    model_name='snapshot',
                    name='tags',
                    field=models.ManyToManyField(related_name='snapshot_set', through='core.SnapshotTag', to='core.tag'),
                ),

                # Add constraints
                migrations.AddConstraint(
                    model_name='snapshot',
                    constraint=models.UniqueConstraint(fields=['url', 'crawl'], name='unique_url_per_crawl'),
                ),
                migrations.AddConstraint(
                    model_name='snapshot',
                    constraint=models.UniqueConstraint(fields=['timestamp'], name='unique_timestamp'),
                ),
            ],
        ),
    ]

View File

@@ -1,76 +0,0 @@
# Generated by hand on 2025-12-30
# Final field adjustments to match model definitions exactly
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
from archivebox.uuid_compat import uuid7
class Migration(migrations.Migration):
    """
    core 0026: final field adjustments so the Django migration state matches
    the model definitions exactly (index/editable/help_text/default tweaks on
    Snapshot, plus explicit db_column declarations on SnapshotTag).
    """

    dependencies = [
        ('core', '0025_cleanup_schema'),
        ('crawls', '0002_upgrade_to_0_9_0'),
    ]

    operations = [
        # Alter Snapshot fields to match model exactly
        migrations.AlterField(
            model_name='snapshot',
            name='id',
            field=models.UUIDField(default=uuid7, editable=False, primary_key=True, unique=True),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='timestamp',
            field=models.CharField(db_index=True, editable=False, max_length=32, unique=True),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='url',
            field=models.URLField(db_index=True, unique=False),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='downloaded_at',
            field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='parent_snapshot',
            field=models.ForeignKey(blank=True, db_index=True, help_text='Parent snapshot that discovered this URL (for recursive crawling)', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='child_snapshots', to='core.snapshot'),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='retry_at',
            field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='fs_version',
            field=models.CharField(default='0.9.0', help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().', max_length=10),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='tags',
            field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
        ),
        # Alter SnapshotTag fields
        migrations.AlterField(
            model_name='snapshottag',
            name='id',
            field=models.AutoField(primary_key=True, serialize=False, verbose_name='ID'),
        ),
        migrations.AlterField(
            model_name='snapshottag',
            name='snapshot',
            field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot'),
        ),
        migrations.AlterField(
            model_name='snapshottag',
            name='tag',
            field=models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag'),
        ),
    ]

View File

@@ -1,108 +0,0 @@
# Generated by Django 6.0 on 2025-12-31 09:04
import django.db.models.deletion
import django.utils.timezone
import uuid
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    core 0027 (auto-generated by Django 6.0): sync remaining field options with
    the models — help_texts, db_index flags, widened output_size to BigInteger,
    expanded status choices, uuid7 defaults, and makes ArchiveResult.process
    a required OneToOneField.
    """

    dependencies = [
        ('core', '0026_final_field_adjustments'),
        ('crawls', '0002_upgrade_to_0_9_0'),
        ('machine', '0001_initial'),
    ]

    operations = [
        migrations.AlterField(
            model_name='archiveresult',
            name='hook_name',
            field=models.CharField(blank=True, db_index=True, default='', help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)', max_length=255),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='id',
            field=models.AutoField(editable=False, primary_key=True, serialize=False),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='output_files',
            field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='output_json',
            field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='output_mimetypes',
            field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='output_size',
            field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='output_str',
            field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='plugin',
            field=models.CharField(db_index=True, default='', max_length=32),
        ),
        # process becomes non-nullable here (every row got one in 0025)
        migrations.AlterField(
            model_name='archiveresult',
            name='process',
            field=models.OneToOneField(help_text='Process execution details for this archive result', on_delete=django.db.models.deletion.PROTECT, related_name='archiveresult', to='machine.process'),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='retry_at',
            field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='status',
            field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
        ),
        migrations.AlterField(
            model_name='archiveresult',
            name='uuid',
            field=models.UUIDField(blank=True, db_index=True, default=uuid.uuid7, null=True),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='config',
            field=models.JSONField(default=dict),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='crawl',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='current_step',
            field=models.PositiveSmallIntegerField(db_index=True, default=0, help_text='Current hook step being executed (0-9). Used for sequential hook execution.'),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='depth',
            field=models.PositiveSmallIntegerField(db_index=True, default=0),
        ),
        migrations.AlterField(
            model_name='snapshot',
            name='id',
            field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
        ),
        migrations.AlterField(
            model_name='snapshottag',
            name='id',
            field=models.AutoField(primary_key=True, serialize=False),
        ),
    ]

View File

@@ -91,9 +91,9 @@ class Tag(ModelWithSerializers):
def api_url(self) -> str:
return reverse_lazy('api-1:get_tag', args=[self.id])
def to_jsonl(self) -> dict:
def to_json(self) -> dict:
"""
Convert Tag model instance to a JSONL record.
Convert Tag model instance to a JSON-serializable dict.
"""
from archivebox.config import VERSION
return {
@@ -105,12 +105,12 @@ class Tag(ModelWithSerializers):
}
@staticmethod
def from_jsonl(record: Dict[str, Any], overrides: Dict[str, Any] = None):
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] = None):
"""
Create/update Tag from JSONL record.
Create/update Tag from JSON dict.
Args:
record: JSONL record with 'name' field
record: JSON dict with 'name' field
overrides: Optional dict with 'snapshot' to auto-attach tag
Returns:
@@ -982,8 +982,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
processes_seen = set()
with open(index_path, 'w') as f:
# Write Snapshot record first (to_jsonl includes crawl_id, fs_version)
f.write(json.dumps(self.to_jsonl()) + '\n')
# Write Snapshot record first (to_json includes crawl_id, fs_version)
f.write(json.dumps(self.to_json()) + '\n')
# Write ArchiveResult records with their associated Binary and Process
# Use select_related to optimize queries
@@ -991,15 +991,15 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
# Write Binary record if not already written
if ar.process and ar.process.binary and ar.process.binary_id not in binaries_seen:
binaries_seen.add(ar.process.binary_id)
f.write(json.dumps(ar.process.binary.to_jsonl()) + '\n')
f.write(json.dumps(ar.process.binary.to_json()) + '\n')
# Write Process record if not already written
if ar.process and ar.process_id not in processes_seen:
processes_seen.add(ar.process_id)
f.write(json.dumps(ar.process.to_jsonl()) + '\n')
f.write(json.dumps(ar.process.to_json()) + '\n')
# Write ArchiveResult record
f.write(json.dumps(ar.to_jsonl()) + '\n')
f.write(json.dumps(ar.to_json()) + '\n')
def read_index_jsonl(self) -> dict:
"""
@@ -1422,9 +1422,9 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
return False
def to_jsonl(self) -> dict:
def to_json(self) -> dict:
"""
Convert Snapshot model instance to a JSONL record.
Convert Snapshot model instance to a JSON-serializable dict.
Includes all fields needed to fully reconstruct/identify this snapshot.
"""
from archivebox.config import VERSION
@@ -1445,9 +1445,9 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
}
@staticmethod
def from_jsonl(record: Dict[str, Any], overrides: Dict[str, Any] = None, queue_for_extraction: bool = True):
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] = None, queue_for_extraction: bool = True):
"""
Create/update Snapshot from JSONL record or dict.
Create/update Snapshot from JSON dict.
Unified method that handles:
- ID-based patching: {"id": "...", "title": "new title"}
@@ -2106,8 +2106,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
result['canonical'] = self.canonical_outputs()
return result
def to_json(self, indent: int = 4) -> str:
"""Convert to JSON string"""
def to_json_str(self, indent: int = 4) -> str:
"""Convert to JSON string (legacy method, use to_json() for dict)"""
return to_json(self.to_dict(extended=True), indent=indent)
def to_csv(self, cols: Optional[List[str]] = None, separator: str = ',', ljust: int = 0) -> str:
@@ -2284,14 +2284,14 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
hook_name = models.CharField(max_length=255, blank=True, default='', db_index=True, help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)')
# Process FK - tracks execution details (cmd, pwd, stdout, stderr, etc.)
# Required - every ArchiveResult must have a Process
process = models.OneToOneField(
'machine.Process',
on_delete=models.PROTECT,
null=False, # Required after migration 4
related_name='archiveresult',
help_text='Process execution details for this archive result'
)
# Added POST-v0.9.0, will be added in a separate migration
# process = models.OneToOneField(
# 'machine.Process',
# on_delete=models.PROTECT,
# null=False,
# related_name='archiveresult',
# help_text='Process execution details for this archive result'
# )
# New output fields (replacing old 'output' field)
output_str = models.TextField(blank=True, default='', help_text='Human-readable output summary')
@@ -2326,9 +2326,9 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
"""Convenience property to access the user who created this archive result via its snapshot's crawl."""
return self.snapshot.crawl.created_by
def to_jsonl(self) -> dict:
def to_json(self) -> dict:
"""
Convert ArchiveResult model instance to a JSONL record.
Convert ArchiveResult model instance to a JSON-serializable dict.
"""
from archivebox.config import VERSION
record = {
@@ -2360,6 +2360,50 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
record['process_id'] = str(self.process_id)
return record
@staticmethod
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] = None):
    """
    Resolve an ArchiveResult from a JSON dict, creating one if necessary.

    Lookup order:
      1. By explicit 'id' in the record, if present and it exists.
      2. By (snapshot_id, plugin) via get_or_create, seeding hook_name,
         status, and output_str from the record on creation.

    Args:
        record: JSON dict with 'snapshot_id', 'plugin', etc.
        overrides: Optional dict of field overrides

    Returns:
        ArchiveResult instance, or None when the record lacks the required
        keys or references a Snapshot that does not exist.
    """
    snap_pk = record.get('snapshot_id')
    plugin_name = record.get('plugin')
    if not (snap_pk and plugin_name):
        return None

    # Fast path: an explicit id that already exists wins outright.
    existing_pk = record.get('id')
    if existing_pk:
        try:
            return ArchiveResult.objects.get(id=existing_pk)
        except ArchiveResult.DoesNotExist:
            pass

    # Otherwise fall back to (snapshot, plugin) identity.
    from archivebox.core.models import Snapshot
    try:
        parent_snapshot = Snapshot.objects.get(id=snap_pk)
    except Snapshot.DoesNotExist:
        return None

    creation_defaults = {
        'hook_name': record.get('hook_name', ''),
        'status': record.get('status', 'queued'),
        'output_str': record.get('output_str', ''),
    }
    archiveresult, _created = ArchiveResult.objects.get_or_create(
        snapshot=parent_snapshot,
        plugin=plugin_name,
        defaults=creation_defaults,
    )
    return archiveresult
def save(self, *args, **kwargs):
is_new = self._state.adding