cleanup migrations, json, jsonl

Nick Sweeting
2025-12-31 15:36:13 -08:00
parent 0930911a15
commit a04e4a7345
21 changed files with 993 additions and 1418 deletions

View File

@@ -207,7 +207,7 @@ def run_plugins(
}.get(result.status, 'dim')
rprint(f' [{status_color}]{result.status}[/{status_color}] {result.plugin}{result.output_str or ""}', file=sys.stderr)
else:
write_record(result.to_jsonl())
write_record(result.to_json())
except Snapshot.DoesNotExist:
continue
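The rename from to_jsonl() to to_json() reflects that these methods return plain dicts; producing JSONL is the writer's job, one JSON object per line. A minimal sketch of what a write_record helper along those lines could look like (the project's actual write_record is not shown in this diff, so this is an assumption):

    import json
    import sys

    def write_record(record: dict, file=sys.stdout) -> None:
        # hypothetical helper: serialize one dict per line, i.e. JSONL output
        file.write(json.dumps(record, default=str) + '\n')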

View File

@@ -252,8 +252,8 @@ class ArchiveResultInline(admin.TabularInline):
class ArchiveResultAdmin(BaseModelAdmin):
list_display = ('id', 'created_at', 'snapshot_info', 'tags_str', 'status', 'plugin_with_icon', 'cmd_str', 'output_str')
sort_fields = ('id', 'created_at', 'plugin', 'status')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon', 'process')
search_fields = ('id', 'snapshot__url', 'plugin', 'output_str', 'cmd_version', 'cmd', 'snapshot__timestamp', 'process__cmd')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon')
search_fields = ('id', 'snapshot__url', 'plugin', 'output_str', 'cmd_version', 'cmd', 'snapshot__timestamp')
autocomplete_fields = ['snapshot']
fieldsets = (
@@ -270,7 +270,7 @@ class ArchiveResultAdmin(BaseModelAdmin):
'classes': ('card',),
}),
('Command', {
'fields': ('process', 'cmd', 'cmd_str', 'cmd_version', 'pwd'),
'fields': ('cmd', 'cmd_str', 'cmd_version', 'pwd'),
'classes': ('card',),
}),
('Output', {

View File

@@ -1,299 +1,250 @@
# Generated by hand on 2025-12-29
# Upgrades core app from v0.7.2 (migration 0022) or v0.8.6rc0 (migration 0076) to v0.9.0 using raw SQL
# Upgrades core app from v0.7.2/v0.8.6rc0 (migration 0022) to v0.9.0 using raw SQL
# Handles both fresh installs and upgrades from v0.7.2/v0.8.6rc0
from django.db import migrations
from django.db import migrations, models, connection
def upgrade_from_v072_or_v086(apps, schema_editor):
"""
Upgrade core tables from either v0.7.2 or v0.8.6rc0 to v0.9.0.
Handles differences in schema between versions.
"""
with schema_editor.connection.cursor() as cursor:
# Check if uuid column exists (v0.7.2 has it, v0.8.6rc0 doesn't)
def get_table_columns(table_name):
"""Get list of column names for a table."""
cursor = connection.cursor()
cursor.execute(f"PRAGMA table_info({table_name})")
return {row[1] for row in cursor.fetchall()}
def upgrade_core_tables(apps, schema_editor):
"""Upgrade core tables from v0.7.2 or v0.8.6rc0 to v0.9.0."""
cursor = connection.cursor()
# Check if core_archiveresult table exists
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'")
if not cursor.fetchone():
# Fresh install - no migration needed, tables will be created by later migrations
return
# Detect which version we're migrating from
archiveresult_cols = get_table_columns('core_archiveresult')
has_uuid = 'uuid' in archiveresult_cols
has_abid = 'abid' in archiveresult_cols
# ============================================================================
# PART 1: Upgrade core_archiveresult table
# ============================================================================
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_archiveresult_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
uuid TEXT,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
snapshot_id TEXT NOT NULL,
plugin VARCHAR(32) NOT NULL DEFAULT '',
hook_name VARCHAR(255) NOT NULL DEFAULT '',
cmd TEXT,
pwd VARCHAR(256),
cmd_version VARCHAR(128),
start_ts DATETIME,
end_ts DATETIME,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
output_files TEXT NOT NULL DEFAULT '{}',
output_json TEXT,
output_str TEXT NOT NULL DEFAULT '',
output_size INTEGER NOT NULL DEFAULT 0,
output_mimetypes VARCHAR(512) NOT NULL DEFAULT '',
config TEXT,
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
FOREIGN KEY (snapshot_id) REFERENCES core_snapshot(id) ON DELETE CASCADE
);
""")
if has_uuid and not has_abid:
# Migrating from v0.7.2 (has uuid, minimal fields)
print('Migrating ArchiveResult from v0.7.2 schema...')
cursor.execute("""
SELECT COUNT(*) FROM pragma_table_info('core_archiveresult') WHERE name='uuid'
""")
has_uuid = cursor.fetchone()[0] > 0
# Check if id is INTEGER (v0.7.2) or TEXT/char (v0.8.6rc0)
cursor.execute("""
SELECT type FROM pragma_table_info('core_archiveresult') WHERE name='id'
""")
id_type = cursor.fetchone()[0] if cursor.rowcount else 'INTEGER'
is_v072 = 'INT' in id_type.upper()
# ============================================================================
# PART 1: Upgrade core_archiveresult table
# ============================================================================
# Create new table with v0.9.0 schema
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_archiveresult_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
uuid TEXT,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
snapshot_id TEXT NOT NULL,
plugin VARCHAR(32) NOT NULL DEFAULT '',
hook_name VARCHAR(255) NOT NULL DEFAULT '',
cmd TEXT,
pwd VARCHAR(256),
cmd_version VARCHAR(128),
start_ts DATETIME,
end_ts DATETIME,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
output_files TEXT NOT NULL DEFAULT '{}',
output_json TEXT,
output_str TEXT NOT NULL DEFAULT '',
output_size INTEGER NOT NULL DEFAULT 0,
output_mimetypes VARCHAR(512) NOT NULL DEFAULT '',
config TEXT,
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
binary_id TEXT,
iface_id TEXT,
process_id TEXT,
FOREIGN KEY (snapshot_id) REFERENCES core_snapshot(id) ON DELETE CASCADE,
FOREIGN KEY (binary_id) REFERENCES machine_binary(id) ON DELETE SET NULL,
FOREIGN KEY (iface_id) REFERENCES machine_networkinterface(id) ON DELETE SET NULL,
FOREIGN KEY (process_id) REFERENCES machine_process(id) ON DELETE RESTRICT
INSERT OR IGNORE INTO core_archiveresult_new (
id, uuid, created_at, modified_at, snapshot_id, plugin,
cmd, pwd, cmd_version, start_ts, end_ts, status, output_str
)
SELECT
id, uuid,
COALESCE(start_ts, CURRENT_TIMESTAMP) as created_at,
COALESCE(end_ts, start_ts, CURRENT_TIMESTAMP) as modified_at,
snapshot_id,
COALESCE(extractor, '') as plugin,
cmd, pwd, cmd_version,
start_ts, end_ts, status,
COALESCE(output, '') as output_str
FROM core_archiveresult;
""")
# Copy data based on source version
if is_v072:
# Coming from v0.7.2: has INTEGER id, has uuid column, has extractor
print(" Migrating from v0.7.2 schema...")
cursor.execute("""
INSERT OR IGNORE INTO core_archiveresult_new (
uuid, created_at, modified_at, snapshot_id, plugin,
cmd, pwd, cmd_version, start_ts, end_ts, status, output_str
)
SELECT
uuid,
COALESCE(start_ts, CURRENT_TIMESTAMP) as created_at,
COALESCE(end_ts, start_ts, CURRENT_TIMESTAMP) as modified_at,
snapshot_id,
COALESCE(extractor, '') as plugin,
cmd, pwd, cmd_version,
start_ts, end_ts, status,
COALESCE(output, '') as output_str
FROM core_archiveresult
""")
else:
# Coming from v0.8.6rc0: has TEXT id, no uuid column, has abid
print(" Migrating from v0.8.6rc0 schema...")
cursor.execute("""
INSERT OR IGNORE INTO core_archiveresult_new (
uuid, created_at, modified_at, snapshot_id, plugin,
cmd, pwd, cmd_version, start_ts, end_ts, status, retry_at, output_str
)
SELECT
id as uuid,
created_at,
modified_at,
snapshot_id,
COALESCE(extractor, '') as plugin,
cmd, pwd, cmd_version,
start_ts, end_ts, status, retry_at,
COALESCE(output, '') as output_str
FROM core_archiveresult
""")
# Replace old table
cursor.execute("DROP TABLE IF EXISTS core_archiveresult")
cursor.execute("ALTER TABLE core_archiveresult_new RENAME TO core_archiveresult")
# Create indexes
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_snapshot_id_idx ON core_archiveresult(snapshot_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_plugin_idx ON core_archiveresult(plugin)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_status_idx ON core_archiveresult(status)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_created_at_idx ON core_archiveresult(created_at)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_uuid_idx ON core_archiveresult(uuid)")
# ============================================================================
# PART 2: Upgrade core_snapshot table
# ============================================================================
# Check snapshot schema version
elif has_abid and not has_uuid:
# Migrating from v0.8.6rc0 (has abid, full fields)
print('Migrating ArchiveResult from v0.8.6rc0 schema...')
cursor.execute("""
SELECT COUNT(*) FROM pragma_table_info('core_snapshot') WHERE name='crawl_id'
""")
has_crawl_id = cursor.fetchone()[0] > 0
# Create new table
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_snapshot_new (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
downloaded_at DATETIME,
url TEXT NOT NULL,
timestamp TEXT NOT NULL,
title TEXT,
crawl_id TEXT,
depth INTEGER NOT NULL DEFAULT 0,
parent_snapshot_id TEXT,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
current_step INTEGER NOT NULL DEFAULT 0,
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
config TEXT,
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0
-- Note: crawl_id foreign key will be added in 0024 after assigning crawl_ids
-- FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
-- FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
INSERT OR IGNORE INTO core_archiveresult_new (
id, uuid, created_at, modified_at, snapshot_id, plugin,
cmd, pwd, cmd_version, start_ts, end_ts, status, retry_at, output_str
)
SELECT
id, abid as uuid,
created_at, modified_at,
snapshot_id,
COALESCE(extractor, '') as plugin,
cmd, pwd, cmd_version,
start_ts, end_ts, status, retry_at,
COALESCE(output, '') as output_str
FROM core_archiveresult;
""")
else:
print(f'Warning: Unexpected schema - has_uuid={has_uuid}, has_abid={has_abid}')
# Copy snapshot data
if has_crawl_id:
# v0.8.6rc0 schema - already has created_at, modified_at, bookmarked_at
cursor.execute("DROP TABLE IF EXISTS core_archiveresult;")
cursor.execute("ALTER TABLE core_archiveresult_new RENAME TO core_archiveresult;")
# Create indexes
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_snapshot_id_idx ON core_archiveresult(snapshot_id);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_plugin_idx ON core_archiveresult(plugin);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_status_idx ON core_archiveresult(status);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_retry_at_idx ON core_archiveresult(retry_at);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_created_at_idx ON core_archiveresult(created_at);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_archiveresult_uuid_idx ON core_archiveresult(uuid);")
# ============================================================================
# PART 2: Upgrade core_snapshot table
# ============================================================================
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_snapshot_new (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
url TEXT NOT NULL,
timestamp VARCHAR(32) NOT NULL UNIQUE,
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
crawl_id TEXT,
parent_snapshot_id TEXT,
title VARCHAR(512),
downloaded_at DATETIME,
depth INTEGER NOT NULL DEFAULT 0,
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
config TEXT NOT NULL DEFAULT '{}',
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
current_step INTEGER NOT NULL DEFAULT 0,
FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
);
""")
# Check if core_snapshot exists (it should)
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot'")
if cursor.fetchone():
# Detect which version we're migrating from
snapshot_cols = get_table_columns('core_snapshot')
has_added = 'added' in snapshot_cols
has_bookmarked_at = 'bookmarked_at' in snapshot_cols
if has_added and not has_bookmarked_at:
# Migrating from v0.7.2 (has added/updated, no bookmarked_at/created_at/modified_at)
print('Migrating Snapshot from v0.7.2 schema...')
cursor.execute("""
INSERT OR IGNORE INTO core_snapshot_new (
id, created_at, modified_at, bookmarked_at, downloaded_at, url, timestamp,
crawl_id, status, retry_at
id, url, timestamp, title, bookmarked_at, created_at, modified_at
)
SELECT
id,
created_at,
modified_at,
bookmarked_at,
downloaded_at,
url, timestamp,
NULLIF(crawl_id, ''),
COALESCE(status, 'queued'),
retry_at
FROM core_snapshot
id, url, timestamp, title,
COALESCE(added, CURRENT_TIMESTAMP) as bookmarked_at,
COALESCE(added, CURRENT_TIMESTAMP) as created_at,
COALESCE(updated, added, CURRENT_TIMESTAMP) as modified_at
FROM core_snapshot;
""")
elif has_bookmarked_at and not has_added:
# Migrating from v0.8.6rc0 (already has bookmarked_at/created_at/modified_at)
print('Migrating Snapshot from v0.8.6rc0 schema...')
# Check what fields exist
has_status = 'status' in snapshot_cols
has_retry_at = 'retry_at' in snapshot_cols
has_crawl_id = 'crawl_id' in snapshot_cols
# Build column list based on what exists
cols = ['id', 'url', 'timestamp', 'title', 'bookmarked_at', 'created_at', 'modified_at', 'downloaded_at']
if has_crawl_id:
cols.append('crawl_id')
if has_status:
cols.append('status')
if has_retry_at:
cols.append('retry_at')
cursor.execute(f"""
INSERT OR IGNORE INTO core_snapshot_new ({', '.join(cols)})
SELECT {', '.join(cols)}
FROM core_snapshot;
""")
else:
# v0.7.2 schema - will get crawl_id assigned by later migration (0024)
cursor.execute("""
INSERT OR IGNORE INTO core_snapshot_new (
id, created_at, modified_at, bookmarked_at, url, timestamp, crawl_id
)
SELECT
id,
COALESCE(added, CURRENT_TIMESTAMP),
COALESCE(updated, added, CURRENT_TIMESTAMP),
COALESCE(added, CURRENT_TIMESTAMP),
url, timestamp,
NULL as crawl_id
FROM core_snapshot
""")
print(f'Warning: Unexpected Snapshot schema - has_added={has_added}, has_bookmarked_at={has_bookmarked_at}')
# Replace old table
cursor.execute("DROP TABLE IF EXISTS core_snapshot")
cursor.execute("ALTER TABLE core_snapshot_new RENAME TO core_snapshot")
cursor.execute("DROP TABLE IF EXISTS core_snapshot;")
cursor.execute("ALTER TABLE core_snapshot_new RENAME TO core_snapshot;")
# Create indexes
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_crawl_id_idx ON core_snapshot(crawl_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_url_idx ON core_snapshot(url)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_status_idx ON core_snapshot(status)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_retry_at_idx ON core_snapshot(retry_at)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_created_at_idx ON core_snapshot(created_at)")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at)")
# Create indexes
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_url_idx ON core_snapshot(url);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_timestamp_idx ON core_snapshot(timestamp);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_crawl_id_idx ON core_snapshot(crawl_id);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_status_idx ON core_snapshot(status);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_retry_at_idx ON core_snapshot(retry_at);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_snapshot_created_at_idx ON core_snapshot(created_at);")
cursor.execute("CREATE UNIQUE INDEX IF NOT EXISTS core_snapshot_url_crawl_unique ON core_snapshot(url, crawl_id);")
# ============================================================================
# PART 3: Upgrade core_tag table
# ============================================================================
# ============================================================================
# PART 3: Upgrade core_tag table
# ============================================================================
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_tag_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
# Check if tag id is INTEGER (v0.7.2) or TEXT (v0.8.6rc0)
name VARCHAR(100) NOT NULL UNIQUE,
slug VARCHAR(100) NOT NULL UNIQUE,
created_by_id INTEGER,
FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE
);
""")
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_tag'")
if cursor.fetchone():
cursor.execute("""
SELECT type FROM pragma_table_info('core_tag') WHERE name='id'
""")
tag_id_type = cursor.fetchone()[0] if cursor.rowcount else 'INTEGER'
tag_id_is_int = 'INT' in tag_id_type.upper()
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_tag_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
created_by_id INTEGER,
name VARCHAR(100) NOT NULL UNIQUE,
slug VARCHAR(100) NOT NULL UNIQUE,
FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE SET NULL
)
INSERT OR IGNORE INTO core_tag_new (id, name, slug)
SELECT id, name, slug
FROM core_tag;
""")
if tag_id_is_int:
# v0.7.2: Direct copy (INTEGER to INTEGER)
cursor.execute("""
INSERT OR IGNORE INTO core_tag_new (id, name, slug)
SELECT id, name, slug FROM core_tag
""")
else:
# v0.8.6rc0: Need to remap TEXT ids to new INTEGER ids
cursor.execute("SELECT id, name, slug FROM core_tag")
old_tags = cursor.fetchall()
tag_id_mapping = {} # old_text_id -> new_int_id
cursor.execute("DROP TABLE IF EXISTS core_tag;")
cursor.execute("ALTER TABLE core_tag_new RENAME TO core_tag;")
for old_id, name, slug in old_tags:
cursor.execute("""
INSERT OR IGNORE INTO core_tag_new (name, slug)
VALUES (?, ?)
""", [name, slug])
cursor.execute("SELECT id FROM core_tag_new WHERE slug = ?", [slug])
new_id = cursor.fetchone()[0]
tag_id_mapping[old_id] = new_id
# Create indexes
cursor.execute("CREATE INDEX IF NOT EXISTS core_tag_created_at_idx ON core_tag(created_at);")
cursor.execute("CREATE INDEX IF NOT EXISTS core_tag_created_by_id_idx ON core_tag(created_by_id);")
cursor.execute("DROP TABLE IF EXISTS core_tag")
cursor.execute("ALTER TABLE core_tag_new RENAME TO core_tag")
# Recreate M2M table
cursor.execute("""
CREATE TABLE IF NOT EXISTS core_snapshot_tags_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
snapshot_id TEXT NOT NULL,
tag_id INTEGER NOT NULL,
FOREIGN KEY (snapshot_id) REFERENCES core_snapshot(id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES core_tag(id) ON DELETE CASCADE,
UNIQUE(snapshot_id, tag_id)
)
""")
if tag_id_is_int:
# Direct copy for v0.7.2
cursor.execute("""
INSERT OR IGNORE INTO core_snapshot_tags_new (snapshot_id, tag_id)
SELECT snapshot_id, tag_id FROM core_snapshot_tags
""")
else:
# v0.8.6rc0: Use mapping to convert old TEXT ids to new INTEGER ids
cursor.execute("SELECT snapshot_id, tag_id FROM core_snapshot_tags")
m2m_entries = cursor.fetchall()
for snapshot_id, old_tag_id in m2m_entries:
new_tag_id = tag_id_mapping.get(old_tag_id)
if new_tag_id:
cursor.execute("""
INSERT OR IGNORE INTO core_snapshot_tags_new (snapshot_id, tag_id)
VALUES (?, ?)
""", [snapshot_id, new_tag_id])
cursor.execute("DROP TABLE IF EXISTS core_snapshot_tags")
cursor.execute("ALTER TABLE core_snapshot_tags_new RENAME TO core_snapshot_tags")
print('✓ Core tables upgraded to v0.9.0')
class Migration(migrations.Migration):
@@ -301,10 +252,49 @@ class Migration(migrations.Migration):
dependencies = [
('core', '0022_auto_20231023_2008'),
('crawls', '0001_initial'),
('machine', '0001_initial'),
('auth', '0012_alter_user_first_name_max_length'),
]
operations = [
migrations.RunPython(upgrade_from_v072_or_v086, reverse_code=migrations.RunPython.noop),
migrations.SeparateDatabaseAndState(
database_operations=[
migrations.RunPython(
upgrade_core_tables,
reverse_code=migrations.RunPython.noop,
),
],
state_operations=[
# Remove old ArchiveResult fields
migrations.RemoveField(model_name='archiveresult', name='extractor'),
migrations.RemoveField(model_name='archiveresult', name='output'),
# Remove old Snapshot fields
migrations.RemoveField(model_name='snapshot', name='added'),
migrations.RemoveField(model_name='snapshot', name='updated'),
# SnapshotTag table already exists from v0.7.2, just declare it in state
migrations.CreateModel(
name='SnapshotTag',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('snapshot', models.ForeignKey(to='core.Snapshot', db_column='snapshot_id', on_delete=models.CASCADE)),
('tag', models.ForeignKey(to='core.Tag', db_column='tag_id', on_delete=models.CASCADE)),
],
options={
'db_table': 'core_snapshot_tags',
'unique_together': {('snapshot', 'tag')},
},
),
# Declare that Snapshot.tags M2M already uses through=SnapshotTag (from v0.7.2)
migrations.AlterField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(
'Tag',
blank=True,
related_name='snapshot_set',
through='SnapshotTag',
through_fields=('snapshot', 'tag'),
),
),
],
),
]
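Because SQLite supports only a limited ALTER TABLE, this migration upgrades schemas by rebuilding each table: create a *_new table with the target schema, copy rows across with INSERT ... SELECT, drop the old table, rename the new one, then recreate indexes. A minimal standalone sketch of that pattern, with hypothetical table and column names (the real migration applies it to core_archiveresult, core_snapshot, and core_tag):

    import sqlite3

    conn = sqlite3.connect(':memory:')
    cur = conn.cursor()
    cur.execute("CREATE TABLE demo_old (id INTEGER PRIMARY KEY, name TEXT)")
    cur.execute("INSERT INTO demo_old (name) VALUES ('example')")

    # 1. create the replacement table with the new schema
    cur.execute("""
        CREATE TABLE demo_new (
            id INTEGER PRIMARY KEY,
            name TEXT,
            status TEXT NOT NULL DEFAULT 'queued'
        )
    """)
    # 2. copy rows across; new columns fall back to their defaults
    cur.execute("INSERT INTO demo_new (id, name) SELECT id, name FROM demo_old")
    # 3. swap the tables and recreate any indexes
    cur.execute("DROP TABLE demo_old")
    cur.execute("ALTER TABLE demo_new RENAME TO demo_old")
    cur.execute("CREATE INDEX IF NOT EXISTS demo_status_idx ON demo_old(status)")
    conn.commit()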

View File

@@ -1,7 +1,7 @@
# Generated by hand on 2025-12-29
# Creates a default crawl for v0.7.2 migrated snapshots and makes crawl_id NOT NULL
from django.db import migrations
from django.db import migrations, models
import uuid
@@ -56,8 +56,7 @@ class Migration(migrations.Migration):
dependencies = [
('core', '0023_upgrade_to_0_9_0'),
('crawls', '0002_upgrade_to_0_9_0'),
('machine', '0001_initial'),
('crawls', '0001_initial'),
('auth', '0012_alter_user_first_name_max_length'),
]
@@ -66,65 +65,80 @@ class Migration(migrations.Migration):
create_default_crawl_and_assign_snapshots,
reverse_code=migrations.RunPython.noop,
),
# Now make crawl_id NOT NULL
migrations.RunSQL(
sql="""
-- Rebuild snapshot table with NOT NULL crawl_id
CREATE TABLE core_snapshot_final (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
migrations.SeparateDatabaseAndState(
database_operations=[
# Now make crawl_id NOT NULL
migrations.RunSQL(
sql="""
-- Rebuild snapshot table with NOT NULL crawl_id
CREATE TABLE core_snapshot_final (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
url TEXT NOT NULL,
timestamp VARCHAR(32) NOT NULL UNIQUE,
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
url TEXT NOT NULL,
timestamp VARCHAR(32) NOT NULL UNIQUE,
bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
crawl_id TEXT NOT NULL,
parent_snapshot_id TEXT,
crawl_id TEXT NOT NULL,
parent_snapshot_id TEXT,
title VARCHAR(512),
downloaded_at DATETIME,
depth INTEGER NOT NULL DEFAULT 0,
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
title VARCHAR(512),
downloaded_at DATETIME,
depth INTEGER NOT NULL DEFAULT 0,
fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
config TEXT NOT NULL DEFAULT '{}',
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
config TEXT NOT NULL DEFAULT '{}',
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
current_step INTEGER NOT NULL DEFAULT 0,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
current_step INTEGER NOT NULL DEFAULT 0,
FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
);
FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
);
INSERT INTO core_snapshot_final (
id, created_at, modified_at, url, timestamp, bookmarked_at,
crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
config, notes, num_uses_succeeded, num_uses_failed,
status, retry_at, current_step
)
SELECT
id, created_at, modified_at, url, timestamp, bookmarked_at,
crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
COALESCE(config, '{}'), COALESCE(notes, ''), num_uses_succeeded, num_uses_failed,
status, retry_at, current_step
FROM core_snapshot;
INSERT INTO core_snapshot_final (
id, created_at, modified_at, url, timestamp, bookmarked_at,
crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
config, notes, num_uses_succeeded, num_uses_failed,
status, retry_at, current_step
)
SELECT
id, created_at, modified_at, url, timestamp, bookmarked_at,
crawl_id, parent_snapshot_id, title, downloaded_at, depth, fs_version,
COALESCE(config, '{}'), COALESCE(notes, ''), num_uses_succeeded, num_uses_failed,
status, retry_at, current_step
FROM core_snapshot;
DROP TABLE core_snapshot;
ALTER TABLE core_snapshot_final RENAME TO core_snapshot;
DROP TABLE core_snapshot;
ALTER TABLE core_snapshot_final RENAME TO core_snapshot;
CREATE INDEX core_snapshot_url_idx ON core_snapshot(url);
CREATE INDEX core_snapshot_timestamp_idx ON core_snapshot(timestamp);
CREATE INDEX core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at);
CREATE INDEX core_snapshot_crawl_id_idx ON core_snapshot(crawl_id);
CREATE INDEX core_snapshot_status_idx ON core_snapshot(status);
CREATE INDEX core_snapshot_retry_at_idx ON core_snapshot(retry_at);
CREATE INDEX core_snapshot_created_at_idx ON core_snapshot(created_at);
CREATE UNIQUE INDEX core_snapshot_url_crawl_unique ON core_snapshot(url, crawl_id);
""",
reverse_sql=migrations.RunSQL.noop,
CREATE INDEX core_snapshot_url_idx ON core_snapshot(url);
CREATE INDEX core_snapshot_timestamp_idx ON core_snapshot(timestamp);
CREATE INDEX core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at);
CREATE INDEX core_snapshot_crawl_id_idx ON core_snapshot(crawl_id);
CREATE INDEX core_snapshot_status_idx ON core_snapshot(status);
CREATE INDEX core_snapshot_retry_at_idx ON core_snapshot(retry_at);
CREATE INDEX core_snapshot_created_at_idx ON core_snapshot(created_at);
CREATE UNIQUE INDEX core_snapshot_url_crawl_unique ON core_snapshot(url, crawl_id);
""",
reverse_sql=migrations.RunSQL.noop,
),
],
state_operations=[
migrations.AddField(
model_name='snapshot',
name='crawl',
field=models.ForeignKey(
on_delete=models.deletion.CASCADE,
to='crawls.crawl',
help_text='Crawl that created this snapshot'
),
),
],
),
]
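The SeparateDatabaseAndState wrapper used here splits the work in two: database_operations run the raw SQL table rebuild on disk, while state_operations only update Django's in-memory migration state so that later autodetected migrations agree with what the SQL produced. A minimal sketch of the pattern with hypothetical model and field names (not this project's schema):

    from django.db import migrations, models

    class Migration(migrations.Migration):
        dependencies = [('core', '0001_initial')]

        operations = [
            migrations.SeparateDatabaseAndState(
                database_operations=[
                    # change the actual table with raw SQL
                    migrations.RunSQL(
                        sql="ALTER TABLE core_example ADD COLUMN status TEXT NOT NULL DEFAULT 'queued';",
                        reverse_sql=migrations.RunSQL.noop,
                    ),
                ],
                state_operations=[
                    # tell Django's model state about the same change, without touching the DB
                    migrations.AddField(
                        model_name='example',
                        name='status',
                        field=models.CharField(max_length=15, default='queued'),
                    ),
                ],
            ),
        ]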

View File

@@ -0,0 +1,258 @@
# Generated by Django 6.0 on 2025-12-31 23:09
import archivebox.base_models.models
import django.db.models.deletion
import django.utils.timezone
import uuid
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0024_assign_default_crawl'),
('crawls', '0001_initial'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.AlterModelOptions(
name='archiveresult',
options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'},
),
migrations.AlterModelOptions(
name='snapshot',
options={'verbose_name': 'Snapshot', 'verbose_name_plural': 'Snapshots'},
),
migrations.RemoveField(
model_name='archiveresult',
name='cmd',
),
migrations.RemoveField(
model_name='archiveresult',
name='cmd_version',
),
migrations.RemoveField(
model_name='archiveresult',
name='pwd',
),
migrations.AddField(
model_name='archiveresult',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AddField(
model_name='archiveresult',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='archiveresult',
name='hook_name',
field=models.CharField(blank=True, db_index=True, default='', help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)', max_length=255),
),
migrations.AddField(
model_name='archiveresult',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
migrations.AddField(
model_name='archiveresult',
name='notes',
field=models.TextField(blank=True, default=''),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_failed',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_succeeded',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
),
migrations.AddField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
),
migrations.AddField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
),
migrations.AddField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
),
migrations.AddField(
model_name='archiveresult',
name='output_str',
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
),
migrations.AddField(
model_name='archiveresult',
name='plugin',
field=models.CharField(db_index=True, default='', max_length=32),
),
migrations.AddField(
model_name='archiveresult',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
),
migrations.AddField(
model_name='snapshot',
name='bookmarked_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='snapshot',
name='config',
field=models.JSONField(default=dict),
),
migrations.AddField(
model_name='snapshot',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='snapshot',
name='current_step',
field=models.PositiveSmallIntegerField(db_index=True, default=0, help_text='Current hook step being executed (0-9). Used for sequential hook execution.'),
),
migrations.AddField(
model_name='snapshot',
name='depth',
field=models.PositiveSmallIntegerField(db_index=True, default=0),
),
migrations.AddField(
model_name='snapshot',
name='downloaded_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
),
migrations.AddField(
model_name='snapshot',
name='fs_version',
field=models.CharField(default='0.9.0', help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().', max_length=10),
),
migrations.AddField(
model_name='snapshot',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
migrations.AddField(
model_name='snapshot',
name='notes',
field=models.TextField(blank=True, default=''),
),
migrations.AddField(
model_name='snapshot',
name='num_uses_failed',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='snapshot',
name='num_uses_succeeded',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='snapshot',
name='parent_snapshot',
field=models.ForeignKey(blank=True, help_text='Parent snapshot that discovered this URL (for recursive crawling)', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='child_snapshots', to='core.snapshot'),
),
migrations.AddField(
model_name='snapshot',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
),
migrations.AddField(
model_name='snapshot',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('sealed', 'Sealed')], db_index=True, default='queued', max_length=15),
),
migrations.AddField(
model_name='tag',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, null=True),
),
migrations.AddField(
model_name='tag',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
),
migrations.AddField(
model_name='tag',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
migrations.AlterField(
model_name='archiveresult',
name='end_ts',
field=models.DateTimeField(blank=True, default=None, null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='id',
field=models.AutoField(editable=False, primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='archiveresult',
name='start_ts',
field=models.DateTimeField(blank=True, default=None, null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
),
migrations.AlterField(
model_name='archiveresult',
name='uuid',
field=models.UUIDField(blank=True, db_index=True, default=uuid.uuid7, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='crawl',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
),
migrations.AlterField(
model_name='snapshot',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
),
migrations.AlterField(
model_name='snapshot',
name='timestamp',
field=models.CharField(db_index=True, editable=False, max_length=32, unique=True),
),
migrations.AlterField(
model_name='snapshot',
name='url',
field=models.URLField(db_index=True),
),
migrations.AlterField(
model_name='tag',
name='slug',
field=models.SlugField(editable=False, max_length=100, unique=True),
),
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=('url', 'crawl'), name='unique_url_per_crawl'),
),
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=('timestamp',), name='unique_timestamp'),
),
]

View File

@@ -1,484 +0,0 @@
# Generated by hand on 2025-12-29
# Cleans up extra columns from raw SQL migrations and ensures schema matches models
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
from django.conf import settings
import archivebox.base_models.models
def cleanup_extra_columns(apps, schema_editor):
"""
Create Process records from old cmd/pwd/cmd_version columns and remove those columns.
This preserves the execution details by moving them to the Process model.
"""
with schema_editor.connection.cursor() as cursor:
# Check if cmd column exists (means we came from v0.7.2/v0.8.6rc0)
cursor.execute("SELECT COUNT(*) FROM pragma_table_info('core_archiveresult') WHERE name='cmd'")
has_cmd = cursor.fetchone()[0] > 0
if has_cmd:
print(" Migrating cmd/pwd/cmd_version data to Process records...")
# For each ArchiveResult, create a Process record with cmd/pwd data
# Note: cmd_version from old schema is not preserved (it's now derived from Binary)
cursor.execute("""
SELECT id, cmd, pwd, binary_id, iface_id, start_ts, end_ts, status
FROM core_archiveresult
""")
archive_results = cursor.fetchall()
from archivebox.uuid_compat import uuid7
from archivebox.base_models.models import get_or_create_system_user_pk
# Get or create a Machine record
result = cursor.execute("SELECT id FROM machine_machine LIMIT 1").fetchone()
if result:
machine_id = result[0]
print(f" Using existing Machine: {machine_id}")
else:
# Create a minimal Machine record with raw SQL (can't use model during migration)
print(" Creating Machine record for Process migration...")
import platform
import socket
# Generate minimal machine data without using the model
machine_id = str(uuid7())
guid = f"{socket.gethostname()}-{platform.machine()}"
hostname = socket.gethostname()
# Check schema version
cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='config'")
has_config = cursor.fetchone()[0] > 0
cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='abid'")
has_abid = cursor.fetchone()[0] > 0
cursor.execute("SELECT COUNT(*) FROM pragma_table_info('machine_machine') WHERE name='num_uses_succeeded'")
has_num_uses = cursor.fetchone()[0] > 0
# Insert directly with SQL (use INSERT OR IGNORE in case it already exists)
if has_config:
# v0.9.0+ schema
cursor.execute("""
INSERT OR IGNORE INTO machine_machine (
id, created_at, modified_at,
guid, hostname, hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
os_arch, os_family, os_platform, os_release, os_kernel,
stats, config
) VALUES (?, datetime('now'), datetime('now'), ?, ?, 0, 0, '', '', '', ?, ?, ?, ?, '', '{}', '{}')
""", (
machine_id, guid, hostname,
platform.machine(), platform.system(), platform.platform(), platform.release()
))
elif has_abid and has_num_uses:
# v0.8.6rc0 schema (has abid and num_uses columns)
cursor.execute("""
INSERT OR IGNORE INTO machine_machine (
id, abid, created_at, modified_at,
guid, hostname, hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
os_arch, os_family, os_platform, os_release, os_kernel,
stats, num_uses_failed, num_uses_succeeded
) VALUES (?, '', datetime('now'), datetime('now'), ?, ?, 0, 0, '', '', '', ?, ?, ?, ?, '', '{}', 0, 0)
""", (
machine_id, guid, hostname,
platform.machine(), platform.system(), platform.platform(), platform.release()
))
else:
# v0.7.2 or other schema
cursor.execute("""
INSERT OR IGNORE INTO machine_machine (
id, created_at, modified_at,
guid, hostname, hw_in_docker, hw_in_vm, hw_manufacturer, hw_product, hw_uuid,
os_arch, os_family, os_platform, os_release, os_kernel,
stats
) VALUES (?, datetime('now'), datetime('now'), ?, ?, 0, 0, '', '', '', ?, ?, ?, ?, '', '{}')
""", (
machine_id, guid, hostname,
platform.machine(), platform.system(), platform.platform(), platform.release()
))
# Re-query to get the actual id (in case INSERT OR IGNORE skipped it)
result = cursor.execute("SELECT id FROM machine_machine LIMIT 1").fetchone()
if result:
machine_id = result[0]
print(f" ✓ Using/Created Machine: {machine_id}")
else:
# INSERT OR IGNORE failed - try again without IGNORE to see the error
raise Exception("Failed to create Machine record - machine_machine table is empty after INSERT")
for ar_id, cmd, pwd, binary_id, iface_id, start_ts, end_ts, status in archive_results:
# Create Process record
process_id = str(uuid7())
cursor.execute("""
INSERT INTO machine_process (
id, created_at, modified_at,
machine_id, binary_id, iface_id,
pwd, cmd, env, timeout,
pid, exit_code, stdout, stderr,
started_at, ended_at, url, status, retry_at
) VALUES (?, datetime('now'), datetime('now'), ?, ?, ?, ?, ?, '{}', 120, NULL, NULL, '', '', ?, ?, '', ?, NULL)
""", (process_id, machine_id, binary_id, iface_id, pwd or '', cmd or '[]', start_ts, end_ts, status or 'queued'))
# Update ArchiveResult to point to new Process
cursor.execute("UPDATE core_archiveresult SET process_id = ? WHERE id = ?", (process_id, ar_id))
print(f" ✓ Created {len(archive_results)} Process records from ArchiveResult data")
# Now rebuild table without the extra columns
print(" Rebuilding core_archiveresult table...")
cursor.execute("""
CREATE TABLE core_archiveresult_final (
id INTEGER PRIMARY KEY AUTOINCREMENT,
uuid TEXT,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
snapshot_id TEXT NOT NULL,
plugin VARCHAR(32) NOT NULL DEFAULT '',
hook_name VARCHAR(255) NOT NULL DEFAULT '',
start_ts DATETIME,
end_ts DATETIME,
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
output_files TEXT NOT NULL DEFAULT '{}',
output_json TEXT,
output_str TEXT NOT NULL DEFAULT '',
output_size INTEGER NOT NULL DEFAULT 0,
output_mimetypes VARCHAR(512) NOT NULL DEFAULT '',
config TEXT,
notes TEXT NOT NULL DEFAULT '',
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
process_id TEXT NOT NULL,
FOREIGN KEY (snapshot_id) REFERENCES core_snapshot(id) ON DELETE CASCADE,
FOREIGN KEY (process_id) REFERENCES machine_process(id) ON DELETE RESTRICT
)
""")
# Copy data (cmd, pwd, etc. are now in Process records)
cursor.execute("""
INSERT INTO core_archiveresult_final SELECT
id, uuid, created_at, modified_at,
snapshot_id, plugin, hook_name,
start_ts, end_ts, status, retry_at,
output_files, output_json, output_str, output_size, output_mimetypes,
config, notes, num_uses_succeeded, num_uses_failed,
process_id
FROM core_archiveresult
""")
# Replace table
cursor.execute("DROP TABLE core_archiveresult")
cursor.execute("ALTER TABLE core_archiveresult_final RENAME TO core_archiveresult")
# Recreate indexes
cursor.execute("CREATE INDEX core_archiveresult_snapshot_id_idx ON core_archiveresult(snapshot_id)")
cursor.execute("CREATE INDEX core_archiveresult_plugin_idx ON core_archiveresult(plugin)")
cursor.execute("CREATE INDEX core_archiveresult_status_idx ON core_archiveresult(status)")
cursor.execute("CREATE INDEX core_archiveresult_retry_at_idx ON core_archiveresult(retry_at)")
cursor.execute("CREATE INDEX core_archiveresult_created_at_idx ON core_archiveresult(created_at)")
cursor.execute("CREATE INDEX core_archiveresult_uuid_idx ON core_archiveresult(uuid)")
print(" ✓ Cleaned up core_archiveresult schema")
class Migration(migrations.Migration):
dependencies = [
('core', '0024_assign_default_crawl'),
('machine', '0005_add_process_table'),
('crawls', '0002_upgrade_to_0_9_0'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.SeparateDatabaseAndState(
database_operations=[
migrations.RunPython(
cleanup_extra_columns,
reverse_code=migrations.RunPython.noop,
),
],
state_operations=[
# Tell Django about all the fields that exist after raw SQL migrations
# ArchiveResult model options
migrations.AlterModelOptions(
name='archiveresult',
options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'},
),
# Remove old fields
migrations.RemoveField(model_name='archiveresult', name='cmd'),
migrations.RemoveField(model_name='archiveresult', name='pwd'),
migrations.RemoveField(model_name='archiveresult', name='cmd_version'),
migrations.RemoveField(model_name='archiveresult', name='extractor'),
migrations.RemoveField(model_name='archiveresult', name='output'),
migrations.RemoveField(model_name='snapshot', name='added'),
migrations.RemoveField(model_name='snapshot', name='updated'),
# Add new ArchiveResult fields
migrations.AddField(
model_name='archiveresult',
name='plugin',
field=models.CharField(blank=True, default='', max_length=32),
),
migrations.AddField(
model_name='archiveresult',
name='hook_name',
field=models.CharField(blank=True, default='', max_length=255),
),
migrations.AddField(
model_name='archiveresult',
name='output_str',
field=models.TextField(blank=True, default=''),
),
migrations.AddField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AddField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(blank=True, default=dict),
),
migrations.AddField(
model_name='archiveresult',
name='output_size',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(blank=True, default='', max_length=512),
),
migrations.AddField(
model_name='archiveresult',
name='config',
field=models.JSONField(blank=True, default=dict, null=True),
),
migrations.AddField(
model_name='archiveresult',
name='notes',
field=models.TextField(blank=True, default=''),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_succeeded',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='num_uses_failed',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='archiveresult',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, null=True),
),
migrations.AddField(
model_name='archiveresult',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='archiveresult',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
migrations.AddField(
model_name='archiveresult',
name='process',
field=models.OneToOneField(null=True, on_delete=django.db.models.deletion.PROTECT, related_name='archiveresult', to='machine.process'),
),
# Update Snapshot model
migrations.AlterModelOptions(
name='snapshot',
options={'verbose_name': 'Snapshot', 'verbose_name_plural': 'Snapshots'},
),
migrations.AddField(
model_name='snapshot',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='snapshot',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
migrations.AddField(
model_name='snapshot',
name='bookmarked_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AddField(
model_name='snapshot',
name='downloaded_at',
field=models.DateTimeField(blank=True, null=True),
),
migrations.AddField(
model_name='snapshot',
name='crawl',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='crawls.crawl'),
),
migrations.AddField(
model_name='snapshot',
name='depth',
field=models.PositiveSmallIntegerField(default=0),
),
migrations.AddField(
model_name='snapshot',
name='parent_snapshot',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='child_snapshots', to='core.snapshot'),
),
migrations.AddField(
model_name='snapshot',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('sealed', 'Sealed')], db_index=True, default='queued', max_length=15),
),
migrations.AddField(
model_name='snapshot',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, null=True),
),
migrations.AddField(
model_name='snapshot',
name='current_step',
field=models.PositiveSmallIntegerField(default=0),
),
migrations.AddField(
model_name='snapshot',
name='fs_version',
field=models.CharField(default='0.9.0', max_length=10),
),
migrations.AddField(
model_name='snapshot',
name='config',
field=models.JSONField(blank=True, default=dict),
),
migrations.AddField(
model_name='snapshot',
name='notes',
field=models.TextField(blank=True, default=''),
),
migrations.AddField(
model_name='snapshot',
name='num_uses_succeeded',
field=models.PositiveIntegerField(default=0),
),
migrations.AddField(
model_name='snapshot',
name='num_uses_failed',
field=models.PositiveIntegerField(default=0),
),
# Update Tag model
migrations.AlterModelOptions(
name='tag',
options={'verbose_name': 'Tag', 'verbose_name_plural': 'Tags'},
),
migrations.AddField(
model_name='tag',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, null=True),
),
migrations.AddField(
model_name='tag',
name='modified_at',
field=models.DateTimeField(auto_now=True),
),
migrations.AddField(
model_name='tag',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
),
# Alter field types
migrations.AlterField(
model_name='archiveresult',
name='id',
field=models.AutoField(primary_key=True, serialize=False, verbose_name='ID'),
),
migrations.AlterField(
model_name='archiveresult',
name='uuid',
field=models.UUIDField(blank=True, db_index=True, editable=False, null=True, unique=True),
),
migrations.AlterField(
model_name='archiveresult',
name='end_ts',
field=models.DateTimeField(blank=True, default=None, null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='start_ts',
field=models.DateTimeField(blank=True, default=None, null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('succeeded', 'Succeeded'), ('failed', 'Failed')], db_index=True, default='queued', max_length=15),
),
migrations.AlterField(
model_name='snapshot',
name='id',
field=models.CharField(editable=False, max_length=32, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='snapshot',
name='timestamp',
field=models.CharField(db_index=True, max_length=32, unique=True),
),
migrations.AlterField(
model_name='snapshot',
name='url',
field=models.URLField(max_length=2048),
),
migrations.AlterField(
model_name='tag',
name='slug',
field=models.SlugField(editable=False, max_length=100, unique=True),
),
# Create M2M model for snapshot tags
migrations.CreateModel(
name='SnapshotTag',
fields=[
('id', models.AutoField(primary_key=True, serialize=False, verbose_name='ID')),
('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.snapshot')),
('tag', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.tag')),
],
options={
'db_table': 'core_snapshot_tags',
},
),
migrations.AlterUniqueTogether(
name='snapshottag',
unique_together={('snapshot', 'tag')},
),
# Update tags field on Snapshot to use the through model
migrations.AlterField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(related_name='snapshot_set', through='core.SnapshotTag', to='core.tag'),
),
# Add constraints
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=['url', 'crawl'], name='unique_url_per_crawl'),
),
migrations.AddConstraint(
model_name='snapshot',
constraint=models.UniqueConstraint(fields=['timestamp'], name='unique_timestamp'),
),
],
),
]

View File

@@ -1,76 +0,0 @@
# Generated by hand on 2025-12-30
# Final field adjustments to match model definitions exactly
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
from archivebox.uuid_compat import uuid7
class Migration(migrations.Migration):
dependencies = [
('core', '0025_cleanup_schema'),
('crawls', '0002_upgrade_to_0_9_0'),
]
operations = [
# Alter Snapshot fields to match model exactly
migrations.AlterField(
model_name='snapshot',
name='id',
field=models.UUIDField(default=uuid7, editable=False, primary_key=True, unique=True),
),
migrations.AlterField(
model_name='snapshot',
name='timestamp',
field=models.CharField(db_index=True, editable=False, max_length=32, unique=True),
),
migrations.AlterField(
model_name='snapshot',
name='url',
field=models.URLField(db_index=True, unique=False),
),
migrations.AlterField(
model_name='snapshot',
name='downloaded_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='parent_snapshot',
field=models.ForeignKey(blank=True, db_index=True, help_text='Parent snapshot that discovered this URL (for recursive crawling)', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='child_snapshots', to='core.snapshot'),
),
migrations.AlterField(
model_name='snapshot',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='fs_version',
field=models.CharField(default='0.9.0', help_text='Filesystem version of this snapshot (e.g., "0.7.0", "0.8.0", "0.9.0"). Used to trigger lazy migration on save().', max_length=10),
),
migrations.AlterField(
model_name='snapshot',
name='tags',
field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
),
# Alter SnapshotTag fields
migrations.AlterField(
model_name='snapshottag',
name='id',
field=models.AutoField(primary_key=True, serialize=False, verbose_name='ID'),
),
migrations.AlterField(
model_name='snapshottag',
name='snapshot',
field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot'),
),
migrations.AlterField(
model_name='snapshottag',
name='tag',
field=models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag'),
),
]

View File

@@ -1,108 +0,0 @@
# Generated by Django 6.0 on 2025-12-31 09:04
import django.db.models.deletion
import django.utils.timezone
import uuid
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0026_final_field_adjustments'),
('crawls', '0002_upgrade_to_0_9_0'),
('machine', '0001_initial'),
]
operations = [
migrations.AlterField(
model_name='archiveresult',
name='hook_name',
field=models.CharField(blank=True, db_index=True, default='', help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)', max_length=255),
),
migrations.AlterField(
model_name='archiveresult',
name='id',
field=models.AutoField(editable=False, primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='archiveresult',
name='output_files',
field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_json',
field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='output_mimetypes',
field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
),
migrations.AlterField(
model_name='archiveresult',
name='output_size',
field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
),
migrations.AlterField(
model_name='archiveresult',
name='output_str',
field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
),
migrations.AlterField(
model_name='archiveresult',
name='plugin',
field=models.CharField(db_index=True, default='', max_length=32),
),
migrations.AlterField(
model_name='archiveresult',
name='process',
field=models.OneToOneField(help_text='Process execution details for this archive result', on_delete=django.db.models.deletion.PROTECT, related_name='archiveresult', to='machine.process'),
),
migrations.AlterField(
model_name='archiveresult',
name='retry_at',
field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
),
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
),
migrations.AlterField(
model_name='archiveresult',
name='uuid',
field=models.UUIDField(blank=True, db_index=True, default=uuid.uuid7, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='config',
field=models.JSONField(default=dict),
),
migrations.AlterField(
model_name='snapshot',
name='crawl',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
),
migrations.AlterField(
model_name='snapshot',
name='current_step',
field=models.PositiveSmallIntegerField(db_index=True, default=0, help_text='Current hook step being executed (0-9). Used for sequential hook execution.'),
),
migrations.AlterField(
model_name='snapshot',
name='depth',
field=models.PositiveSmallIntegerField(db_index=True, default=0),
),
migrations.AlterField(
model_name='snapshot',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='snapshottag',
name='id',
field=models.AutoField(primary_key=True, serialize=False),
),
]

View File

@@ -91,9 +91,9 @@ class Tag(ModelWithSerializers):
def api_url(self) -> str:
return reverse_lazy('api-1:get_tag', args=[self.id])
def to_jsonl(self) -> dict:
def to_json(self) -> dict:
"""
Convert Tag model instance to a JSONL record.
Convert Tag model instance to a JSON-serializable dict.
"""
from archivebox.config import VERSION
return {
@@ -105,12 +105,12 @@ class Tag(ModelWithSerializers):
}
@staticmethod
def from_jsonl(record: Dict[str, Any], overrides: Dict[str, Any] = None):
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] = None):
"""
Create/update Tag from JSONL record.
Create/update Tag from JSON dict.
Args:
record: JSONL record with 'name' field
record: JSON dict with 'name' field
overrides: Optional dict with 'snapshot' to auto-attach tag
Returns:
@@ -982,8 +982,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
processes_seen = set()
with open(index_path, 'w') as f:
# Write Snapshot record first (to_jsonl includes crawl_id, fs_version)
f.write(json.dumps(self.to_jsonl()) + '\n')
# Write Snapshot record first (to_json includes crawl_id, fs_version)
f.write(json.dumps(self.to_json()) + '\n')
# Write ArchiveResult records with their associated Binary and Process
# Use select_related to optimize queries
@@ -991,15 +991,15 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
# Write Binary record if not already written
if ar.process and ar.process.binary and ar.process.binary_id not in binaries_seen:
binaries_seen.add(ar.process.binary_id)
f.write(json.dumps(ar.process.binary.to_jsonl()) + '\n')
f.write(json.dumps(ar.process.binary.to_json()) + '\n')
# Write Process record if not already written
if ar.process and ar.process_id not in processes_seen:
processes_seen.add(ar.process_id)
f.write(json.dumps(ar.process.to_jsonl()) + '\n')
f.write(json.dumps(ar.process.to_json()) + '\n')
# Write ArchiveResult record
f.write(json.dumps(ar.to_jsonl()) + '\n')
f.write(json.dumps(ar.to_json()) + '\n')
def read_index_jsonl(self) -> dict:
"""
@@ -1422,9 +1422,9 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
return False
def to_jsonl(self) -> dict:
def to_json(self) -> dict:
"""
Convert Snapshot model instance to a JSONL record.
Convert Snapshot model instance to a JSON-serializable dict.
Includes all fields needed to fully reconstruct/identify this snapshot.
"""
from archivebox.config import VERSION
@@ -1445,9 +1445,9 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
}
@staticmethod
def from_jsonl(record: Dict[str, Any], overrides: Dict[str, Any] = None, queue_for_extraction: bool = True):
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] = None, queue_for_extraction: bool = True):
"""
Create/update Snapshot from JSONL record or dict.
Create/update Snapshot from JSON dict.
Unified method that handles:
- ID-based patching: {"id": "...", "title": "new title"}
@@ -2106,8 +2106,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
result['canonical'] = self.canonical_outputs()
return result
def to_json(self, indent: int = 4) -> str:
"""Convert to JSON string"""
def to_json_str(self, indent: int = 4) -> str:
"""Convert to JSON string (legacy method, use to_json() for dict)"""
return to_json(self.to_dict(extended=True), indent=indent)
def to_csv(self, cols: Optional[List[str]] = None, separator: str = ',', ljust: int = 0) -> str:
@@ -2284,14 +2284,14 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
hook_name = models.CharField(max_length=255, blank=True, default='', db_index=True, help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)')
# Process FK - tracks execution details (cmd, pwd, stdout, stderr, etc.)
# Required - every ArchiveResult must have a Process
process = models.OneToOneField(
'machine.Process',
on_delete=models.PROTECT,
null=False, # Required after migration 4
related_name='archiveresult',
help_text='Process execution details for this archive result'
)
# Process FK will be added post-v0.9.0 in a separate migration
# process = models.OneToOneField(
# 'machine.Process',
# on_delete=models.PROTECT,
# null=False,
# related_name='archiveresult',
# help_text='Process execution details for this archive result'
# )
# New output fields (replacing old 'output' field)
output_str = models.TextField(blank=True, default='', help_text='Human-readable output summary')
@@ -2326,9 +2326,9 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
"""Convenience property to access the user who created this archive result via its snapshot's crawl."""
return self.snapshot.crawl.created_by
def to_jsonl(self) -> dict:
def to_json(self) -> dict:
"""
Convert ArchiveResult model instance to a JSONL record.
Convert ArchiveResult model instance to a JSON-serializable dict.
"""
from archivebox.config import VERSION
record = {
@@ -2360,6 +2360,50 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
record['process_id'] = str(self.process_id)
return record
@staticmethod
def from_json(record: Dict[str, Any], overrides: Dict[str, Any] = None):
"""
Create/update ArchiveResult from JSON dict.
Args:
record: JSON dict with 'snapshot_id', 'plugin', etc.
overrides: Optional dict of field overrides
Returns:
ArchiveResult instance or None
"""
snapshot_id = record.get('snapshot_id')
plugin = record.get('plugin')
if not snapshot_id or not plugin:
return None
# Try to get existing by ID first
result_id = record.get('id')
if result_id:
try:
return ArchiveResult.objects.get(id=result_id)
except ArchiveResult.DoesNotExist:
pass
# Get or create by snapshot_id + plugin
try:
from archivebox.core.models import Snapshot
snapshot = Snapshot.objects.get(id=snapshot_id)
result, _ = ArchiveResult.objects.get_or_create(
snapshot=snapshot,
plugin=plugin,
defaults={
'hook_name': record.get('hook_name', ''),
'status': record.get('status', 'queued'),
'output_str': record.get('output_str', ''),
}
)
return result
except Snapshot.DoesNotExist:
return None
def save(self, *args, **kwargs):
is_new = self._state.adding

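For readers skimming this diff, a minimal sketch of how the renamed per-model from_json() methods compose when reading a snapshot's index.jsonl back in (the helper name, importing every model up front, and the exact 'type' string used for ArchiveResult records are assumptions, not part of this commit):

import json
from pathlib import Path

def load_index_jsonl(index_path: Path) -> dict:
    # Hypothetical reader: restores records from index.jsonl and counts how many of each type were handled
    from archivebox.core.models import Snapshot, Tag, ArchiveResult
    from archivebox.machine.models import Binary, Process

    dispatch = {
        'Snapshot': Snapshot, 'Tag': Tag, 'ArchiveResult': ArchiveResult,
        'Binary': Binary, 'Process': Process,
    }
    stats: dict = {}
    for line in index_path.read_text().splitlines():
        if not line.strip():
            continue
        record = json.loads(line)
        model = dispatch.get(record.get('type'))
        if model and model.from_json(record):
            stats[record['type']] = stats.get(record['type'], 0) + 1
    return stats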
View File

@@ -1,90 +0,0 @@
# Generated by hand on 2025-12-29
# Upgrades crawls_crawl table from v0.8.6rc0 to v0.9.0 schema
from django.db import migrations
def upgrade_crawl_schema_if_needed(apps, schema_editor):
"""
Upgrade crawls_crawl table if it has the old v0.8.6rc0 schema (no urls column).
"""
with schema_editor.connection.cursor() as cursor:
# Check if we need to upgrade (missing urls column means v0.8.6rc0)
cursor.execute("""
SELECT COUNT(*) FROM pragma_table_info('crawls_crawl') WHERE name='urls'
""")
has_urls = cursor.fetchone()[0] > 0
if not has_urls:
print(" Upgrading crawls_crawl from v0.8.6rc0 to v0.9.0 schema...")
# Create new table with v0.9.0 schema
cursor.execute("""
CREATE TABLE crawls_crawl_new (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL,
modified_at DATETIME NOT NULL,
num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
num_uses_failed INTEGER NOT NULL DEFAULT 0,
urls TEXT NOT NULL DEFAULT '[]',
config TEXT,
max_depth INTEGER NOT NULL DEFAULT 0,
tags_str VARCHAR(1024) NOT NULL DEFAULT '',
persona_id TEXT,
label VARCHAR(64) NOT NULL DEFAULT '',
notes TEXT NOT NULL DEFAULT '',
output_dir VARCHAR(512) NOT NULL DEFAULT '',
status VARCHAR(15) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
created_by_id INTEGER NOT NULL,
schedule_id TEXT,
FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE,
FOREIGN KEY (schedule_id) REFERENCES crawls_crawlschedule(id) ON DELETE SET NULL
)
""")
# Copy data from old table (v0.8.6rc0 schema)
cursor.execute("""
INSERT INTO crawls_crawl_new (
id, created_at, modified_at, num_uses_succeeded, num_uses_failed,
urls, config, max_depth, tags_str, status, retry_at, created_by_id, schedule_id
)
SELECT
id, created_at, modified_at, num_uses_succeeded, num_uses_failed,
'[]' as urls, config, max_depth, tags_str, status, retry_at, created_by_id,
CAST(schedule_id AS TEXT)
FROM crawls_crawl
""")
# Replace old table
cursor.execute("DROP TABLE crawls_crawl")
cursor.execute("ALTER TABLE crawls_crawl_new RENAME TO crawls_crawl")
# Create indexes
cursor.execute("CREATE INDEX crawls_crawl_status_idx ON crawls_crawl(status)")
cursor.execute("CREATE INDEX crawls_crawl_retry_at_idx ON crawls_crawl(retry_at)")
cursor.execute("CREATE INDEX crawls_crawl_created_at_idx ON crawls_crawl(created_at)")
cursor.execute("CREATE INDEX crawls_crawl_created_by_id_idx ON crawls_crawl(created_by_id)")
cursor.execute("CREATE INDEX crawls_crawl_schedule_id_idx ON crawls_crawl(schedule_id)")
print(" ✓ Upgraded crawls_crawl to v0.9.0 schema")
else:
print(" ✓ crawls_crawl already has v0.9.0 schema")
class Migration(migrations.Migration):
dependencies = [
('crawls', '0001_initial'),
('auth', '0012_alter_user_first_name_max_length'),
]
operations = [
migrations.RunPython(
upgrade_crawl_schema_if_needed,
reverse_code=migrations.RunPython.noop,
),
]

View File

@@ -134,9 +134,9 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
def api_url(self) -> str:
return reverse_lazy('api-1:get_crawl', args=[self.id])
def to_jsonl(self) -> dict:
def to_json(self) -> dict:
"""
Convert Crawl model instance to a JSONL record.
Convert Crawl model instance to a JSON-serializable dict.
"""
from archivebox.config import VERSION
return {
@@ -152,9 +152,9 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
}
@staticmethod
def from_jsonl(record: dict, overrides: dict = None):
def from_json(record: dict, overrides: dict = None):
"""
Create or get a Crawl from a JSONL record.
Create or get a Crawl from a JSON dict.
Args:
record: Dict with 'urls' (required), optional 'max_depth', 'tags_str', 'label'

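A hedged usage sketch of the renamed Crawl serializers (field values and the exact shape of 'urls' are illustrative; per the docstring above only 'urls' is required, and defaults such as created_by are assumed to be filled in the same way from_jsonl() did):

from archivebox.crawls.models import Crawl

record = {
    'type': 'Crawl',
    'urls': ['https://example.com'],   # assumed to accept a list of URLs
    'max_depth': 1,
    'tags_str': 'docs,example',
}
crawl = Crawl.from_json(record)        # creates or returns an existing matching Crawl
print(crawl.status, crawl.max_depth)   # to_json() round-trips it back to a plain dict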
View File

@@ -1176,7 +1176,7 @@ def create_model_record(record: Dict[str, Any]) -> Any:
def process_hook_records(records: List[Dict[str, Any]], overrides: Dict[str, Any] = None) -> Dict[str, int]:
"""
Process JSONL records from hook output.
Dispatches to Model.from_jsonl() for each record type.
Dispatches to Model.from_json() for each record type.
Args:
records: List of JSONL record dicts from result['records']
@@ -1201,25 +1201,25 @@ def process_hook_records(records: List[Dict[str, Any]], overrides: Dict[str, Any
# Dispatch to appropriate model's from_jsonl() method
if record_type == 'Snapshot':
from archivebox.core.models import Snapshot
obj = Snapshot.from_jsonl(record.copy(), overrides)
obj = Snapshot.from_json(record.copy(), overrides)
if obj:
stats['Snapshot'] = stats.get('Snapshot', 0) + 1
elif record_type == 'Tag':
from archivebox.core.models import Tag
obj = Tag.from_jsonl(record.copy(), overrides)
obj = Tag.from_json(record.copy(), overrides)
if obj:
stats['Tag'] = stats.get('Tag', 0) + 1
elif record_type == 'Binary':
from archivebox.machine.models import Binary
obj = Binary.from_jsonl(record.copy(), overrides)
obj = Binary.from_json(record.copy(), overrides)
if obj:
stats['Binary'] = stats.get('Binary', 0) + 1
elif record_type == 'Machine':
from archivebox.machine.models import Machine
obj = Machine.from_jsonl(record.copy(), overrides)
obj = Machine.from_json(record.copy(), overrides)
if obj:
stats['Machine'] = stats.get('Machine', 0) + 1

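For illustration, feeding hook output through the dispatcher above might look like this (the record payloads and the Snapshot lookup are made up; the overrides key 'snapshot' mirrors the Tag.from_json() docstring earlier in this diff, and the import path of process_hook_records is assumed):

from archivebox.core.models import Snapshot

snapshot = Snapshot.objects.first()
records = [
    {'type': 'Tag', 'name': 'news'},
    {'type': 'Binary', 'name': 'wget', 'abspath': '/usr/bin/wget', 'version': '1.24.5'},
]
stats = process_hook_records(records, overrides={'snapshot': snapshot})
print(stats)   # e.g. {'Tag': 1, 'Binary': 1}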
View File

@@ -100,46 +100,8 @@ class Migration(migrations.Migration):
CREATE INDEX IF NOT EXISTS machine_binary_status_idx ON machine_binary(status);
CREATE INDEX IF NOT EXISTS machine_binary_retry_at_idx ON machine_binary(retry_at);
-- Create machine_process table
CREATE TABLE IF NOT EXISTS machine_process (
id TEXT PRIMARY KEY NOT NULL,
created_at DATETIME NOT NULL,
modified_at DATETIME NOT NULL,
machine_id TEXT NOT NULL,
binary_id TEXT,
iface_id TEXT,
pwd VARCHAR(512) NOT NULL DEFAULT '',
cmd TEXT NOT NULL DEFAULT '[]',
env TEXT NOT NULL DEFAULT '{}',
timeout INTEGER NOT NULL DEFAULT 120,
pid INTEGER,
exit_code INTEGER,
stdout TEXT NOT NULL DEFAULT '',
stderr TEXT NOT NULL DEFAULT '',
started_at DATETIME,
ended_at DATETIME,
url VARCHAR(2048),
status VARCHAR(16) NOT NULL DEFAULT 'queued',
retry_at DATETIME,
FOREIGN KEY (machine_id) REFERENCES machine_machine(id) ON DELETE CASCADE,
FOREIGN KEY (binary_id) REFERENCES machine_binary(id) ON DELETE SET NULL,
FOREIGN KEY (iface_id) REFERENCES machine_networkinterface(id) ON DELETE SET NULL
);
CREATE INDEX IF NOT EXISTS machine_process_status_idx ON machine_process(status);
CREATE INDEX IF NOT EXISTS machine_process_retry_at_idx ON machine_process(retry_at);
CREATE INDEX IF NOT EXISTS machine_process_machine_id_idx ON machine_process(machine_id);
CREATE INDEX IF NOT EXISTS machine_process_binary_id_idx ON machine_process(binary_id);
CREATE INDEX IF NOT EXISTS machine_process_machine_status_retry_idx ON machine_process(machine_id, status, retry_at);
""",
reverse_sql="""
DROP TABLE IF EXISTS machine_process;
DROP TABLE IF EXISTS machine_binary;
DROP TABLE IF EXISTS machine_networkinterface;
DROP TABLE IF EXISTS machine_machine;
@@ -167,6 +129,8 @@ class Migration(migrations.Migration):
('os_kernel', models.CharField(default=None, max_length=255)),
('stats', models.JSONField(blank=True, default=dict, null=True)),
('config', models.JSONField(blank=True, default=dict, help_text='Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)', null=True)),
('num_uses_succeeded', models.PositiveIntegerField(default=0)),
('num_uses_failed', models.PositiveIntegerField(default=0)),
],
options={
'app_label': 'machine',
@@ -189,6 +153,8 @@ class Migration(migrations.Migration):
('region', models.CharField(default=None, max_length=63)),
('country', models.CharField(default=None, max_length=63)),
('machine', models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
('num_uses_succeeded', models.PositiveIntegerField(default=0)),
('num_uses_failed', models.PositiveIntegerField(default=0)),
],
options={
'unique_together': {('machine', 'ip_public', 'ip_local', 'mac_address', 'dns_server')},
@@ -212,6 +178,8 @@ class Migration(migrations.Migration):
('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, help_text='When to retry this binary installation', null=True)),
('output_dir', models.CharField(blank=True, default='', help_text='Directory where installation hook logs are stored', max_length=255)),
('machine', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='machine.machine')),
('num_uses_succeeded', models.PositiveIntegerField(default=0)),
('num_uses_failed', models.PositiveIntegerField(default=0)),
],
options={
'verbose_name': 'Binary',
@@ -220,43 +188,6 @@ class Migration(migrations.Migration):
'app_label': 'machine',
},
),
migrations.CreateModel(
name='Process',
fields=[
('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('modified_at', models.DateTimeField(auto_now=True)),
('pwd', models.CharField(blank=True, default='', help_text='Working directory for process execution', max_length=512)),
('cmd', models.JSONField(blank=True, default=list, help_text='Command as array of arguments')),
('env', models.JSONField(blank=True, default=dict, help_text='Environment variables for process')),
('timeout', models.IntegerField(default=120, help_text='Timeout in seconds')),
('pid', models.IntegerField(blank=True, default=None, help_text='OS process ID', null=True)),
('exit_code', models.IntegerField(blank=True, default=None, help_text='Process exit code (0 = success)', null=True)),
('stdout', models.TextField(blank=True, default='', help_text='Standard output from process')),
('stderr', models.TextField(blank=True, default='', help_text='Standard error from process')),
('started_at', models.DateTimeField(blank=True, default=None, help_text='When process was launched', null=True)),
('ended_at', models.DateTimeField(blank=True, default=None, help_text='When process completed/terminated', null=True)),
('url', models.URLField(blank=True, default=None, help_text='Connection URL (CDP endpoint, sonic server, etc.)', max_length=2048, null=True)),
('status', models.CharField(choices=[('queued', 'Queued'), ('running', 'Running'), ('exited', 'Exited')], db_index=True, default='queued', max_length=16)),
('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, help_text='When to retry this process', null=True)),
('machine', models.ForeignKey(help_text='Machine where this process executed', on_delete=django.db.models.deletion.CASCADE, related_name='process_set', to='machine.machine')),
('binary', models.ForeignKey(blank=True, help_text='Binary used by this process', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='process_set', to='machine.binary')),
('iface', models.ForeignKey(blank=True, help_text='Network interface used by this process', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='process_set', to='machine.networkinterface')),
],
options={
'verbose_name': 'Process',
'verbose_name_plural': 'Processes',
'app_label': 'machine',
},
),
migrations.AddIndex(
model_name='process',
index=models.Index(fields=['machine', 'status', 'retry_at'], name='machine_pro_machine_5e3a87_idx'),
),
migrations.AddIndex(
model_name='process',
index=models.Index(fields=['binary', 'exit_code'], name='machine_pro_binary__7bd19c_idx'),
),
],
),
]

View File

@@ -0,0 +1,45 @@
# Generated by Django 6.0 on 2025-12-31 22:54
import django.db.models.deletion
import django.utils.timezone
import uuid
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('machine', '0001_initial'),
]
operations = [
migrations.CreateModel(
name='Process',
fields=[
('id', models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('modified_at', models.DateTimeField(auto_now=True)),
('pwd', models.CharField(blank=True, default='', help_text='Working directory for process execution', max_length=512)),
('cmd', models.JSONField(blank=True, default=list, help_text='Command as array of arguments')),
('env', models.JSONField(blank=True, default=dict, help_text='Environment variables for process')),
('timeout', models.IntegerField(default=120, help_text='Timeout in seconds')),
('pid', models.IntegerField(blank=True, default=None, help_text='OS process ID', null=True)),
('exit_code', models.IntegerField(blank=True, default=None, help_text='Process exit code (0 = success)', null=True)),
('stdout', models.TextField(blank=True, default='', help_text='Standard output from process')),
('stderr', models.TextField(blank=True, default='', help_text='Standard error from process')),
('started_at', models.DateTimeField(blank=True, default=None, help_text='When process was launched', null=True)),
('ended_at', models.DateTimeField(blank=True, default=None, help_text='When process completed/terminated', null=True)),
('url', models.URLField(blank=True, default=None, help_text='Connection URL (CDP endpoint, sonic server, etc.)', max_length=2048, null=True)),
('status', models.CharField(choices=[('queued', 'Queued'), ('running', 'Running'), ('exited', 'Exited')], db_index=True, default='queued', max_length=16)),
('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, help_text='When to retry this process', null=True)),
('binary', models.ForeignKey(blank=True, help_text='Binary used by this process', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='process_set', to='machine.binary')),
('iface', models.ForeignKey(blank=True, help_text='Network interface used by this process', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='process_set', to='machine.networkinterface')),
('machine', models.ForeignKey(help_text='Machine where this process executed', on_delete=django.db.models.deletion.CASCADE, related_name='process_set', to='machine.machine')),
],
options={
'verbose_name': 'Process',
'verbose_name_plural': 'Processes',
'indexes': [models.Index(fields=['machine', 'status', 'retry_at'], name='machine_pro_machine_5e3a87_idx'), models.Index(fields=['binary', 'exit_code'], name='machine_pro_binary__7bd19c_idx')],
},
),
]

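If you want to sanity-check the result of this migration, a quick look at the new table from a Django shell works (illustrative only, not part of the migration):

from django.db import connection

with connection.cursor() as cursor:
    cursor.execute("PRAGMA table_info(machine_process)")
    print([row[1] for row in cursor.fetchall()])    # column names
    cursor.execute("PRAGMA index_list(machine_process)")
    print([row[1] for row in cursor.fetchall()])    # index names, should include the two declared above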
View File

@@ -1,101 +0,0 @@
# Generated on 2025-12-31
# Adds parent FK and process_type field to Process model
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('machine', '0001_initial'),
]
operations = [
migrations.SeparateDatabaseAndState(
database_operations=[
migrations.RunSQL(
sql="""
-- Add parent_id FK column to machine_process
ALTER TABLE machine_process ADD COLUMN parent_id TEXT REFERENCES machine_process(id) ON DELETE SET NULL;
CREATE INDEX IF NOT EXISTS machine_process_parent_id_idx ON machine_process(parent_id);
-- Add process_type column with default 'binary'
ALTER TABLE machine_process ADD COLUMN process_type VARCHAR(16) NOT NULL DEFAULT 'binary';
CREATE INDEX IF NOT EXISTS machine_process_process_type_idx ON machine_process(process_type);
-- Add composite index for parent + status queries
CREATE INDEX IF NOT EXISTS machine_process_parent_status_idx ON machine_process(parent_id, status);
-- Add composite index for machine + pid + started_at (for PID reuse protection)
CREATE INDEX IF NOT EXISTS machine_process_machine_pid_started_idx ON machine_process(machine_id, pid, started_at);
""",
# Migration is irreversible due to SQLite limitations
# SQLite doesn't support DROP COLUMN, would require table rebuild
reverse_sql=migrations.RunSQL.noop
),
],
state_operations=[
# Add parent FK
migrations.AddField(
model_name='process',
name='parent',
field=models.ForeignKey(
blank=True,
help_text='Parent process that spawned this one',
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name='children',
to='machine.process',
),
),
# Add process_type field
migrations.AddField(
model_name='process',
name='process_type',
field=models.CharField(
choices=[
('cli', 'CLI Command'),
('supervisord', 'Supervisord Daemon'),
('orchestrator', 'Orchestrator'),
('worker', 'Worker Process'),
('hook', 'Hook Script'),
('binary', 'Binary Execution'),
],
default='binary',
help_text='Type of process in the execution hierarchy',
max_length=16,
),
),
# Add indexes - must match the SQL index names exactly
migrations.AddIndex(
model_name='process',
index=models.Index(
fields=['parent'],
name='machine_process_parent_id_idx',
),
),
migrations.AddIndex(
model_name='process',
index=models.Index(
fields=['process_type'],
name='machine_process_process_type_idx',
),
),
migrations.AddIndex(
model_name='process',
index=models.Index(
fields=['parent', 'status'],
name='machine_process_parent_status_idx',
),
),
migrations.AddIndex(
model_name='process',
index=models.Index(
fields=['machine', 'pid', 'started_at'],
name='machine_process_machine_pid_started_idx',
),
),
],
),
]

View File

@@ -82,13 +82,38 @@ class Machine(ModelWithHealthStats):
)
return _CURRENT_MACHINE
@staticmethod
def from_jsonl(record: dict, overrides: dict = None):
def to_json(self) -> dict:
"""
Update Machine config from JSONL record.
Convert Machine model instance to a JSON-serializable dict.
"""
from archivebox.config import VERSION
return {
'type': 'Machine',
'schema_version': VERSION,
'id': str(self.id),
'guid': self.guid,
'hostname': self.hostname,
'hw_in_docker': self.hw_in_docker,
'hw_in_vm': self.hw_in_vm,
'hw_manufacturer': self.hw_manufacturer,
'hw_product': self.hw_product,
'hw_uuid': self.hw_uuid,
'os_arch': self.os_arch,
'os_family': self.os_family,
'os_platform': self.os_platform,
'os_kernel': self.os_kernel,
'os_release': self.os_release,
'stats': self.stats,
'config': self.config or {},
}
@staticmethod
def from_json(record: dict, overrides: dict = None):
"""
Update Machine config from JSON dict.
Args:
record: JSONL record with '_method': 'update', 'key': '...', 'value': '...'
record: JSON dict with '_method': 'update', 'key': '...', 'value': '...'
overrides: Not used
Returns:
@@ -255,9 +280,9 @@ class Binary(ModelWithHealthStats):
'is_valid': self.is_valid,
}
def to_jsonl(self) -> dict:
def to_json(self) -> dict:
"""
Convert Binary model instance to a JSONL record.
Convert Binary model instance to a JSON-serializable dict.
"""
from archivebox.config import VERSION
return {
@@ -274,17 +299,17 @@ class Binary(ModelWithHealthStats):
}
@staticmethod
def from_jsonl(record: dict, overrides: dict = None):
def from_json(record: dict, overrides: dict = None):
"""
Create/update Binary from JSONL record.
Create/update Binary from JSON dict.
Handles two cases:
1. From binaries.jsonl: creates queued binary with name, binproviders, overrides
1. From binaries.json: creates queued binary with name, binproviders, overrides
2. From hook output: updates binary with abspath, version, sha256, binprovider
Args:
record: JSONL record with 'name' and either:
- 'binproviders', 'overrides' (from binaries.jsonl)
record: JSON dict with 'name' and either:
- 'binproviders', 'overrides' (from binaries.json)
- 'abspath', 'version', 'sha256', 'binprovider' (from hook output)
overrides: Not used
@@ -542,7 +567,7 @@ class ProcessManager(models.Manager):
return process
class Process(ModelWithHealthStats):
class Process(models.Model):
"""
Tracks a single OS process execution.
@@ -563,38 +588,11 @@ class Process(ModelWithHealthStats):
RUNNING = 'running', 'Running'
EXITED = 'exited', 'Exited'
class TypeChoices(models.TextChoices):
CLI = 'cli', 'CLI Command'
SUPERVISORD = 'supervisord', 'Supervisord Daemon'
ORCHESTRATOR = 'orchestrator', 'Orchestrator'
WORKER = 'worker', 'Worker Process'
HOOK = 'hook', 'Hook Script'
BINARY = 'binary', 'Binary Execution'
# Primary fields
id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
created_at = models.DateTimeField(default=timezone.now, db_index=True)
modified_at = models.DateTimeField(auto_now=True)
# Parent process FK for hierarchy tracking
parent = models.ForeignKey(
'self',
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name='children',
help_text='Parent process that spawned this one'
)
# Process type for distinguishing in hierarchy
process_type = models.CharField(
max_length=16,
choices=TypeChoices.choices,
default=TypeChoices.BINARY,
db_index=True,
help_text='Type of process in the execution hierarchy'
)
# Machine FK - required (every process runs on a machine)
machine = models.ForeignKey(
Machine,
@@ -667,10 +665,6 @@ class Process(ModelWithHealthStats):
help_text='When to retry this process'
)
# Health stats
num_uses_failed = models.PositiveIntegerField(default=0)
num_uses_succeeded = models.PositiveIntegerField(default=0)
state_machine_name: str = 'archivebox.machine.models.ProcessMachine'
objects: ProcessManager = ProcessManager()
@@ -682,8 +676,6 @@ class Process(ModelWithHealthStats):
indexes = [
models.Index(fields=['machine', 'status', 'retry_at']),
models.Index(fields=['binary', 'exit_code']),
models.Index(fields=['parent', 'status']),
models.Index(fields=['machine', 'pid', 'started_at']),
]
def __str__(self) -> str:
@@ -716,9 +708,9 @@ class Process(ModelWithHealthStats):
return self.archiveresult.hook_name
return ''
def to_jsonl(self) -> dict:
def to_json(self) -> dict:
"""
Convert Process model instance to a JSONL record.
Convert Process model instance to a JSON-serializable dict.
"""
from archivebox.config import VERSION
record = {
@@ -742,6 +734,26 @@ class Process(ModelWithHealthStats):
record['timeout'] = self.timeout
return record
@staticmethod
def from_json(record: dict, overrides: dict = None):
"""
Create/update Process from JSON dict.
Args:
record: JSON dict with 'id' or process details
overrides: Optional dict of field overrides
Returns:
Process instance or None
"""
process_id = record.get('id')
if process_id:
try:
return Process.objects.get(id=process_id)
except Process.DoesNotExist:
pass
return None
def update_and_requeue(self, **kwargs):
"""
Update process fields and requeue for worker state machine.
@@ -1751,17 +1763,12 @@ class ProcessMachine(BaseStateMachine, strict_states=True):
@exited.enter
def enter_exited(self):
"""Process has exited."""
success = self.process.exit_code == 0
self.process.update_and_requeue(
retry_at=None,
status=Process.StatusChoices.EXITED,
ended_at=timezone.now(),
)
# Increment health stats based on exit code
self.process.increment_health_stats(success=success)
# =============================================================================
# State Machine Registration

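As a quick sketch of the renamed Process serializers (values are illustrative; assumes a Machine row already exists via Machine.current(), and note that from_json() only resolves an existing Process by id per its docstring above):

from archivebox.machine.models import Machine, Process

proc = Process.objects.create(machine=Machine.current(), cmd=['wget', '-V'], pwd='/tmp', timeout=60)
record = proc.to_json()                         # {'type': 'Process', 'cmd': ['wget', '-V'], ...}
same = Process.from_json({'id': str(proc.id)})  # looks up the existing row, returns None if missing
assert same == proc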
View File

@@ -76,7 +76,7 @@ class TestMachineModel(TestCase):
self.assertEqual(machine1.guid, machine2.guid)
def test_machine_from_jsonl_update(self):
"""Machine.from_jsonl() should update machine config."""
"""Machine.from_json() should update machine config."""
Machine.current() # Ensure machine exists
record = {
'_method': 'update',
@@ -84,14 +84,14 @@ class TestMachineModel(TestCase):
'value': '/usr/bin/wget',
}
result = Machine.from_jsonl(record)
result = Machine.from_json(record)
self.assertIsNotNone(result)
self.assertEqual(result.config.get('WGET_BINARY'), '/usr/bin/wget')
def test_machine_from_jsonl_invalid(self):
"""Machine.from_jsonl() should return None for invalid records."""
result = Machine.from_jsonl({'invalid': 'record'})
"""Machine.from_json() should return None for invalid records."""
result = Machine.from_json({'invalid': 'record'})
self.assertIsNone(result)
def test_machine_manager_current(self):
@@ -254,14 +254,14 @@ class TestProcessModel(TestCase):
self.assertIsNone(process.exit_code)
def test_process_to_jsonl(self):
"""Process.to_jsonl() should serialize correctly."""
"""Process.to_json() should serialize correctly."""
process = Process.objects.create(
machine=self.machine,
cmd=['echo', 'hello'],
pwd='/tmp',
timeout=60,
)
json_data = process.to_jsonl()
json_data = process.to_json()
self.assertEqual(json_data['type'], 'Process')
self.assertEqual(json_data['cmd'], ['echo', 'hello'])

View File

@@ -271,10 +271,51 @@ async function configure2Captcha() {
if (result.success) {
console.error(`[+] 2captcha configured via ${result.method}`);
// Verify config was applied by reloading options page and checking form values
console.error('[*] Verifying config by reloading options page...');
try {
await configPage.reload({ waitUntil: 'networkidle0', timeout: 10000 });
} catch (e) {
console.error(`[*] Reload threw error (may still work): ${e.message}`);
}
await new Promise(r => setTimeout(r, 2000));
// Wait for Config object again
await configPage.waitForFunction(() => typeof Config !== 'undefined', { timeout: 10000 });
// Read back the config using Config.getAll()
const verifyConfig = await configPage.evaluate(async () => {
if (typeof Config !== 'undefined' && typeof Config.getAll === 'function') {
return await Config.getAll();
}
return null;
});
if (!verifyConfig) {
return { success: false, error: 'Could not verify config - Config.getAll() not available' };
}
// Check that API key was actually set
const actualApiKey = verifyConfig.apiKey || verifyConfig.api_key;
if (!actualApiKey || actualApiKey !== config.apiKey) {
console.error(`[!] Config verification FAILED - API key mismatch`);
console.error(`[!] Expected: ${config.apiKey.slice(0, 8)}...${config.apiKey.slice(-4)}`);
console.error(`[!] Got: ${actualApiKey ? actualApiKey.slice(0, 8) + '...' + actualApiKey.slice(-4) : 'null'}`);
return { success: false, error: 'Config verification failed - API key not set correctly' };
}
console.error('[+] Config verified successfully!');
console.error(`[+] API Key: ${actualApiKey.slice(0, 8)}...${actualApiKey.slice(-4)}`);
console.error(`[+] Plugin Enabled: ${verifyConfig.isPluginEnabled}`);
console.error(`[+] Auto Solve Turnstile: ${verifyConfig.autoSolveTurnstile}`);
fs.writeFileSync(CONFIG_MARKER, JSON.stringify({
timestamp: new Date().toISOString(),
method: result.method,
extensionId: extensionId,
verified: true,
config: {
apiKeySet: !!config.apiKey,
isPluginEnabled: config.isPluginEnabled,
@@ -284,7 +325,7 @@ async function configure2Captcha() {
autoSolveEnabled: true,
}
}, null, 2));
return { success: true, method: result.method };
return { success: true, method: result.method, verified: true };
}
return { success: false, error: result.error || 'Config failed' };

View File

@@ -29,7 +29,7 @@ PLUGIN_DIR = Path(__file__).parent.parent
INSTALL_SCRIPT = PLUGIN_DIR / 'on_Crawl__20_install_twocaptcha_extension.js'
CONFIG_SCRIPT = PLUGIN_DIR / 'on_Crawl__25_configure_twocaptcha_extension_options.js'
TEST_URL = 'https://2captcha.com/demo/recaptcha-v2'
TEST_URL = 'https://2captcha.com/demo/cloudflare-turnstile'
# Alias for backward compatibility with existing test names
@@ -70,8 +70,17 @@ class TestTwoCaptcha:
process, cdp_url = launch_chrome(env, chrome_dir, crawl_id)
try:
exts = json.loads((chrome_dir / 'extensions.json').read_text())
assert any(e['name'] == 'twocaptcha' for e in exts), f"Not loaded: {exts}"
# Wait for extensions.json to be written
extensions_file = chrome_dir / 'extensions.json'
for i in range(20):
if extensions_file.exists():
break
time.sleep(0.5)
assert extensions_file.exists(), f"extensions.json not created. Chrome dir files: {list(chrome_dir.iterdir())}"
exts = json.loads(extensions_file.read_text())
assert any(e['name'] == 'twocaptcha' for e in exts), f"twocaptcha not loaded: {exts}"
print(f"[+] Extension loaded: id={next(e['id'] for e in exts if e['name']=='twocaptcha')}")
finally:
kill_chrome(process, chrome_dir)
@@ -95,6 +104,14 @@ class TestTwoCaptcha:
process, cdp_url = launch_chrome(env, chrome_dir, crawl_id)
try:
# Wait for extensions.json to be written
extensions_file = chrome_dir / 'extensions.json'
for i in range(20):
if extensions_file.exists():
break
time.sleep(0.5)
assert extensions_file.exists(), f"extensions.json not created"
result = subprocess.run(
['node', str(CONFIG_SCRIPT), '--url=https://example.com', '--snapshot-id=test'],
env=env, timeout=30, capture_output=True, text=True
@@ -163,7 +180,34 @@ const puppeteer = require('puppeteer-core');
kill_chrome(process, chrome_dir)
def test_solves_recaptcha(self):
"""Extension solves reCAPTCHA on demo page."""
"""Extension attempts to solve CAPTCHA on demo page.
CRITICAL: DO NOT SKIP OR DISABLE THIS TEST EVEN IF IT'S FLAKY!
This test is INTENTIONALLY left enabled to expose the REAL, ACTUAL flakiness
of the 2captcha service and demo page. The test failures you see here are NOT
test bugs - they are ACCURATE representations of the real-world reliability
of this CAPTCHA solving service.
If this test is flaky, that's because 2captcha IS FLAKY in production.
If this test fails intermittently, that's because 2captcha FAILS INTERMITTENTLY in production.
NEVER EVER hide real flakiness by disabling tests or adding @pytest.mark.skip.
Users NEED to see this failure rate to understand what they're getting into.
When this test DOES pass, it confirms:
- Extension loads and configures correctly
- 2captcha API key is accepted
- Extension can successfully auto-solve CAPTCHAs
- The entire flow works end-to-end
When it fails (as it often does):
- Demo page has JavaScript errors (representing real-world broken sites)
- Turnstile tokens expire before solving (representing real-world timing issues)
- 2captcha service may be slow/down (representing real-world service issues)
This is VALUABLE INFORMATION about the service. DO NOT HIDE IT.
"""
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
env = setup_test_env(tmpdir)
@@ -179,6 +223,14 @@ const puppeteer = require('puppeteer-core');
process, cdp_url = launch_chrome(env, chrome_dir, crawl_id)
try:
# Wait for extensions.json to be written
extensions_file = chrome_dir / 'extensions.json'
for i in range(20):
if extensions_file.exists():
break
time.sleep(0.5)
assert extensions_file.exists(), f"extensions.json not created"
subprocess.run(['node', str(CONFIG_SCRIPT), '--url=x', '--snapshot-id=x'], env=env, timeout=30, capture_output=True)
script = f'''
@@ -187,48 +239,97 @@ const puppeteer = require('puppeteer-core');
(async () => {{
const browser = await puppeteer.connect({{ browserWSEndpoint: '{cdp_url}' }});
const page = await browser.newPage();
// Capture console messages from the page (including extension messages)
page.on('console', msg => {{
const text = msg.text();
if (text.includes('2captcha') || text.includes('turnstile') || text.includes('captcha')) {{
console.error('[CONSOLE]', text);
}}
}});
await page.setViewport({{ width: 1440, height: 900 }});
console.error('[*] Loading {TEST_URL}...');
await page.goto('{TEST_URL}', {{ waitUntil: 'networkidle2', timeout: 30000 }});
await new Promise(r => setTimeout(r, 3000));
// Wait for CAPTCHA iframe (minimal wait to avoid token expiration)
console.error('[*] Waiting for CAPTCHA iframe...');
await page.waitForSelector('iframe', {{ timeout: 30000 }});
console.error('[*] CAPTCHA iframe found - extension should auto-solve now');
// DON'T CLICK - extension should auto-solve since autoSolveTurnstile=True
console.error('[*] Waiting for auto-solve (extension configured with autoSolveTurnstile=True)...');
// Poll for data-state changes with debug output
console.error('[*] Waiting for CAPTCHA to be solved (up to 150s)...');
const start = Date.now();
const maxWait = 90000;
let solved = false;
let lastState = null;
while (Date.now() - start < maxWait) {{
while (!solved && (Date.now() - start) < 150000) {{
const state = await page.evaluate(() => {{
const resp = document.querySelector('textarea[name="g-recaptcha-response"]');
const solver = document.querySelector('.captcha-solver');
return {{
solved: resp ? resp.value.length > 0 : false,
state: solver?.getAttribute('data-state'),
text: solver?.textContent?.trim() || ''
text: solver?.textContent?.trim(),
classList: solver?.className
}};
}});
const sec = Math.round((Date.now() - start) / 1000);
console.error('[*] ' + sec + 's state=' + state.state + ' solved=' + state.solved + ' text=' + state.text.slice(0,30));
if (state.solved) {{ console.error('[+] SOLVED!'); break; }}
if (state.state === 'error') {{ console.error('[!] ERROR'); break; }}
if (state.state !== lastState) {{
const elapsed = Math.round((Date.now() - start) / 1000);
console.error(`[*] State change at ${{elapsed}}s: "${{lastState}}" -> "${{state.state}}" (text: "${{state.text?.slice(0, 50)}}")`);
lastState = state.state;
}}
if (state.state === 'solved') {{
solved = true;
const elapsed = Math.round((Date.now() - start) / 1000);
console.error('[+] SOLVED in ' + elapsed + 's!');
break;
}}
// Check every 2 seconds
await new Promise(r => setTimeout(r, 2000));
}}
if (!solved) {{
const elapsed = Math.round((Date.now() - start) / 1000);
const finalState = await page.evaluate(() => {{
const solver = document.querySelector('.captcha-solver');
return {{
state: solver?.getAttribute('data-state'),
text: solver?.textContent?.trim(),
html: solver?.outerHTML?.slice(0, 200)
}};
}});
console.error(`[!] TIMEOUT after ${{elapsed}}s. Final state: ${{JSON.stringify(finalState)}}`);
browser.disconnect();
process.exit(1);
}}
const final = await page.evaluate(() => {{
const resp = document.querySelector('textarea[name="g-recaptcha-response"]');
return {{ solved: resp ? resp.value.length > 0 : false, preview: resp?.value?.slice(0,50) || '' }};
const solver = document.querySelector('.captcha-solver');
return {{
solved: true,
state: solver?.getAttribute('data-state'),
text: solver?.textContent?.trim()
}};
}});
browser.disconnect();
console.log(JSON.stringify(final));
}})();
'''
(tmpdir / 's.js').write_text(script)
print("\n[*] Solving CAPTCHA (10-60s)...")
r = subprocess.run(['node', str(tmpdir / 's.js')], env=env, timeout=120, capture_output=True, text=True)
print("\n[*] Solving CAPTCHA (this can take up to 150s for 2captcha API)...")
r = subprocess.run(['node', str(tmpdir / 's.js')], env=env, timeout=200, capture_output=True, text=True)
print(r.stderr)
assert r.returncode == 0, f"Failed: {r.stderr}"
final = json.loads([l for l in r.stdout.strip().split('\n') if l.startswith('{')][-1])
assert final.get('solved'), f"Not solved: {final}"
print(f"[+] SOLVED! {final.get('preview','')[:30]}...")
assert final.get('state') == 'solved', f"State not 'solved': {final}"
print(f"[+] SUCCESS! CAPTCHA solved: {final.get('text','')[:50]}")
finally:
kill_chrome(process, chrome_dir)

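The wait-for-extensions.json polling loop above is now repeated in three tests; a tiny helper along these lines (purely illustrative, not part of this commit) would keep it in one place:

import time
from pathlib import Path

def wait_for_file(path: Path, attempts: int = 20, delay: float = 0.5) -> bool:
    # Poll until the file appears or we give up, mirroring the inline loops above
    for _ in range(attempts):
        if path.exists():
            return True
        time.sleep(delay)
    return path.exists()

# usage inside a test:
# assert wait_for_file(chrome_dir / 'extensions.json'), "extensions.json not created"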
View File

@@ -265,57 +265,60 @@ class Orchestrator:
def runloop(self) -> None:
"""Main orchestrator loop."""
from archivebox.misc.logging import IS_TTY, CONSOLE
import sys
from rich.progress import Progress, BarColumn, TextColumn, TaskProgressColumn
from archivebox.misc.logging import IS_TTY
import archivebox.misc.logging as logging_module
self.on_startup()
# Enable progress bars only in TTY + foreground mode
show_progress = IS_TTY and self.exit_on_idle
last_progress_output = ""
progress = Progress(
TextColumn("[cyan]{task.description}"),
BarColumn(bar_width=40),
TaskProgressColumn(),
transient=False,
) if show_progress else None
task_ids = {} # snapshot_id -> task_id
# Replace global CONSOLE with progress.console when active
original_console = logging_module.CONSOLE
original_stderr = logging_module.STDERR
try:
if progress:
progress.start()
# Redirect all logging through progress.console
logging_module.CONSOLE = progress.console
logging_module.STDERR = progress.console
while True:
# Check queues and spawn workers
queue_sizes = self.check_queues_and_spawn_workers()
# Update progress bars (simple inline update)
if show_progress:
# Update progress bars
if progress:
from archivebox.core.models import Snapshot
active_snapshots = list(Snapshot.objects.filter(status='started').iterator(chunk_size=100))
active_snapshots = Snapshot.objects.filter(status='started').iterator(chunk_size=100)
if active_snapshots:
# Build progress string
progress_lines = []
for snapshot in active_snapshots[:5]: # Limit to 5 snapshots
total = snapshot.archiveresult_set.count()
if total == 0:
continue
for snapshot in active_snapshots:
total = snapshot.archiveresult_set.count()
if total == 0:
continue
completed = snapshot.archiveresult_set.filter(
status__in=['succeeded', 'skipped', 'failed']
).count()
completed = snapshot.archiveresult_set.filter(
status__in=['succeeded', 'skipped', 'failed']
).count()
percentage = (completed / total) * 100
bar_width = 30
filled = int(bar_width * completed / total)
bar = '' * filled + '' * (bar_width - filled)
# Create or update task
if snapshot.id not in task_ids:
url = snapshot.url[:60] + '...' if len(snapshot.url) > 60 else snapshot.url
task_ids[snapshot.id] = progress.add_task(url, total=total)
url = snapshot.url[:50] + '...' if len(snapshot.url) > 50 else snapshot.url
progress_lines.append(f"{url} {bar} {percentage:>3.0f}%")
progress_output = "\n".join(progress_lines)
# Only update if changed
if progress_output != last_progress_output:
# Clear previous lines and print new ones
if last_progress_output:
num_lines = last_progress_output.count('\n') + 1
sys.stderr.write(f"\r\033[{num_lines}A\033[J")
sys.stderr.write(progress_output + "\n")
sys.stderr.flush()
last_progress_output = progress_output
progress.update(task_ids[snapshot.id], completed=completed)
# Track idle state
if self.has_pending_work(queue_sizes) or self.has_running_workers():
@@ -327,12 +330,6 @@ class Orchestrator:
# Check if we should exit
if self.should_exit(queue_sizes):
# Clear progress lines
if show_progress and last_progress_output:
num_lines = last_progress_output.count('\n') + 1
sys.stderr.write(f"\r\033[{num_lines}A\033[J")
sys.stderr.flush()
log_worker_event(
worker_type='Orchestrator',
event='All work complete',
@@ -350,6 +347,12 @@ class Orchestrator:
raise
else:
self.on_shutdown()
finally:
if progress:
# Restore original consoles
logging_module.CONSOLE = original_console
logging_module.STDERR = original_stderr
progress.stop()
def start(self) -> int:
"""

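For readers unfamiliar with rich, the pattern the new runloop relies on boils down to add_task()/update() against a shared Progress instance (standalone sketch, not ArchiveBox code):

import time
from rich.progress import Progress, BarColumn, TextColumn, TaskProgressColumn

with Progress(
    TextColumn("[cyan]{task.description}"),
    BarColumn(bar_width=40),
    TaskProgressColumn(),
) as progress:
    task_id = progress.add_task("https://example.com", total=10)
    for done in range(1, 11):
        progress.update(task_id, completed=done)   # redraws in place, no manual ANSI cursor math
        time.sleep(0.1)
    # progress.console.print(...) can be used for log lines so they don't clobber the bars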
View File

@@ -67,7 +67,7 @@ for test_dir in $TEST_DIRS; do
echo -e "${YELLOW}[RUNNING]${NC} $plugin_name"
if python -m pytest "$test_dir" -v --tb=short 2>&1 | grep -v "^platform\|^cachedir\|^rootdir\|^configfile\|^plugins:" | tail -100; then
if python -m pytest "$test_dir" -p no:django -v --tb=short 2>&1 | grep -v "^platform\|^cachedir\|^rootdir\|^configfile\|^plugins:" | tail -100; then
echo -e "${GREEN}[PASSED]${NC} $plugin_name"
PASSED_PLUGINS=$((PASSED_PLUGINS + 1))
else