WIP: checkpoint working tree before rebasing onto dev

This commit is contained in:
Nick Sweeting
2026-03-22 20:23:45 -07:00
parent a6548df8d0
commit f400a2cd67
87 changed files with 12607 additions and 1808 deletions

45
.github/workflows/release-runner.yml vendored Normal file
View File

@@ -0,0 +1,45 @@
name: Release State
on:
push:
branches:
- '**'
workflow_dispatch:
permissions:
contents: write
id-token: write
jobs:
release-state:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: true
ref: ${{ github.ref_name }}
- uses: actions/setup-python@v5
with:
python-version: "3.13"
- uses: astral-sh/setup-uv@v6
with:
enable-cache: true
- uses: actions/setup-node@v4
with:
node-version: 22
- name: Configure git identity
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
- name: Run release script
env:
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
GH_TOKEN: ${{ github.token }}
PYPI_PAT_SECRET: ${{ secrets.PYPI_PAT_SECRET }}
run: ./bin/release.sh

View File

@@ -9,7 +9,6 @@ name: Release
# This workflow ensures the correct ordering during a release.
on:
workflow_dispatch:
release:
types: [published]

View File

@@ -6,8 +6,9 @@ from django.views.generic.base import RedirectView
from .v1_api import urls as v1_api_urls
urlpatterns = [
path("", RedirectView.as_view(url='/api/v1')),
path("", RedirectView.as_view(url='/api/v1/docs')),
path("v1/", RedirectView.as_view(url='/api/v1/docs')),
path("v1/", v1_api_urls),
path("v1", RedirectView.as_view(url='/api/v1/docs')),

View File

@@ -6,7 +6,8 @@ from typing import List, Optional, Union, Any, Annotated
from datetime import datetime
from django.db.models import Model, Q
from django.http import HttpRequest
from django.conf import settings
from django.http import HttpRequest, HttpResponse
from django.core.exceptions import ValidationError
from django.contrib.auth import get_user_model
from django.contrib.auth.models import User
@@ -18,6 +19,22 @@ from ninja.pagination import paginate, PaginationBase
from ninja.errors import HttpError
from archivebox.core.models import Snapshot, ArchiveResult, Tag
from archivebox.api.auth import auth_using_token
from archivebox.config.common import SERVER_CONFIG
from archivebox.core.tag_utils import (
build_tag_cards,
delete_tag as delete_tag_record,
export_tag_snapshots_jsonl,
export_tag_urls,
get_matching_tags,
get_or_create_tag,
get_tag_by_ref,
normalize_created_by_filter,
normalize_created_year_filter,
normalize_has_snapshots_filter,
normalize_tag_sort,
rename_tag as rename_tag_record,
)
from archivebox.crawls.models import Crawl
from archivebox.api.v1_crawls import CrawlSchema
@@ -404,7 +421,7 @@ class TagSchema(Schema):
def get_tags(request: HttpRequest):
setattr(request, 'with_snapshots', False)
setattr(request, 'with_archiveresults', False)
return Tag.objects.all().distinct()
return get_matching_tags()
@router.get("/tag/{tag_id}", response=TagSchema, url_name="get_tag")
@@ -412,9 +429,9 @@ def get_tag(request: HttpRequest, tag_id: str, with_snapshots: bool = True):
setattr(request, 'with_snapshots', with_snapshots)
setattr(request, 'with_archiveresults', False)
try:
return Tag.objects.get(id__icontains=tag_id)
return get_tag_by_ref(tag_id)
except (Tag.DoesNotExist, ValidationError):
return Tag.objects.get(slug__icontains=tag_id)
raise HttpError(404, 'Tag not found')
@router.get("/any/{id}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ID")
@@ -459,6 +476,55 @@ class TagCreateResponseSchema(Schema):
created: bool
class TagSearchSnapshotSchema(Schema):
id: str
title: str
url: str
favicon_url: str
admin_url: str
archive_url: str
downloaded_at: Optional[str] = None
class TagSearchCardSchema(Schema):
id: int
name: str
slug: str
num_snapshots: int
filter_url: str
edit_url: str
export_urls_url: str
export_jsonl_url: str
rename_url: str
delete_url: str
snapshots: List[TagSearchSnapshotSchema]
class TagSearchResponseSchema(Schema):
tags: List[TagSearchCardSchema]
sort: str
created_by: str
year: str
has_snapshots: str
class TagUpdateSchema(Schema):
name: str
class TagUpdateResponseSchema(Schema):
success: bool
tag_id: int
tag_name: str
slug: str
class TagDeleteResponseSchema(Schema):
success: bool
tag_id: int
deleted_count: int
class TagSnapshotRequestSchema(Schema):
snapshot_id: str
tag_name: Optional[str] = None
@@ -471,41 +537,82 @@ class TagSnapshotResponseSchema(Schema):
tag_name: str
@router.get("/tags/autocomplete/", response=TagAutocompleteSchema, url_name="tags_autocomplete")
@router.get("/tags/search/", response=TagSearchResponseSchema, url_name="search_tags")
def search_tags(
request: HttpRequest,
q: str = "",
sort: str = 'created_desc',
created_by: str = '',
year: str = '',
has_snapshots: str = 'all',
):
"""Return detailed tag cards for admin/live-search UIs."""
normalized_sort = normalize_tag_sort(sort)
normalized_created_by = normalize_created_by_filter(created_by)
normalized_year = normalize_created_year_filter(year)
normalized_has_snapshots = normalize_has_snapshots_filter(has_snapshots)
return {
'tags': build_tag_cards(
query=q,
request=request,
sort=normalized_sort,
created_by=normalized_created_by,
year=normalized_year,
has_snapshots=normalized_has_snapshots,
),
'sort': normalized_sort,
'created_by': normalized_created_by,
'year': normalized_year,
'has_snapshots': normalized_has_snapshots,
}
def _public_tag_listing_enabled() -> bool:
    """Whether anonymous visitors may browse the tag listing.

    An explicitly set ``PUBLIC_SNAPSHOTS_LIST`` setting wins outright;
    otherwise fall back to ``PUBLIC_INDEX`` (Django setting first, then the
    SERVER_CONFIG default).
    """
    explicit = getattr(settings, 'PUBLIC_SNAPSHOTS_LIST', None)
    if explicit is None:
        return bool(getattr(settings, 'PUBLIC_INDEX', SERVER_CONFIG.PUBLIC_INDEX))
    return bool(explicit)
def _request_has_tag_autocomplete_access(request: HttpRequest) -> bool:
    """True when the caller may hit the tag autocomplete endpoint.

    Access is granted to authenticated session users, to callers presenting a
    valid API token (query param, custom header, or Bearer Authorization),
    and — failing both — whenever public tag listing is enabled.
    """
    if getattr(getattr(request, 'user', None), 'is_authenticated', False):
        return True
    # Token may arrive as ?api_key=, X-ArchiveBox-API-Key, or "Bearer <token>".
    token = request.GET.get('api_key') or request.headers.get('X-ArchiveBox-API-Key')
    if not token:
        auth_header = request.headers.get('Authorization', '')
        if auth_header.lower().startswith('bearer '):
            token = auth_header.split(None, 1)[1].strip()
    if token and auth_using_token(token=token, request=request):
        return True
    return _public_tag_listing_enabled()
@router.get("/tags/autocomplete/", response=TagAutocompleteSchema, url_name="tags_autocomplete", auth=None)
def tags_autocomplete(request: HttpRequest, q: str = ""):
"""Return tags matching the query for autocomplete."""
if not q:
# Return all tags if no query (limited to 50)
tags = Tag.objects.all().order_by('name')[:50]
else:
tags = Tag.objects.filter(name__icontains=q).order_by('name')[:20]
if not _request_has_tag_autocomplete_access(request):
raise HttpError(401, 'Authentication required')
tags = get_matching_tags(q)[:50 if not q else 20]
return {
'tags': [{'id': tag.pk, 'name': tag.name, 'slug': tag.slug} for tag in tags]
'tags': [{'id': tag.pk, 'name': tag.name, 'slug': tag.slug, 'num_snapshots': getattr(tag, 'num_snapshots', 0)} for tag in tags]
}
@router.post("/tags/create/", response=TagCreateResponseSchema, url_name="tags_create")
def tags_create(request: HttpRequest, data: TagCreateSchema):
"""Create a new tag or return existing one."""
name = data.name.strip()
if not name:
raise HttpError(400, 'Tag name is required')
tag, created = Tag.objects.get_or_create(
name__iexact=name,
defaults={
'name': name,
'created_by': request.user if request.user.is_authenticated else None,
}
)
# If found by case-insensitive match, use that tag
if not created:
existing_tag = Tag.objects.filter(name__iexact=name).first()
if existing_tag is None:
raise HttpError(500, 'Failed to load existing tag after get_or_create')
tag = existing_tag
try:
tag, created = get_or_create_tag(
data.name,
created_by=request.user if request.user.is_authenticated else None,
)
except ValueError as err:
raise HttpError(400, str(err)) from err
return {
'success': True,
@@ -515,6 +622,62 @@ def tags_create(request: HttpRequest, data: TagCreateSchema):
}
@router.post("/tag/{tag_id}/rename", response=TagUpdateResponseSchema, url_name="rename_tag")
def rename_tag(request: HttpRequest, tag_id: int, data: TagUpdateSchema):
try:
tag = rename_tag_record(get_tag_by_ref(tag_id), data.name)
except Tag.DoesNotExist as err:
raise HttpError(404, 'Tag not found') from err
except ValueError as err:
raise HttpError(400, str(err)) from err
return {
'success': True,
'tag_id': tag.pk,
'tag_name': tag.name,
'slug': tag.slug,
}
@router.delete("/tag/{tag_id}", response=TagDeleteResponseSchema, url_name="delete_tag")
def delete_tag(request: HttpRequest, tag_id: int):
try:
tag = get_tag_by_ref(tag_id)
except Tag.DoesNotExist as err:
raise HttpError(404, 'Tag not found') from err
deleted_count, _ = delete_tag_record(tag)
return {
'success': True,
'tag_id': int(tag_id),
'deleted_count': deleted_count,
}
@router.get("/tag/{tag_id}/urls.txt", url_name="tag_urls_export")
def tag_urls_export(request: HttpRequest, tag_id: int):
try:
tag = get_tag_by_ref(tag_id)
except Tag.DoesNotExist as err:
raise HttpError(404, 'Tag not found') from err
response = HttpResponse(export_tag_urls(tag), content_type='text/plain; charset=utf-8')
response['Content-Disposition'] = f'attachment; filename="tag-{tag.slug}-urls.txt"'
return response
@router.get("/tag/{tag_id}/snapshots.jsonl", url_name="tag_snapshots_export")
def tag_snapshots_export(request: HttpRequest, tag_id: int):
try:
tag = get_tag_by_ref(tag_id)
except Tag.DoesNotExist as err:
raise HttpError(404, 'Tag not found') from err
response = HttpResponse(export_tag_snapshots_jsonl(tag), content_type='application/x-ndjson; charset=utf-8')
response['Content-Disposition'] = f'attachment; filename="tag-{tag.slug}-snapshots.jsonl"'
return response
@router.post("/tags/add-to-snapshot/", response=TagSnapshotResponseSchema, url_name="tags_add_to_snapshot")
def tags_add_to_snapshot(request: HttpRequest, data: TagSnapshotRequestSchema):
"""Add a tag to a snapshot. Creates the tag if it doesn't exist."""
@@ -534,24 +697,16 @@ def tags_add_to_snapshot(request: HttpRequest, data: TagSnapshotRequestSchema):
# Get or create the tag
if data.tag_name:
name = data.tag_name.strip()
if not name:
raise HttpError(400, 'Tag name is required')
tag, _ = Tag.objects.get_or_create(
name__iexact=name,
defaults={
'name': name,
'created_by': request.user if request.user.is_authenticated else None,
}
)
# If found by case-insensitive match, use that tag
existing_tag = Tag.objects.filter(name__iexact=name).first()
if existing_tag is not None:
tag = existing_tag
try:
tag, _ = get_or_create_tag(
data.tag_name,
created_by=request.user if request.user.is_authenticated else None,
)
except ValueError as err:
raise HttpError(400, str(err)) from err
elif data.tag_id:
try:
tag = Tag.objects.get(pk=data.tag_id)
tag = get_tag_by_ref(data.tag_id)
except Tag.DoesNotExist:
raise HttpError(404, 'Tag not found')
else:

View File

@@ -4,7 +4,7 @@ __package__ = 'archivebox.base_models'
import json
from collections.abc import Mapping
from typing import TypedDict
from typing import NotRequired, TypedDict
from django import forms
from django.contrib import admin
@@ -17,9 +17,13 @@ from django_object_actions import DjangoObjectActions
class ConfigOption(TypedDict):
plugin: str
type: str
type: str | list[str]
default: object
description: str
enum: NotRequired[list[object]]
pattern: NotRequired[str]
minimum: NotRequired[int | float]
maximum: NotRequired[int | float]
class KeyValueWidget(forms.Widget):
@@ -44,12 +48,16 @@ class KeyValueWidget(forms.Widget):
options: dict[str, ConfigOption] = {}
for plugin_name, schema in plugin_configs.items():
for key, prop in schema.get('properties', {}).items():
options[key] = {
option: ConfigOption = {
'plugin': plugin_name,
'type': prop.get('type', 'string'),
'default': prop.get('default', ''),
'description': prop.get('description', ''),
}
for schema_key in ('enum', 'pattern', 'minimum', 'maximum'):
if schema_key in prop:
option[schema_key] = prop[schema_key]
options[key] = option
return options
except Exception:
return {}
@@ -98,14 +106,12 @@ class KeyValueWidget(forms.Widget):
'''
# Render existing key-value pairs
row_idx = 0
for key, val in data.items():
val_str = json.dumps(val) if not isinstance(val, str) else val
html += self._render_row(widget_id, row_idx, key, val_str)
row_idx += 1
html += self._render_row(widget_id, key, val_str)
# Always add one empty row for new entries
html += self._render_row(widget_id, row_idx, '', '')
html += self._render_row(widget_id, '', '')
html += f'''
</div>
@@ -114,22 +120,450 @@ class KeyValueWidget(forms.Widget):
style="padding: 4px 12px; cursor: pointer; background: #417690; color: white; border: none; border-radius: 4px;">
+ Add Row
</button>
<span id="{widget_id}_hint" style="font-size: 11px; color: #666; font-style: italic;"></span>
</div>
<input type="hidden" name="{name}" id="{widget_id}" value="">
<script>
(function() {{
var configMeta_{widget_id} = {config_meta_json};
var rowCounter_{widget_id} = 0;
function showKeyHint_{widget_id}(key) {{
var hint = document.getElementById('{widget_id}_hint');
var meta = configMeta_{widget_id}[key];
if (meta) {{
hint.innerHTML = '<b>' + key + '</b>: ' + (meta.description || meta.type) +
(meta.default !== '' ? ' <span style="color:#888">(default: ' + meta.default + ')</span>' : '');
}} else {{
hint.textContent = key ? 'Custom key: ' + key : '';
function stringifyValue_{widget_id}(value) {{
return typeof value === 'string' ? value : JSON.stringify(value);
}}
function getTypes_{widget_id}(meta) {{
if (!meta || meta.type === undefined || meta.type === null) {{
return [];
}}
return Array.isArray(meta.type) ? meta.type : [meta.type];
}}
function getMetaForKey_{widget_id}(key) {{
if (!key) {{
return null;
}}
var explicitMeta = configMeta_{widget_id}[key];
if (explicitMeta) {{
return Object.assign({{ key: key }}, explicitMeta);
}}
if (key.endsWith('_BINARY')) {{
return {{
key: key,
plugin: 'custom',
type: 'string',
default: '',
description: 'Path to binary executable',
}};
}}
if (isRegexConfigKey_{widget_id}(key)) {{
return {{
key: key,
plugin: 'custom',
type: 'string',
default: '',
description: 'Regex pattern list',
}};
}}
return null;
}}
function describeMeta_{widget_id}(meta) {{
if (!meta) {{
return '';
}}
var details = '';
if (Array.isArray(meta.enum) && meta.enum.length) {{
details = 'Allowed: ' + meta.enum.map(stringifyValue_{widget_id}).join(', ');
}} else {{
var types = getTypes_{widget_id}(meta);
if (types.length) {{
details = 'Expected: ' + types.join(' or ');
}}
}}
if (meta.minimum !== undefined || meta.maximum !== undefined) {{
var bounds = [];
if (meta.minimum !== undefined) bounds.push('min ' + meta.minimum);
if (meta.maximum !== undefined) bounds.push('max ' + meta.maximum);
details += (details ? ' ' : '') + '(' + bounds.join(', ') + ')';
}}
return [meta.description || '', details].filter(Boolean).join(' ');
}}
function getExampleInput_{widget_id}(key, meta) {{
var types = getTypes_{widget_id}(meta);
if (key.endsWith('_BINARY')) {{
return 'Example: wget or /usr/bin/wget';
}}
if (key.endsWith('_ARGS_EXTRA') || key.endsWith('_ARGS')) {{
return 'Example: ["--extra-arg"]';
}}
if (types.includes('array')) {{
return 'Example: ["value"]';
}}
if (types.includes('object')) {{
if (key === 'SAVE_ALLOWLIST' || key === 'SAVE_DENYLIST') {{
return 'Example: {{"^https://example\\\\.com": ["wget"]}}';
}}
return 'Example: {{"key": "value"}}';
}}
return '';
}}
function isRegexConfigKey_{widget_id}(key) {{
return key === 'URL_ALLOWLIST' ||
key === 'URL_DENYLIST' ||
key === 'SAVE_ALLOWLIST' ||
key === 'SAVE_DENYLIST' ||
key.endsWith('_PATTERN') ||
key.includes('REGEX');
}}
function isSimpleFilterPattern_{widget_id}(pattern) {{
return /^[\\w.*:-]+$/.test(pattern);
}}
function validateRegexPattern_{widget_id}(pattern) {{
if (!pattern || isSimpleFilterPattern_{widget_id}(pattern)) {{
return '';
}}
try {{
new RegExp(pattern);
}} catch (error) {{
return error && error.message ? error.message : 'Invalid regex';
}}
return '';
}}
function validateRegexConfig_{widget_id}(key, raw, typeName) {{
if (typeName === 'object') {{
var parsed;
try {{
parsed = JSON.parse(raw);
}} catch (error) {{
return {{ ok: false, value: raw, message: 'Must be valid JSON' }};
}}
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {{
return {{ ok: false, value: parsed, message: 'Must be a JSON object' }};
}}
for (var regexKey in parsed) {{
var objectRegexError = validateRegexPattern_{widget_id}(regexKey);
if (objectRegexError) {{
return {{ ok: false, value: parsed, message: 'Invalid regex key "' + regexKey + '": ' + objectRegexError }};
}}
}}
return {{ ok: true, value: parsed, message: '' }};
}}
var patterns = raw.split(/[\\n,]+/).map(function(pattern) {{
return pattern.trim();
}}).filter(Boolean);
for (var i = 0; i < patterns.length; i++) {{
var regexError = validateRegexPattern_{widget_id}(patterns[i]);
if (regexError) {{
return {{ ok: false, value: raw, message: 'Invalid regex "' + patterns[i] + '": ' + regexError }};
}}
}}
return {{ ok: true, value: raw, message: '' }};
}}
function validateBinaryValue_{widget_id}(raw) {{
if (!raw) {{
return {{ ok: true, value: raw, message: '' }};
}}
if (/['"`]/.test(raw)) {{
return {{ ok: false, value: raw, message: 'Binary paths cannot contain quotes' }};
}}
if (/[;&|<>$(){{}}\\[\\]!]/.test(raw)) {{
return {{ ok: false, value: raw, message: 'Binary paths can only be a binary name or absolute path' }};
}}
if (raw.startsWith('/')) {{
if (/^[A-Za-z0-9_./+\\- ]+$/.test(raw)) {{
return {{ ok: true, value: raw, message: '' }};
}}
return {{ ok: false, value: raw, message: 'Absolute paths may only contain path-safe characters' }};
}}
if (/^[A-Za-z0-9_.+-]+$/.test(raw)) {{
return {{ ok: true, value: raw, message: '' }};
}}
return {{ ok: false, value: raw, message: 'Enter a binary name like wget or an absolute path like /usr/bin/wget' }};
}}
function parseValue_{widget_id}(raw) {{
try {{
if (raw === 'true') return true;
if (raw === 'false') return false;
if (raw === 'null') return null;
if (raw !== '' && !isNaN(raw)) return Number(raw);
if ((raw.startsWith('{{') && raw.endsWith('}}')) ||
(raw.startsWith('[') && raw.endsWith(']')) ||
(raw.startsWith('"') && raw.endsWith('"'))) {{
return JSON.parse(raw);
}}
}} catch (error) {{
return raw;
}}
return raw;
}}
function sameValue_{widget_id}(left, right) {{
return left === right || JSON.stringify(left) === JSON.stringify(right);
}}
function parseTypedValue_{widget_id}(raw, typeName, meta) {{
var numberValue;
var parsed;
if (typeName && meta && meta.key && isRegexConfigKey_{widget_id}(meta.key)) {{
return validateRegexConfig_{widget_id}(meta.key, raw, typeName);
}}
if (typeName === 'string' && meta && meta.key && meta.key.endsWith('_BINARY')) {{
return validateBinaryValue_{widget_id}(raw);
}}
if (typeName === 'string') {{
if (meta.pattern) {{
try {{
if (!(new RegExp(meta.pattern)).test(raw)) {{
return {{ ok: false, value: raw, message: 'Must match pattern ' + meta.pattern }};
}}
}} catch (error) {{}}
}}
return {{ ok: true, value: raw, message: '' }};
}}
if (typeName === 'integer') {{
if (!/^-?\\d+$/.test(raw)) {{
return {{ ok: false, value: raw, message: 'Must be an integer' }};
}}
numberValue = Number(raw);
if (meta.minimum !== undefined && numberValue < meta.minimum) {{
return {{ ok: false, value: numberValue, message: 'Must be at least ' + meta.minimum }};
}}
if (meta.maximum !== undefined && numberValue > meta.maximum) {{
return {{ ok: false, value: numberValue, message: 'Must be at most ' + meta.maximum }};
}}
return {{ ok: true, value: numberValue, message: '' }};
}}
if (typeName === 'number') {{
if (raw === '' || isNaN(raw)) {{
return {{ ok: false, value: raw, message: 'Must be a number' }};
}}
numberValue = Number(raw);
if (meta.minimum !== undefined && numberValue < meta.minimum) {{
return {{ ok: false, value: numberValue, message: 'Must be at least ' + meta.minimum }};
}}
if (meta.maximum !== undefined && numberValue > meta.maximum) {{
return {{ ok: false, value: numberValue, message: 'Must be at most ' + meta.maximum }};
}}
return {{ ok: true, value: numberValue, message: '' }};
}}
if (typeName === 'boolean') {{
var lowered = raw.toLowerCase();
if (lowered === 'true' || raw === '1') return {{ ok: true, value: true, message: '' }};
if (lowered === 'false' || raw === '0') return {{ ok: true, value: false, message: '' }};
return {{ ok: false, value: raw, message: 'Must be true or false' }};
}}
if (typeName === 'null') {{
return raw === 'null'
? {{ ok: true, value: null, message: '' }}
: {{ ok: false, value: raw, message: 'Must be null' }};
}}
if (typeName === 'array' || typeName === 'object') {{
try {{
parsed = JSON.parse(raw);
}} catch (error) {{
return {{ ok: false, value: raw, message: 'Must be valid JSON' }};
}}
if (typeName === 'array' && Array.isArray(parsed)) {{
return {{ ok: true, value: parsed, message: '' }};
}}
if (typeName === 'object' && parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {{
return {{ ok: true, value: parsed, message: '' }};
}}
return {{
ok: false,
value: parsed,
message: typeName === 'array' ? 'Must be a JSON array' : 'Must be a JSON object',
}};
}}
return {{ ok: true, value: parseValue_{widget_id}(raw), message: '' }};
}}
function validateValueAgainstMeta_{widget_id}(raw, meta) {{
if (!meta || raw === '') {{
return {{ state: 'neutral', value: raw, message: '' }};
}}
var enumValues = Array.isArray(meta.enum) ? meta.enum : [];
var types = getTypes_{widget_id}(meta);
if (!types.length) {{
types = ['string'];
}}
var error = 'Invalid value';
for (var i = 0; i < types.length; i++) {{
var candidate = parseTypedValue_{widget_id}(raw, types[i], meta);
if (!candidate.ok) {{
error = candidate.message || error;
continue;
}}
if (enumValues.length && !enumValues.some(function(enumValue) {{
return sameValue_{widget_id}(enumValue, candidate.value) || stringifyValue_{widget_id}(enumValue) === raw;
}})) {{
error = 'Must be one of: ' + enumValues.map(stringifyValue_{widget_id}).join(', ');
continue;
}}
return {{ state: 'valid', value: candidate.value, message: '' }};
}}
return {{ state: 'invalid', value: raw, message: error }};
}}
function ensureRowId_{widget_id}(row) {{
if (!row.dataset.rowId) {{
row.dataset.rowId = String(rowCounter_{widget_id}++);
}}
return row.dataset.rowId;
}}
function setRowHelp_{widget_id}(row) {{
var keyInput = row.querySelector('.kv-key');
var help = row.querySelector('.kv-help');
if (!keyInput || !help) {{
return;
}}
var key = keyInput.value.trim();
if (!key) {{
help.textContent = '';
return;
}}
var meta = getMetaForKey_{widget_id}(key);
if (meta) {{
var extra = isRegexConfigKey_{widget_id}(key)
? ((meta.type === 'object' || (Array.isArray(meta.type) && meta.type.includes('object')))
? ' Expected: JSON object with regex keys.'
: ' Expected: valid regex.')
: '';
var example = getExampleInput_{widget_id}(key, meta);
help.textContent = [describeMeta_{widget_id}(meta) + extra, example].filter(Boolean).join(' ');
}} else {{
help.textContent = 'Custom key';
}}
}}
function configureValueInput_{widget_id}(row) {{
var keyInput = row.querySelector('.kv-key');
var valueInput = row.querySelector('.kv-value');
var datalist = row.querySelector('.kv-value-options');
if (!keyInput || !valueInput || !datalist) {{
return;
}}
var rowId = ensureRowId_{widget_id}(row);
datalist.id = '{widget_id}_value_options_' + rowId;
var meta = getMetaForKey_{widget_id}(keyInput.value.trim());
var enumValues = Array.isArray(meta && meta.enum) ? meta.enum : [];
var types = getTypes_{widget_id}(meta);
if (!enumValues.length && types.includes('boolean')) {{
enumValues = ['True', 'False'];
}}
if (enumValues.length) {{
datalist.innerHTML = enumValues.map(function(enumValue) {{
return '<option value="' + stringifyValue_{widget_id}(enumValue).replace(/"/g, '&quot;') + '"></option>';
}}).join('');
valueInput.setAttribute('list', datalist.id);
}} else {{
datalist.innerHTML = '';
valueInput.removeAttribute('list');
}}
}}
function setValueValidationState_{widget_id}(input, state, message) {{
if (!input) {{
return;
}}
if (state === 'valid') {{
input.style.borderColor = '#2da44e';
input.style.boxShadow = '0 0 0 1px rgba(45, 164, 78, 0.18)';
input.style.backgroundColor = '#f6ffed';
}} else if (state === 'invalid') {{
input.style.borderColor = '#cf222e';
input.style.boxShadow = '0 0 0 1px rgba(207, 34, 46, 0.18)';
input.style.backgroundColor = '#fff8f8';
}} else {{
input.style.borderColor = '#ccc';
input.style.boxShadow = 'none';
input.style.backgroundColor = '';
}}
input.title = message || '';
}}
function applyValueValidation_{widget_id}(row) {{
var keyInput = row.querySelector('.kv-key');
var valueInput = row.querySelector('.kv-value');
if (!keyInput || !valueInput) {{
return;
}}
var key = keyInput.value.trim();
if (!key) {{
setValueValidationState_{widget_id}(valueInput, 'neutral', '');
return;
}}
var meta = getMetaForKey_{widget_id}(key);
if (!meta) {{
setValueValidationState_{widget_id}(valueInput, 'neutral', '');
return;
}}
var validation = validateValueAgainstMeta_{widget_id}(valueInput.value.trim(), meta);
setValueValidationState_{widget_id}(valueInput, validation.state, validation.message);
}}
function coerceValueForStorage_{widget_id}(key, raw) {{
var meta = getMetaForKey_{widget_id}(key);
if (!meta) {{
return parseValue_{widget_id}(raw);
}}
var validation = validateValueAgainstMeta_{widget_id}(raw, meta);
return validation.state === 'valid' ? validation.value : raw;
}}
function initializeRows_{widget_id}() {{
var container = document.getElementById('{widget_id}_rows');
container.querySelectorAll('.key-value-row').forEach(function(row) {{
ensureRowId_{widget_id}(row);
configureValueInput_{widget_id}(row);
setRowHelp_{widget_id}(row);
applyValueValidation_{widget_id}(row);
}});
}}
function updateHiddenField_{widget_id}() {{
@@ -142,20 +576,7 @@ class KeyValueWidget(forms.Widget):
if (keyInput && valInput && keyInput.value.trim()) {{
var key = keyInput.value.trim();
var val = valInput.value.trim();
// Try to parse as JSON (for booleans, numbers, etc)
try {{
if (val === 'true') result[key] = true;
else if (val === 'false') result[key] = false;
else if (val === 'null') result[key] = null;
else if (!isNaN(val) && val !== '') result[key] = Number(val);
else if ((val.startsWith('{{') && val.endsWith('}}')) ||
(val.startsWith('[') && val.endsWith(']')) ||
(val.startsWith('"') && val.endsWith('"')))
result[key] = JSON.parse(val);
else result[key] = val;
}} catch(e) {{
result[key] = val;
}}
result[key] = coerceValueForStorage_{widget_id}(key, val);
}}
}});
document.getElementById('{widget_id}').value = JSON.stringify(result);
@@ -163,60 +584,85 @@ class KeyValueWidget(forms.Widget):
window.addKeyValueRow_{widget_id} = function() {{
var container = document.getElementById('{widget_id}_rows');
var rows = container.querySelectorAll('.key-value-row');
var newIdx = rows.length;
var newRow = document.createElement('div');
newRow.className = 'key-value-row';
newRow.style.cssText = 'display: flex; gap: 8px; margin-bottom: 6px; align-items: center;';
newRow.innerHTML = '<input type="text" class="kv-key" placeholder="KEY" list="{widget_id}_keys" ' +
'style="flex: 1; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;" ' +
'onchange="updateHiddenField_{widget_id}()" oninput="updateHiddenField_{widget_id}(); showKeyHint_{widget_id}(this.value)" onfocus="showKeyHint_{widget_id}(this.value)">' +
newRow.style.cssText = 'margin-bottom: 6px;';
newRow.innerHTML = '<div style="display: flex; gap: 8px; align-items: center;">' +
'<input type="text" class="kv-key" placeholder="KEY" list="{widget_id}_keys" ' +
'style="flex: 1; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;">' +
'<input type="text" class="kv-value" placeholder="value" ' +
'style="flex: 2; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;" ' +
'onchange="updateHiddenField_{widget_id}()" oninput="updateHiddenField_{widget_id}()">' +
'style="flex: 2; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;">' +
'<datalist class="kv-value-options"></datalist>' +
'<button type="button" onclick="removeKeyValueRow_{widget_id}(this)" ' +
'style="padding: 4px 10px; cursor: pointer; background: #ba2121; color: white; border: none; border-radius: 4px; font-weight: bold;"></button>';
'style="padding: 4px 10px; cursor: pointer; background: #ba2121; color: white; border: none; border-radius: 4px; font-weight: bold;"></button>' +
'</div>' +
'<div class="kv-help" style="margin-top: 4px; font-size: 11px; color: #666; font-style: italic;"></div>';
container.appendChild(newRow);
ensureRowId_{widget_id}(newRow);
configureValueInput_{widget_id}(newRow);
setRowHelp_{widget_id}(newRow);
applyValueValidation_{widget_id}(newRow);
updateHiddenField_{widget_id}();
newRow.querySelector('.kv-key').focus();
}};
window.removeKeyValueRow_{widget_id} = function(btn) {{
var row = btn.parentElement;
var row = btn.closest('.key-value-row');
row.remove();
updateHiddenField_{widget_id}();
}};
window.showKeyHint_{widget_id} = showKeyHint_{widget_id};
window.updateHiddenField_{widget_id} = updateHiddenField_{widget_id};
// Initialize on load
document.addEventListener('DOMContentLoaded', function() {{
initializeRows_{widget_id}();
updateHiddenField_{widget_id}();
}});
// Also run immediately in case DOM is already ready
if (document.readyState !== 'loading') {{
initializeRows_{widget_id}();
updateHiddenField_{widget_id}();
}}
// Update on any input change
document.getElementById('{widget_id}_rows').addEventListener('input', updateHiddenField_{widget_id});
var rowsEl_{widget_id} = document.getElementById('{widget_id}_rows');
rowsEl_{widget_id}.addEventListener('input', function(event) {{
var row = event.target.closest('.key-value-row');
if (!row) {{
return;
}}
if (event.target.classList.contains('kv-key')) {{
configureValueInput_{widget_id}(row);
setRowHelp_{widget_id}(row);
}}
if (event.target.classList.contains('kv-key') || event.target.classList.contains('kv-value')) {{
applyValueValidation_{widget_id}(row);
updateHiddenField_{widget_id}();
}}
}});
}})();
</script>
</div>
'''
return mark_safe(html)
def _render_row(self, widget_id: str, idx: int, key: str, value: str) -> str:
def _render_row(self, widget_id: str, key: str, value: str) -> str:
return f'''
<div class="key-value-row" style="display: flex; gap: 8px; margin-bottom: 6px; align-items: center;">
<input type="text" class="kv-key" value="{self._escape(key)}" placeholder="KEY" list="{widget_id}_keys"
style="flex: 1; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;"
onchange="updateHiddenField_{widget_id}()" oninput="updateHiddenField_{widget_id}(); showKeyHint_{widget_id}(this.value)" onfocus="showKeyHint_{widget_id}(this.value)">
<input type="text" class="kv-value" value="{self._escape(value)}" placeholder="value"
style="flex: 2; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;"
onchange="updateHiddenField_{widget_id}()" oninput="updateHiddenField_{widget_id}()">
<button type="button" onclick="removeKeyValueRow_{widget_id}(this)"
style="padding: 4px 10px; cursor: pointer; background: #ba2121; color: white; border: none; border-radius: 4px; font-weight: bold;"></button>
<div class="key-value-row" style="margin-bottom: 6px;">
<div style="display: flex; gap: 8px; align-items: center;">
<input type="text" class="kv-key" value="{self._escape(key)}" placeholder="KEY" list="{widget_id}_keys"
style="flex: 1; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;">
<input type="text" class="kv-value" value="{self._escape(value)}" placeholder="value"
style="flex: 2; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;">
<datalist class="kv-value-options"></datalist>
<button type="button" onclick="removeKeyValueRow_{widget_id}(this)"
style="padding: 4px 10px; cursor: pointer; background: #ba2121; color: white; border: none; border-radius: 4px; font-weight: bold;"></button>
</div>
<div class="kv-help" style="margin-top: 4px; font-size: 11px; color: #666; font-style: italic;"></div>
</div>
'''

View File

@@ -47,11 +47,13 @@ def _collect_input_urls(args: tuple[str, ...]) -> list[str]:
def add(urls: str | list[str],
depth: int | str=0,
tag: str='',
url_allowlist: str='',
url_denylist: str='',
parser: str="auto",
plugins: str="",
persona: str='Default',
overwrite: bool=False,
update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
update: bool | None=None,
index_only: bool=False,
bg: bool=False,
created_by_id: int | None=None) -> tuple['Crawl', QuerySet['Snapshot']]:
@@ -85,6 +87,8 @@ def add(urls: str | list[str],
created_by_id = created_by_id or get_or_create_system_user_pk()
started_at = timezone.now()
if update is None:
update = not ARCHIVING_CONFIG.ONLY_NEW
# 1. Save the provided URLs to sources/2024-11-05__23-59-59__cli_add.txt
sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__cli_add.txt'
@@ -120,6 +124,8 @@ def add(urls: str | list[str],
'PLUGINS': plugins,
'DEFAULT_PERSONA': persona_name,
'PARSER': parser,
**({'URL_ALLOWLIST': url_allowlist} if url_allowlist else {}),
**({'URL_DENYLIST': url_denylist} if url_denylist else {}),
}
)
@@ -150,6 +156,9 @@ def add(urls: str | list[str],
snapshot.ensure_crawl_symlink()
return crawl, crawl.snapshot_set.all()
if bg:
crawl.create_snapshots_from_urls()
# 5. Start the crawl runner to process the queue
# The runner will:
# - Process Crawl -> create Snapshots from all URLs
@@ -192,8 +201,7 @@ def add(urls: str | list[str],
except Exception:
rel_output_str = str(crawl.output_dir)
# Build admin URL from SERVER_CONFIG
bind_addr = SERVER_CONFIG.BIND_ADDR
bind_addr = SERVER_CONFIG.BIND_ADDR or '127.0.0.1:8000'
if bind_addr.startswith('http://') or bind_addr.startswith('https://'):
base_url = bind_addr
else:
@@ -218,11 +226,13 @@ def add(urls: str | list[str],
@click.command()
@click.option('--depth', '-d', type=click.Choice([str(i) for i in range(5)]), default='0', help='Recursively archive linked pages up to N hops away')
@click.option('--tag', '-t', default='', help='Comma-separated list of tags to add to each snapshot e.g. tag1,tag2,tag3')
@click.option('--url-allowlist', '--domain-allowlist', default='', help='Comma-separated URL/domain allowlist for this crawl')
@click.option('--url-denylist', '--domain-denylist', default='', help='Comma-separated URL/domain denylist for this crawl')
@click.option('--parser', default='auto', help='Parser for reading input URLs (auto, txt, html, rss, json, jsonl, netscape, ...)')
@click.option('--plugins', '-p', default='', help='Comma-separated list of plugins to run e.g. title,favicon,screenshot,singlefile,...')
@click.option('--persona', default='Default', help='Authentication profile to use when archiving')
@click.option('--overwrite', '-F', is_flag=True, help='Overwrite existing data if URLs have been archived previously')
@click.option('--update', is_flag=True, default=ARCHIVING_CONFIG.ONLY_NEW, help='Retry any previously skipped/failed URLs when re-adding them')
@click.option('--update', is_flag=True, default=None, help='Retry any previously skipped/failed URLs when re-adding them')
@click.option('--index-only', is_flag=True, help='Just add the URLs to the index without archiving them now')
@click.option('--bg', is_flag=True, help='Run archiving in background (queue work and return immediately)')
@click.argument('urls', nargs=-1, type=click.Path())

View File

@@ -42,6 +42,16 @@ from rich import print as rprint
from archivebox.cli.cli_utils import apply_filters
def build_archiveresult_request(snapshot_id: str, plugin: str, hook_name: str = '', status: str = 'queued') -> dict:
return {
'type': 'ArchiveResult',
'snapshot_id': str(snapshot_id),
'plugin': plugin,
'hook_name': hook_name,
'status': status,
}
# =============================================================================
# CREATE
# =============================================================================
@@ -52,21 +62,21 @@ def create_archiveresults(
status: str = 'queued',
) -> int:
"""
Create ArchiveResults for Snapshots.
Create ArchiveResult request records for Snapshots.
Reads Snapshot records from stdin and creates ArchiveResult entries.
Reads Snapshot records from stdin and emits ArchiveResult request JSONL.
Pass-through: Non-Snapshot/ArchiveResult records are output unchanged.
If --plugin is specified, only creates results for that plugin.
Otherwise, creates results for all pending plugins.
If --plugin is specified, only emits requests for that plugin.
Otherwise, emits requests for all enabled snapshot hooks.
Exit codes:
0: Success
1: Failure
"""
from django.utils import timezone
from archivebox.config.configset import get_config
from archivebox.hooks import discover_hooks
from archivebox.misc.jsonl import read_stdin, write_record, TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
from archivebox.core.models import Snapshot, ArchiveResult
from archivebox.core.models import Snapshot
is_tty = sys.stdout.isatty()
@@ -135,33 +145,20 @@ def create_archiveresults(
created_count = 0
for snapshot in snapshots:
if plugin:
# Create for specific plugin only
result, created = ArchiveResult.objects.get_or_create(
snapshot=snapshot,
plugin=plugin,
defaults={
'status': status,
'retry_at': timezone.now(),
}
)
if not created and result.status in [ArchiveResult.StatusChoices.FAILED, ArchiveResult.StatusChoices.SKIPPED]:
# Reset for retry
result.status = status
result.retry_at = timezone.now()
result.save()
if not is_tty:
write_record(result.to_json())
write_record(build_archiveresult_request(snapshot.id, plugin, status=status))
created_count += 1
else:
# Create all pending plugins
snapshot.create_pending_archiveresults()
for result in snapshot.archiveresult_set.filter(status=ArchiveResult.StatusChoices.QUEUED):
config = get_config(crawl=snapshot.crawl, snapshot=snapshot)
hooks = discover_hooks('Snapshot', config=config)
for hook_path in hooks:
hook_name = hook_path.name
plugin_name = hook_path.parent.name
if not is_tty:
write_record(result.to_json())
write_record(build_archiveresult_request(snapshot.id, plugin_name, hook_name=hook_name, status=status))
created_count += 1
rprint(f'[green]Created/queued {created_count} archive results[/green]', file=sys.stderr)
rprint(f'[green]Created {created_count} archive result request records[/green]', file=sys.stderr)
return 0
@@ -205,6 +202,7 @@ def list_archiveresults(
'succeeded': 'green',
'failed': 'red',
'skipped': 'dim',
'noresults': 'dim',
'backoff': 'magenta',
}.get(result.status, 'dim')
rprint(f'[{status_color}]{result.status:10}[/{status_color}] {result.plugin:15} [dim]{result.id}[/dim] {result.snapshot.url[:40]}')
@@ -233,8 +231,6 @@ def update_archiveresults(
0: Success
1: No input or error
"""
from django.utils import timezone
from archivebox.misc.jsonl import read_stdin, write_record
from archivebox.core.models import ArchiveResult
@@ -257,7 +253,6 @@ def update_archiveresults(
# Apply updates from CLI flags
if status:
result.status = status
result.retry_at = timezone.now()
result.save()
updated_count += 1

View File

@@ -38,15 +38,16 @@ import rich_click as click
def process_archiveresult_by_id(archiveresult_id: str) -> int:
"""
Run extraction for a single ArchiveResult by ID (used by workers).
Re-run extraction for a single ArchiveResult by ID.
Triggers the ArchiveResult's state machine tick() to run the extractor
plugin, but only after claiming ownership via retry_at. This keeps direct
CLI execution aligned with the worker lifecycle and prevents duplicate hook
runs if another process already owns the same ArchiveResult.
ArchiveResults are projected status rows, not queued work items. Re-running
a single result means resetting that row and queueing its parent snapshot
through the shared crawl runner with the corresponding plugin selected.
"""
from rich import print as rprint
from django.utils import timezone
from archivebox.core.models import ArchiveResult
from archivebox.services.runner import run_crawl
try:
archiveresult = ArchiveResult.objects.get(id=archiveresult_id)
@@ -57,16 +58,27 @@ def process_archiveresult_by_id(archiveresult_id: str) -> int:
rprint(f'[blue]Extracting {archiveresult.plugin} for {archiveresult.snapshot.url}[/blue]', file=sys.stderr)
try:
# Claim-before-tick is the required calling pattern for direct
# state-machine drivers. If another worker already owns this row,
# report that and exit without running duplicate extractor side effects.
if not archiveresult.tick_claimed(lock_seconds=120):
print(f'[yellow]Extraction already claimed by another process: {archiveresult.plugin}[/yellow]')
return 0
archiveresult.reset_for_retry()
snapshot = archiveresult.snapshot
snapshot.status = snapshot.StatusChoices.QUEUED
snapshot.retry_at = timezone.now()
snapshot.save(update_fields=['status', 'retry_at', 'modified_at'])
crawl = snapshot.crawl
if crawl.status != crawl.StatusChoices.STARTED:
crawl.status = crawl.StatusChoices.QUEUED
crawl.retry_at = timezone.now()
crawl.save(update_fields=['status', 'retry_at', 'modified_at'])
run_crawl(str(crawl.id), snapshot_ids=[str(snapshot.id)], selected_plugins=[archiveresult.plugin])
archiveresult.refresh_from_db()
if archiveresult.status == ArchiveResult.StatusChoices.SUCCEEDED:
print(f'[green]Extraction succeeded: {archiveresult.output_str}[/green]')
return 0
elif archiveresult.status == ArchiveResult.StatusChoices.NORESULTS:
print(f'[dim]Extraction completed with no results: {archiveresult.output_str}[/dim]')
return 0
elif archiveresult.status == ArchiveResult.StatusChoices.FAILED:
print(f'[red]Extraction failed: {archiveresult.output_str}[/red]', file=sys.stderr)
return 1
@@ -121,8 +133,9 @@ def run_plugins(
rprint('[yellow]No snapshots provided. Pass snapshot IDs as arguments or via stdin.[/yellow]', file=sys.stderr)
return 1
# Gather snapshot IDs to process
# Gather snapshot IDs and optional plugin constraints to process
snapshot_ids = set()
requested_plugins_by_snapshot: dict[str, set[str]] = defaultdict(set)
for record in records:
record_type = record.get('type')
@@ -142,6 +155,9 @@ def run_plugins(
snapshot_id = record.get('snapshot_id')
if snapshot_id:
snapshot_ids.add(snapshot_id)
plugin_name = record.get('plugin')
if plugin_name and not plugins_list:
requested_plugins_by_snapshot[str(snapshot_id)].add(str(plugin_name))
elif 'id' in record:
# Assume it's a snapshot ID
@@ -160,26 +176,15 @@ def run_plugins(
rprint(f'[yellow]Snapshot {snapshot_id} not found[/yellow]', file=sys.stderr)
continue
# Create pending ArchiveResults if needed
if plugins_list:
# Only create for specific plugins
for plugin_name in plugins_list:
result, created = ArchiveResult.objects.get_or_create(
snapshot=snapshot,
plugin=plugin_name,
defaults={
'status': ArchiveResult.StatusChoices.QUEUED,
'retry_at': timezone.now(),
}
)
if not created and result.status in [ArchiveResult.StatusChoices.FAILED, ArchiveResult.StatusChoices.SKIPPED]:
# Reset for retry
result.status = ArchiveResult.StatusChoices.QUEUED
result.retry_at = timezone.now()
result.save()
else:
# Create all pending plugins
snapshot.create_pending_archiveresults()
for plugin_name in requested_plugins_by_snapshot.get(str(snapshot.id), set()):
existing_result = snapshot.archiveresult_set.filter(plugin=plugin_name).order_by('-created_at').first()
if existing_result and existing_result.status in [
ArchiveResult.StatusChoices.FAILED,
ArchiveResult.StatusChoices.SKIPPED,
ArchiveResult.StatusChoices.NORESULTS,
ArchiveResult.StatusChoices.BACKOFF,
]:
existing_result.reset_for_retry()
# Reset snapshot status to allow processing
if snapshot.status == Snapshot.StatusChoices.SEALED:
@@ -207,10 +212,15 @@ def run_plugins(
snapshot_ids_by_crawl[str(snapshot.crawl_id)].add(str(snapshot.id))
for crawl_id, crawl_snapshot_ids in snapshot_ids_by_crawl.items():
selected_plugins = plugins_list or sorted({
plugin
for snapshot_id in crawl_snapshot_ids
for plugin in requested_plugins_by_snapshot.get(str(snapshot_id), set())
}) or None
run_crawl(
crawl_id,
snapshot_ids=sorted(crawl_snapshot_ids),
selected_plugins=plugins_list or None,
selected_plugins=selected_plugins,
)
# Output results as JSONL (when piped) or human-readable (when TTY)

View File

@@ -18,9 +18,13 @@ from archivebox.cli.archivebox_snapshot import list_snapshots
@click.option('--tag', '-t', help='Filter by tag name')
@click.option('--crawl-id', help='Filter by crawl ID')
@click.option('--limit', '-n', type=int, help='Limit number of results')
@click.option('--sort', '-o', type=str, help='Field to sort by, e.g. url, created_at, bookmarked_at, downloaded_at')
@click.option('--csv', '-C', type=str, help='Print output as CSV with the provided fields, e.g.: timestamp,url,title')
@click.option('--with-headers', is_flag=True, help='Include column headers in structured output')
def main(status: Optional[str], url__icontains: Optional[str], url__istartswith: Optional[str],
tag: Optional[str], crawl_id: Optional[str], limit: Optional[int]) -> None:
"""List Snapshots as JSONL."""
tag: Optional[str], crawl_id: Optional[str], limit: Optional[int],
sort: Optional[str], csv: Optional[str], with_headers: bool) -> None:
"""List Snapshots."""
sys.exit(list_snapshots(
status=status,
url__icontains=url__icontains,
@@ -28,6 +32,9 @@ def main(status: Optional[str], url__icontains: Optional[str], url__istartswith:
tag=tag,
crawl_id=crawl_id,
limit=limit,
sort=sort,
csv=csv,
with_headers=with_headers,
))

View File

@@ -42,6 +42,7 @@ import rich_click as click
from rich import print as rprint
from archivebox.cli.cli_utils import apply_filters
from archivebox.personas import importers as persona_importers
# =============================================================================
@@ -440,8 +441,6 @@ def create_personas(
browser_binary = get_browser_binary(import_from)
if browser_binary:
rprint(f'[dim]Using {import_from} binary: {browser_binary}[/dim]', file=sys.stderr)
else:
browser_binary = None
created_count = 0
for name in name_list:
@@ -450,7 +449,7 @@ def create_personas(
continue
# Validate persona name to prevent path traversal
is_valid, error_msg = validate_persona_name(name)
is_valid, error_msg = persona_importers.validate_persona_name(name)
if not is_valid:
rprint(f'[red]Invalid persona name "{name}": {error_msg}[/red]', file=sys.stderr)
continue
@@ -468,49 +467,29 @@ def create_personas(
# Import browser profile if requested
if import_from in CHROMIUM_BROWSERS and source_profile_dir is not None:
persona_chrome_dir = Path(persona.CHROME_USER_DATA_DIR)
# Copy the browser profile
rprint(f'[dim]Copying browser profile to {persona_chrome_dir}...[/dim]', file=sys.stderr)
try:
# Remove existing chrome_user_data if it exists
if persona_chrome_dir.exists():
shutil.rmtree(persona_chrome_dir)
# Copy the profile directory
# We copy the entire user data dir, not just Default profile
shutil.copytree(
source_profile_dir,
persona_chrome_dir,
symlinks=True,
ignore=shutil.ignore_patterns(
'Cache', 'Code Cache', 'GPUCache', 'ShaderCache',
'Service Worker', 'GCM Store', '*.log', 'Crashpad',
'BrowserMetrics', 'BrowserMetrics-spare.pma',
'SingletonLock', 'SingletonSocket', 'SingletonCookie',
),
import_source = persona_importers.resolve_browser_import_source(import_from, profile_dir=profile)
import_result = persona_importers.import_persona_from_source(
persona,
import_source,
copy_profile=True,
import_cookies=True,
capture_storage=False,
)
rprint('[green]Copied browser profile to persona[/green]', file=sys.stderr)
# Extract cookies via CDP
rprint('[dim]Extracting cookies via CDP...[/dim]', file=sys.stderr)
if extract_cookies_via_cdp(
persona_chrome_dir,
cookies_file,
profile_dir=profile,
chrome_binary=browser_binary,
):
rprint(f'[green]Extracted cookies to {cookies_file}[/green]', file=sys.stderr)
else:
rprint('[yellow]Could not extract cookies automatically.[/yellow]', file=sys.stderr)
rprint('[dim]You can manually export cookies using a browser extension.[/dim]', file=sys.stderr)
except Exception as e:
rprint(f'[red]Failed to copy browser profile: {e}[/red]', file=sys.stderr)
rprint(f'[red]Failed to import browser profile: {e}[/red]', file=sys.stderr)
return 1
if import_result.profile_copied:
rprint('[green]Copied browser profile to persona[/green]', file=sys.stderr)
if import_result.cookies_imported:
rprint(f'[green]Extracted cookies to {cookies_file}[/green]', file=sys.stderr)
elif not import_result.profile_copied:
rprint('[yellow]Could not import cookies automatically.[/yellow]', file=sys.stderr)
for warning in import_result.warnings:
rprint(f'[yellow]{warning}[/yellow]', file=sys.stderr)
if not is_tty:
write_record({
'id': str(persona.id) if hasattr(persona, 'id') else None,
@@ -616,7 +595,7 @@ def update_personas(name: Optional[str] = None) -> int:
# Apply updates from CLI flags
if name:
# Validate new name to prevent path traversal
is_valid, error_msg = validate_persona_name(name)
is_valid, error_msg = persona_importers.validate_persona_name(name)
if not is_valid:
rprint(f'[red]Invalid new persona name "{name}": {error_msg}[/red]', file=sys.stderr)
continue

View File

@@ -89,56 +89,6 @@ SNAPSHOT_MACHINE_DIAGRAM = """
└─────────────────────────────────────────────────────────────────────────────┘
"""
ARCHIVERESULT_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ ArchiveResultMachine │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ │
│ │ QUEUED │◄─────────────────┐ │
│ │ (initial) │ │ │
│ └──┬───────┬──┘ │ │
│ │ │ │ tick() unless can_start() │
│ │ │ exceeded_max_ │ │
│ │ │ attempts │ │
│ │ ▼ │ │
│ │ ┌──────────┐ │ │
│ │ │ SKIPPED │ │ │
│ │ │ (final) │ │ │
│ │ └──────────┘ │ │
│ │ tick() when │ │
│ │ can_start() │ │
│ ▼ │ │
│ ┌─────────────┐ │ │
│ │ STARTED │──────────────────┘ │
│ │ │◄─────────────────────────────────────────────────┐ │
│ │ enter: │ │ │ │
│ │ result.run()│ tick() unless │ │ │
│ │ (execute │ is_finished() │ │ │
│ │ hook via │──────────────────────┘ │ │
│ │ run_hook())│ │ │
│ └──────┬──────┘ │ │
│ │ │ │
│ │ tick() checks status set by hook output │ │
│ ├─────────────┬─────────────┬─────────────┐ │ │
│ ▼ ▼ ▼ ▼ │ │
│ ┌───────────┐ ┌───────────┐ ┌───────────┐ ┌───────────┐ │ │
│ │ SUCCEEDED │ │ FAILED │ │ SKIPPED │ │ BACKOFF │ │ │
│ │ (final) │ │ (final) │ │ (final) │ │ │ │ │
│ └───────────┘ └───────────┘ └───────────┘ └──┬──────┬─┘ │ │
│ │ │ │ │
│ exceeded_max_ │ │ can_start()│ │
│ attempts │ │ loops back │ │
│ ▼ │ └────────────┘ │
│ ┌──────────┐ │ │
│ │ SKIPPED │◄─┘ │
│ │ (final) │ │
│ └──────────┘ │
│ │
│ Each ArchiveResult runs ONE specific hook (stored in .hook_name field) │
└─────────────────────────────────────────────────────────────────────────────┘
"""
BINARY_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ BinaryMachine │
@@ -193,8 +143,8 @@ def pluginmap(
"""
Show a map of all state machines and their associated plugin hooks.
Displays ASCII art diagrams of the core model state machines (Crawl, Snapshot,
ArchiveResult, Binary) and lists all auto-detected on_Modelname_xyz hooks
Displays ASCII art diagrams of the core queued model state machines (Crawl,
Snapshot, Binary) and lists all auto-detected on_Modelname_xyz hooks
that will run for each model's transitions.
"""
from rich.console import Console
@@ -257,17 +207,6 @@ def pluginmap(
prnt(f'[dim]User plugins: {USER_PLUGINS_DIR}[/dim]')
prnt()
# Show diagrams first (unless quiet mode)
if not quiet:
# Show ArchiveResult diagram separately since it's different
prnt(Panel(
ARCHIVERESULT_MACHINE_DIAGRAM,
title='[bold green]ArchiveResultMachine[/bold green]',
border_style='green',
expand=False,
))
prnt()
for event_name, info in model_events.items():
# Discover hooks for this event
hooks = discover_hooks(event_name, filter_disabled=not show_disabled)

View File

@@ -145,17 +145,25 @@ def process_stdin_records() -> int:
try:
archiveresult = ArchiveResult.objects.get(id=record_id)
except ArchiveResult.DoesNotExist:
archiveresult = ArchiveResult.from_json(record)
archiveresult = None
else:
# New archiveresult - create it
archiveresult = ArchiveResult.from_json(record)
archiveresult = None
snapshot_id = record.get('snapshot_id')
plugin_name = record.get('plugin')
snapshot = None
if archiveresult:
archiveresult.retry_at = timezone.now()
if archiveresult.status in [ArchiveResult.StatusChoices.FAILED, ArchiveResult.StatusChoices.SKIPPED, ArchiveResult.StatusChoices.BACKOFF]:
archiveresult.status = ArchiveResult.StatusChoices.QUEUED
archiveresult.save()
if archiveresult.status in [ArchiveResult.StatusChoices.FAILED, ArchiveResult.StatusChoices.SKIPPED, ArchiveResult.StatusChoices.NORESULTS, ArchiveResult.StatusChoices.BACKOFF]:
archiveresult.reset_for_retry()
snapshot = archiveresult.snapshot
plugin_name = plugin_name or archiveresult.plugin
elif snapshot_id:
try:
snapshot = Snapshot.objects.get(id=snapshot_id)
except Snapshot.DoesNotExist:
snapshot = None
if snapshot:
snapshot.retry_at = timezone.now()
if snapshot.status != Snapshot.StatusChoices.STARTED:
snapshot.status = Snapshot.StatusChoices.QUEUED
@@ -167,9 +175,9 @@ def process_stdin_records() -> int:
crawl.save(update_fields=['status', 'retry_at', 'modified_at'])
crawl_id = str(snapshot.crawl_id)
snapshot_ids_by_crawl[crawl_id].add(str(snapshot.id))
if archiveresult.plugin:
plugin_names_by_crawl[crawl_id].add(archiveresult.plugin)
output_records.append(archiveresult.to_json())
if plugin_name:
plugin_names_by_crawl[crawl_id].add(str(plugin_name))
output_records.append(record if not archiveresult else archiveresult.to_json())
queued_count += 1
elif record_type == TYPE_BINARY:
@@ -234,9 +242,11 @@ def run_runner(daemon: bool = False) -> int:
"""
from django.utils import timezone
from archivebox.machine.models import Machine, Process
from archivebox.services.runner import run_pending_crawls
from archivebox.services.runner import recover_orphaned_crawls, recover_orphaned_snapshots, run_pending_crawls
Process.cleanup_stale_running()
recover_orphaned_snapshots()
recover_orphaned_crawls()
Machine.current()
current = Process.current()
if current.process_type != Process.TypeChoices.ORCHESTRATOR:
@@ -305,6 +315,13 @@ def main(daemon: bool, crawl_id: str, snapshot_id: str, binary_id: str):
traceback.print_exc()
sys.exit(1)
if daemon:
if not sys.stdin.isatty():
exit_code = process_stdin_records()
if exit_code != 0:
sys.exit(exit_code)
sys.exit(run_runner(daemon=True))
if not sys.stdin.isatty():
sys.exit(process_stdin_records())
else:

View File

@@ -3,9 +3,7 @@
__package__ = 'archivebox.cli'
from typing import Iterable
import os
import sys
import subprocess
import rich_click as click
from rich import print
@@ -14,6 +12,41 @@ from archivebox.misc.util import docstring, enforce_types
from archivebox.config.common import SERVER_CONFIG
def stop_existing_background_runner(*, machine, process_model, supervisor=None, stop_worker_fn=None, log=print) -> int:
"""Stop any existing orchestrator process so the server can take ownership."""
process_model.cleanup_stale_running(machine=machine)
running_runners = list(process_model.objects.filter(
machine=machine,
status=process_model.StatusChoices.RUNNING,
process_type=process_model.TypeChoices.ORCHESTRATOR,
).order_by('created_at'))
if not running_runners:
return 0
log('[yellow][*] Stopping existing ArchiveBox background runner...[/yellow]')
if supervisor is not None and stop_worker_fn is not None:
for worker_name in ('worker_runner', 'worker_runner_watch'):
try:
stop_worker_fn(supervisor, worker_name)
except Exception:
pass
for proc in running_runners:
try:
proc.kill_tree(graceful_timeout=2.0)
except Exception:
try:
proc.terminate(graceful_timeout=2.0)
except Exception:
pass
process_model.cleanup_stale_running(machine=machine)
return len(running_runners)
@enforce_types
def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
reload: bool=False,
@@ -39,25 +72,6 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
if debug or reload:
SHELL_CONFIG.DEBUG = True
if run_in_debug:
os.environ['ARCHIVEBOX_RUNSERVER'] = '1'
if reload:
os.environ['ARCHIVEBOX_AUTORELOAD'] = '1'
from archivebox.config.common import STORAGE_CONFIG
pidfile = str(STORAGE_CONFIG.TMP_DIR / 'runserver.pid')
os.environ['ARCHIVEBOX_RUNSERVER_PIDFILE'] = pidfile
from django.utils.autoreload import DJANGO_AUTORELOAD_ENV
is_reloader_child = os.environ.get(DJANGO_AUTORELOAD_ENV) == 'true'
if not is_reloader_child:
env = os.environ.copy()
subprocess.Popen(
[sys.executable, '-m', 'archivebox', 'manage', 'runner_watch', f'--pidfile={pidfile}'],
env=env,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
from django.contrib.auth.models import User
if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
@@ -81,73 +95,62 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
except IndexError:
pass
from archivebox.workers.supervisord_util import (
get_existing_supervisord_process,
get_worker,
stop_worker,
start_server_workers,
is_port_in_use,
)
from archivebox.machine.models import Machine, Process
# Check if port is already in use
if is_port_in_use(host, int(port)):
print(f'[red][X] Error: Port {port} is already in use[/red]')
print(f' Another process (possibly daphne or runserver) is already listening on {host}:{port}')
print(' Stop the conflicting process or choose a different port')
sys.exit(1)
machine = Machine.current()
stop_existing_background_runner(
machine=machine,
process_model=Process,
supervisor=get_existing_supervisord_process(),
stop_worker_fn=stop_worker,
)
supervisor = get_existing_supervisord_process()
if supervisor:
server_worker_name = 'worker_runserver' if run_in_debug else 'worker_daphne'
server_proc = get_worker(supervisor, server_worker_name)
server_state = server_proc.get('statename') if isinstance(server_proc, dict) else None
if server_state == 'RUNNING':
runner_proc = get_worker(supervisor, 'worker_runner')
runner_watch_proc = get_worker(supervisor, 'worker_runner_watch')
runner_state = runner_proc.get('statename') if isinstance(runner_proc, dict) else None
runner_watch_state = runner_watch_proc.get('statename') if isinstance(runner_watch_proc, dict) else None
print('[red][X] Error: ArchiveBox server is already running[/red]')
print(f' [green]√[/green] Web server ({server_worker_name}) is RUNNING on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
if runner_state == 'RUNNING':
print(' [green]√[/green] Background runner (worker_runner) is RUNNING')
if runner_watch_state == 'RUNNING':
print(' [green]√[/green] Reload watcher (worker_runner_watch) is RUNNING')
print()
print('[yellow]To stop the existing server, run:[/yellow]')
print(' pkill -f "archivebox server"')
print(' pkill -f supervisord')
sys.exit(1)
if run_in_debug:
from django.core.management import call_command
print('[green][+] Starting ArchiveBox webserver in DEBUG mode...[/green]')
print(f' [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
print(f' [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
print(' > Writing ArchiveBox error log to ./logs/errors.log')
if not reload:
runserver_args.append('--noreload') # '--insecure'
if nothreading:
runserver_args.append('--nothreading')
call_command("runserver", *runserver_args)
else:
from archivebox.workers.supervisord_util import (
get_existing_supervisord_process,
get_worker,
start_server_workers,
is_port_in_use,
)
from archivebox.machine.models import Machine, Process
# Check if port is already in use
if is_port_in_use(host, int(port)):
print(f'[red][X] Error: Port {port} is already in use[/red]')
print(f' Another process (possibly daphne) is already listening on {host}:{port}')
print(' Stop the conflicting process or choose a different port')
sys.exit(1)
# Check if the background crawl runner is already running for this data directory
if Process.objects.filter(
machine=Machine.current(),
status=Process.StatusChoices.RUNNING,
process_type=Process.TypeChoices.ORCHESTRATOR,
).exists():
print('[red][X] Error: ArchiveBox background runner is already running for this data directory[/red]')
print(' Stop the existing runner before starting a new server')
print(' To stop: pkill -f "archivebox run --daemon"')
sys.exit(1)
# Check if supervisord is already running
supervisor = get_existing_supervisord_process()
if supervisor:
daphne_proc = get_worker(supervisor, 'worker_daphne')
daphne_state = daphne_proc.get('statename') if isinstance(daphne_proc, dict) else None
# If daphne is already running, error out
if daphne_state == 'RUNNING':
runner_proc = get_worker(supervisor, 'worker_runner')
runner_state = runner_proc.get('statename') if isinstance(runner_proc, dict) else None
print('[red][X] Error: ArchiveBox server is already running[/red]')
print(f' [green]√[/green] Web server (worker_daphne) is RUNNING on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
if runner_state == 'RUNNING':
print(' [green]√[/green] Background runner (worker_runner) is RUNNING')
print()
print('[yellow]To stop the existing server, run:[/yellow]')
print(' pkill -f "archivebox server"')
print(' pkill -f supervisord')
sys.exit(1)
# Otherwise, daphne is not running - fall through to start it
# No existing workers found - start new ones
print('[green][+] Starting ArchiveBox webserver...[/green]')
print(f' [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
print(f' [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
print(' > Writing ArchiveBox error log to ./logs/errors.log')
print()
start_server_workers(host=host, port=port, daemonize=daemonize)
print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
print(f' [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
print(f' [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
print(' > Writing ArchiveBox error log to ./logs/errors.log')
print()
start_server_workers(host=host, port=port, daemonize=daemonize, debug=run_in_debug, reload=reload, nothreading=nothreading)
print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
@click.command()

View File

@@ -172,6 +172,9 @@ def list_snapshots(
tag: Optional[str] = None,
crawl_id: Optional[str] = None,
limit: Optional[int] = None,
sort: Optional[str] = None,
csv: Optional[str] = None,
with_headers: bool = False,
) -> int:
"""
List Snapshots as JSONL with optional filters.
@@ -182,7 +185,11 @@ def list_snapshots(
from archivebox.misc.jsonl import write_record
from archivebox.core.models import Snapshot
is_tty = sys.stdout.isatty()
if with_headers and not csv:
rprint('[red]--with-headers requires --csv[/red]', file=sys.stderr)
return 2
is_tty = sys.stdout.isatty() and not csv
queryset = Snapshot.objects.all().order_by('-created_at')
@@ -199,7 +206,29 @@ def list_snapshots(
if tag:
queryset = queryset.filter(tags__name__iexact=tag)
if sort:
queryset = queryset.order_by(sort)
count = 0
if csv:
cols = [col.strip() for col in csv.split(',') if col.strip()]
if not cols:
rprint('[red]No CSV columns provided[/red]', file=sys.stderr)
return 2
rows: list[str] = []
if with_headers:
rows.append(','.join(cols))
for snapshot in queryset.iterator(chunk_size=500):
rows.append(snapshot.to_csv(cols=cols, separator=','))
count += 1
output = '\n'.join(rows)
if output:
sys.stdout.write(output)
if not output.endswith('\n'):
sys.stdout.write('\n')
rprint(f'[dim]Listed {count} snapshots[/dim]', file=sys.stderr)
return 0
for snapshot in queryset:
if is_tty:
status_color = {

View File

@@ -1,6 +1,7 @@
__package__ = "archivebox.config"
import re
import secrets
import sys
import shutil
from typing import ClassVar, Dict, Optional, List
@@ -8,7 +9,6 @@ from pathlib import Path
from rich import print
from pydantic import Field, field_validator
from django.utils.crypto import get_random_string
from archivebox.config.configset import BaseConfigSet
@@ -104,7 +104,7 @@ class ServerConfig(BaseConfigSet):
"danger-onedomain-fullreplay",
)
SECRET_KEY: str = Field(default_factory=lambda: get_random_string(50, "abcdefghijklmnopqrstuvwxyz0123456789_"))
SECRET_KEY: str = Field(default_factory=lambda: ''.join(secrets.choice("abcdefghijklmnopqrstuvwxyz0123456789_") for _ in range(50)))
BIND_ADDR: str = Field(default="127.0.0.1:8000")
LISTEN_HOST: str = Field(default="archivebox.localhost:8000")
ADMIN_BASE_URL: str = Field(default="")

View File

@@ -1,10 +1,13 @@
__package__ = 'archivebox.config'
import html
import json
import os
import shutil
import inspect
import re
from pathlib import Path
from typing import Any, Dict
from typing import Any, Callable, Dict
from urllib.parse import quote, urlencode
from django.http import HttpRequest
from django.utils import timezone
from django.utils.html import format_html
@@ -18,16 +21,27 @@ from archivebox.misc.util import parse_date
from archivebox.machine.models import Binary
ABX_PLUGINS_DOCS_BASE_URL = 'https://archivebox.github.io/abx-plugins/'
ABX_PLUGINS_GITHUB_BASE_URL = 'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/'
LIVE_CONFIG_BASE_URL = '/admin/environment/config/'
ENVIRONMENT_BINARIES_BASE_URL = '/admin/environment/binaries/'
INSTALLED_BINARIES_BASE_URL = '/admin/machine/binary/'
# Common binaries to check for
KNOWN_BINARIES = [
'wget', 'curl', 'chromium', 'chrome', 'google-chrome', 'google-chrome-stable',
'node', 'npm', 'npx', 'yt-dlp', 'ytdlp', 'youtube-dl',
'node', 'npm', 'npx', 'yt-dlp',
'git', 'singlefile', 'readability-extractor', 'mercury-parser',
'python3', 'python', 'bash', 'zsh',
'ffmpeg', 'ripgrep', 'rg', 'sonic', 'archivebox',
]
CANONICAL_BINARY_ALIASES = {
'youtube-dl': 'yt-dlp',
'ytdlp': 'yt-dlp',
}
def is_superuser(request: HttpRequest) -> bool:
    """Return True only when the request's user object carries the is_superuser flag."""
    flag = getattr(request.user, 'is_superuser', False)
    return bool(flag)
@@ -38,6 +52,249 @@ def format_parsed_datetime(value: object) -> str:
return parsed.strftime("%Y-%m-%d %H:%M:%S") if parsed else ""
# Tokenizer for lightweight JSON syntax highlighting: a quoted string followed
# by a colon is a "key"; otherwise it is a plain string.  Booleans, null, and
# numbers each get their own named group.
JSON_TOKEN_RE = re.compile(
    r'(?P<key>"(?:\\u[a-fA-F0-9]{4}|\\[^u]|[^\\"])*")(?=\s*:)'
    r'|(?P<string>"(?:\\u[a-fA-F0-9]{4}|\\[^u]|[^\\"])*")'
    r'|(?P<boolean>\btrue\b|\bfalse\b)'
    r'|(?P<null>\bnull\b)'
    r'|(?P<number>-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)'
)


def render_code_block(text: str, *, highlighted: bool = False) -> str:
    """Render *text* inside a scrollable, styled <pre><code> block.

    The text is HTML-escaped first (quotes are left intact).  When
    ``highlighted=True``, JSON tokens matched by JSON_TOKEN_RE are wrapped in
    colored <span> tags for inline syntax highlighting.
    """
    escaped = html.escape(text, quote=False)
    if highlighted:
        token_styles = {
            'key': 'color: #0550ae;',
            'string': 'color: #0a7f45;',
            'boolean': 'color: #8250df; font-weight: 600;',
            'null': 'color: #6e7781; font-style: italic;',
            'number': 'color: #b35900;',
        }

        def _colorize(match: re.Match[str]) -> str:
            # Match.lastgroup names whichever named alternative matched.
            style = token_styles[match.lastgroup]
            return f'<span style="{style}">{match.group(0)}</span>'

        escaped = JSON_TOKEN_RE.sub(_colorize, escaped)
    return (
        '<pre style="max-height: 600px; overflow: auto; background: #f6f8fa; '
        'border: 1px solid #d0d7de; border-radius: 6px; padding: 12px; margin: 0;">'
        '<code style="font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, '
        '\'Liberation Mono\', monospace; white-space: pre; line-height: 1.5;">'
        f'{escaped}'
        '</code></pre>'
    )
def render_highlighted_json_block(value: Any) -> str:
    """Pretty-print *value* as indented JSON and wrap it in a highlighted code block."""
    pretty = json.dumps(value, indent=2, ensure_ascii=False)
    return render_code_block(pretty, highlighted=True)
def get_plugin_docs_url(plugin_name: str) -> str:
    """Return the anchor URL for *plugin_name* on the ABX plugins docs site."""
    return f'{ABX_PLUGINS_DOCS_BASE_URL}#{plugin_name}'
def get_plugin_hook_source_url(plugin_name: str, hook_name: str) -> str:
    """Return the GitHub source URL for one hook file inside a builtin ABX plugin."""
    return f'{ABX_PLUGINS_GITHUB_BASE_URL}{quote(plugin_name)}/{quote(hook_name)}'
def get_live_config_url(key: str) -> str:
    """Return the admin live-config detail URL for a config *key*."""
    return f'{LIVE_CONFIG_BASE_URL}{quote(key)}/'
def get_environment_binary_url(name: str) -> str:
    """Return the admin environment-binaries detail URL for a binary *name*."""
    return f'{ENVIRONMENT_BINARIES_BASE_URL}{quote(name)}/'
def get_installed_binary_change_url(name: str, binary: Any) -> str | None:
    """Return the admin change URL for an installed Binary record, or None if it has no id.

    NOTE(review): `_changelist_filters` is double-urlencoded here (the inner
    querystring is encoded, then embedded as a single encoded parameter) —
    presumably matching Django admin's nested preserved-filters convention;
    confirm against the admin changelist handling.
    """
    binary_id = getattr(binary, 'id', None)
    if not binary_id:
        return None
    # Prefer the record's own admin_change_url when present; otherwise build the default admin path.
    base_url = getattr(binary, 'admin_change_url', None) or f'{INSTALLED_BINARIES_BASE_URL}{binary_id}/change/'
    changelist_filters = urlencode({'q': canonical_binary_name(name)})
    return f'{base_url}?{urlencode({"_changelist_filters": changelist_filters})}'
def get_machine_admin_url() -> str | None:
    """Best-effort lookup of the current Machine's admin change URL.

    Returns None on any failure (import error, no current machine, etc.) so
    callers can simply skip rendering the link.
    """
    try:
        from archivebox.machine.models import Machine
    except Exception:
        return None
    try:
        return Machine.current().admin_change_url
    except Exception:
        return None
def render_code_tag_list(values: list[str]) -> str:
    """Render *values* as a wrapping row of pill-styled <code> tags.

    Returns a gray '(none)' placeholder when the list is empty.
    """
    if not values:
        return '<span style="color: #6e7781;">(none)</span>'
    rendered: list[str] = []
    for value in values:
        pill = format_html(
            '<code style="display: inline-block; margin: 0 6px 6px 0; padding: 2px 6px; '
            'background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 999px;">{}</code>',
            value,
        )
        rendered.append(str(pill))
    return f'<div style="display: flex; flex-wrap: wrap;">{"".join(rendered)}</div>'
def render_plugin_metadata_html(config: dict[str, Any]) -> str:
    """Render a plugin's config.json metadata (title, description, requirements)
    as a stack of labeled rows for the plugin detail page.

    Required plugins/binaries are rendered as linked pills; MIME types as plain
    pills.  Missing fields fall back to '(none)'.
    """
    rows = (
        ('Title', config.get('title') or '(none)'),
        ('Description', config.get('description') or '(none)'),
        ('Required Plugins', mark_safe(render_link_tag_list(config.get('required_plugins') or [], get_plugin_docs_url))),
        ('Required Binaries', mark_safe(render_link_tag_list(config.get('required_binaries') or [], get_environment_binary_url))),
        ('Output MIME Types', mark_safe(render_code_tag_list(config.get('output_mimetypes') or []))),
    )
    rendered_rows = ''.join(
        str(format_html(
            '<div style="margin: 0 0 14px 0;">'
            '<div style="font-weight: 600; margin-bottom: 4px;">{}</div>'
            '<div>{}</div>'
            '</div>',
            label,
            value,
        ))
        for label, value in rows
    )
    return f'<div style="margin: 4px 0 0 0;">{rendered_rows}</div>'
def render_link_tag_list(values: list[str], url_resolver: Callable[[str], str] | None = None) -> str:
if not values:
return '<span style="color: #6e7781;">(none)</span>'
tags = []
for value in values:
if url_resolver is None:
tags.append(str(format_html(
'<code style="display: inline-block; margin: 0 6px 6px 0; padding: 2px 6px; '
'background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 999px;">{}</code>',
value,
)))
else:
tags.append(str(format_html(
'<a href="{}" style="text-decoration: none;">'
'<code style="display: inline-block; margin: 0 6px 6px 0; padding: 2px 6px; '
'background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 999px;">{}</code>'
'</a>',
url_resolver(value),
value,
)))
return f'<div style="display: flex; flex-wrap: wrap;">{"".join(tags)}</div>'
def render_property_links(prop_name: str, prop_info: dict[str, Any], machine_admin_url: str | None) -> str:
    """Build the inline action links displayed under one config property.

    Always links the computed value; conditionally adds the machine override
    editor, the `x-fallback` key, each `x-aliases` entry, and (for `*_BINARY`
    properties with a string default) the binary's environment detail page.
    """
    links = [
        str(format_html('<a href="{}">Computed value</a>', get_live_config_url(prop_name))),
    ]
    if machine_admin_url:
        links.append(str(format_html('<a href="{}">Edit override</a>', machine_admin_url)))
    fallback = prop_info.get('x-fallback')
    if isinstance(fallback, str) and fallback:
        links.append(str(format_html('<a href="{}">Fallback: <code>{}</code></a>', get_live_config_url(fallback), fallback)))
    aliases = prop_info.get('x-aliases') or []
    if isinstance(aliases, list):
        for alias in aliases:
            if isinstance(alias, str) and alias:
                links.append(str(format_html('<a href="{}">Alias: <code>{}</code></a>', get_live_config_url(alias), alias)))
    default = prop_info.get('default')
    if prop_name.endswith('_BINARY') and isinstance(default, str) and default:
        links.append(str(format_html('<a href="{}">Binary: <code>{}</code></a>', get_environment_binary_url(default), default)))
    return ' &nbsp; '.join(links)
def render_config_properties_html(properties: dict[str, Any], machine_admin_url: str | None) -> str:
    """Render a plugin's JSON-schema config properties as a stack of styled cards.

    Each card shows the property name (linked to its live-config page), its
    type, description, related action links, and default value if present.
    A header row links to the dependencies / installed-binaries admin pages.
    """
    header_links = [
        str(format_html('<a href="{}">Dependencies</a>', ENVIRONMENT_BINARIES_BASE_URL)),
        str(format_html('<a href="{}">Installed Binaries</a>', INSTALLED_BINARIES_BASE_URL)),
    ]
    if machine_admin_url:
        header_links.insert(0, str(format_html('<a href="{}">Machine Config Editor</a>', machine_admin_url)))
    cards = [
        f'<div style="margin: 0 0 16px 0;">{" &nbsp; | &nbsp; ".join(header_links)}</div>'
    ]
    for prop_name, prop_info in properties.items():
        prop_type = prop_info.get('type', 'unknown')
        # JSON schema allows a list of types (e.g. ["string", "null"]); join them for display.
        if isinstance(prop_type, list):
            prop_type = ' | '.join(str(type_name) for type_name in prop_type)
        prop_desc = prop_info.get('description', '')
        default_html = ''
        if 'default' in prop_info:
            default_html = str(format_html(
                '<div style="margin-top: 6px;"><b>Default:</b> <code>{}</code></div>',
                prop_info['default'],
            ))
        description_html = prop_desc or mark_safe('<span style="color: #6e7781;">(no description)</span>')
        cards.append(str(format_html(
            '<div style="margin: 0 0 14px 0; padding: 12px; background: #f6f8fa; border: 1px solid #d0d7de; border-radius: 6px;">'
            '<div style="margin-bottom: 6px;">'
            '<a href="{}" style="font-weight: 600;"><code>{}</code></a>'
            ' <span style="color: #6e7781;">({})</span>'
            '</div>'
            '<div style="margin-bottom: 6px;">{}</div>'
            '<div style="font-size: 0.95em;">{}</div>'
            '{}'
            '</div>',
            get_live_config_url(prop_name),
            prop_name,
            prop_type,
            description_html,
            mark_safe(render_property_links(prop_name, prop_info, machine_admin_url)),
            mark_safe(default_html),
        )))
    return ''.join(cards)
def render_hook_links_html(plugin_name: str, hooks: list[str], source: str) -> str:
    """List a plugin's hook files, linking each to its GitHub source when the
    plugin is builtin; user-plugin hooks are rendered as plain code tags.

    Returns a gray '(none)' placeholder when there are no hooks.
    """
    if not hooks:
        return '<span style="color: #6e7781;">(none)</span>'
    link_to_source = source == 'builtin'
    chunks: list[str] = []
    for hook_name in hooks:
        if link_to_source:
            rendered = format_html(
                '<div style="margin: 0 0 8px 0;">'
                '<a href="{}" target="_blank" rel="noopener noreferrer"><code>{}</code></a>'
                '</div>',
                get_plugin_hook_source_url(plugin_name, hook_name),
                hook_name,
            )
        else:
            rendered = format_html(
                '<div style="margin: 0 0 8px 0;"><code>{}</code></div>',
                hook_name,
            )
        chunks.append(str(rendered))
    return ''.join(chunks)
def render_binary_detail_description(name: str, merged: dict[str, Any], db_binary: Any) -> str:
    """Detail-page description for a binary: its abspath, plus a link to the
    persisted Binary admin record when one exists.

    *merged* is the dict produced by serialize_binary_record().
    """
    installed_binary_url = get_installed_binary_change_url(name, db_binary)
    if installed_binary_url:
        return str(format_html(
            '<code>{}</code><br/>'
            '<a href="{}">View Installed Binary Record</a>',
            merged['abspath'],
            installed_binary_url,
        ))
    return str(format_html('<code>{}</code>', merged['abspath']))
def obj_to_yaml(obj: Any, indent: int = 0) -> str:
indent_str = " " * indent
if indent == 0:
@@ -80,21 +337,41 @@ def obj_to_yaml(obj: Any, indent: int = 0) -> str:
return f" {str(obj)}"
def get_detected_binaries() -> Dict[str, Dict[str, Any]]:
"""Detect available binaries using shutil.which."""
binaries = {}
def canonical_binary_name(name: str) -> str:
    """Map legacy/alias binary names (e.g. youtube-dl, ytdlp) to their canonical name."""
    return CANONICAL_BINARY_ALIASES.get(name, name)
for name in KNOWN_BINARIES:
path = shutil.which(name)
if path:
binaries[name] = {
'name': name,
'abspath': path,
'version': None, # Could add version detection later
'is_available': True,
}
return binaries
def _binary_sort_key(binary: Binary) -> tuple[int, int, int, Any]:
    """Ranking key for Binary rows: installed > has version > has abspath,
    with most-recently-modified breaking ties.

    Used with max() to pick the best record among duplicates for one name.
    """
    return (
        int(binary.status == Binary.StatusChoices.INSTALLED),
        int(bool(binary.version)),
        int(bool(binary.abspath)),
        binary.modified_at,
    )
def get_db_binaries_by_name() -> Dict[str, Binary]:
    """Map each canonical binary name to its best Binary row.

    Rows are grouped under canonical_binary_name() and the winner per group is
    chosen by _binary_sort_key (installed/versioned/recent rows win).
    """
    by_name: Dict[str, list[Binary]] = {}
    for record in Binary.objects.all():
        key = canonical_binary_name(record.name)
        if key not in by_name:
            by_name[key] = []
        by_name[key].append(record)
    best: Dict[str, Binary] = {}
    for key, records in by_name.items():
        best[key] = max(records, key=_binary_sort_key)
    return best
def serialize_binary_record(name: str, binary: Binary | None) -> Dict[str, Any]:
    """Flatten a Binary row (or None) into a dict of plain display strings.

    Missing/None attributes become empty strings.  `is_available` is True only
    when the record has INSTALLED status AND a recorded abspath.
    """
    is_installed = bool(binary and binary.status == Binary.StatusChoices.INSTALLED)
    return {
        'name': canonical_binary_name(name),
        'version': str(getattr(binary, 'version', '') or ''),
        'binprovider': str(getattr(binary, 'binprovider', '') or ''),
        'abspath': str(getattr(binary, 'abspath', '') or ''),
        'sha256': str(getattr(binary, 'sha256', '') or ''),
        'status': str(getattr(binary, 'status', '') or ''),
        'is_available': is_installed and bool(getattr(binary, 'abspath', '') or ''),
    }
def get_filesystem_plugins() -> Dict[str, Dict[str, Any]]:
@@ -150,29 +427,18 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
"Found Abspath": [],
}
# Get binaries from database (previously detected/installed)
db_binaries = {b.name: b for b in Binary.objects.all()}
# Get currently detectable binaries
detected = get_detected_binaries()
# Merge and display
all_binary_names = sorted(set(list(db_binaries.keys()) + list(detected.keys())))
db_binaries = get_db_binaries_by_name()
all_binary_names = sorted(db_binaries.keys())
for name in all_binary_names:
db_binary = db_binaries.get(name)
detected_binary = detected.get(name)
merged = serialize_binary_record(name, db_binaries.get(name))
rows['Binary Name'].append(ItemLink(name, key=name))
if db_binary:
rows['Found Version'].append(f'{db_binary.version}' if db_binary.version else '✅ found')
rows['Provided By'].append(db_binary.binprovider or 'PATH')
rows['Found Abspath'].append(str(db_binary.abspath or ''))
elif detected_binary:
rows['Found Version'].append('✅ found')
rows['Provided By'].append('PATH')
rows['Found Abspath'].append(detected_binary['abspath'])
if merged['is_available']:
rows['Found Version'].append(f"{merged['version']}" if merged['version'] else '✅ found')
rows['Provided By'].append(merged['binprovider'] or '-')
rows['Found Abspath'].append(merged['abspath'] or '-')
else:
rows['Found Version'].append('❌ missing')
rows['Provided By'].append('-')
@@ -187,41 +453,22 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
@render_with_item_view
def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert is_superuser(request), 'Must be a superuser to view configuration settings.'
key = canonical_binary_name(key)
# Try database first
try:
binary = Binary.objects.get(name=key)
section: SectionData = {
"name": binary.name,
"description": str(binary.abspath or ''),
"fields": {
'name': binary.name,
'binprovider': binary.binprovider,
'abspath': str(binary.abspath),
'version': binary.version,
'sha256': binary.sha256,
},
"help_texts": {},
}
return ItemContext(
slug=key,
title=key,
data=[section],
)
except Binary.DoesNotExist:
pass
db_binary = get_db_binaries_by_name().get(key)
merged = serialize_binary_record(key, db_binary)
# Try to detect from PATH
path = shutil.which(key)
if path:
if merged['is_available']:
section: SectionData = {
"name": key,
"description": path,
"description": mark_safe(render_binary_detail_description(key, merged, db_binary)),
"fields": {
'name': key,
'binprovider': 'PATH',
'abspath': path,
'version': 'unknown',
'binprovider': merged['binprovider'] or '-',
'abspath': merged['abspath'] or 'not found',
'version': merged['version'] or 'unknown',
'sha256': merged['sha256'],
'status': merged['status'],
},
"help_texts": {},
}
@@ -233,12 +480,13 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
section: SectionData = {
"name": key,
"description": "Binary not found",
"description": "No persisted Binary record found",
"fields": {
'name': key,
'binprovider': 'not installed',
'abspath': 'not found',
'version': 'N/A',
'binprovider': merged['binprovider'] or 'not recorded',
'abspath': merged['abspath'] or 'not recorded',
'version': merged['version'] or 'N/A',
'status': merged['status'] or 'unrecorded',
},
"help_texts": {},
}
@@ -293,8 +541,6 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
@render_with_item_view
def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
import json
assert is_superuser(request), 'Must be a superuser to view configuration settings.'
plugins = get_filesystem_plugins()
@@ -308,45 +554,61 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
)
# Base fields that all plugins have
docs_url = get_plugin_docs_url(plugin['name'])
machine_admin_url = get_machine_admin_url()
fields = {
"id": plugin['id'],
"name": plugin['name'],
"source": plugin['source'],
"path": plugin['path'],
"hooks": ', '.join(plugin['hooks']),
}
# Add config.json data if available
if plugin.get('config'):
config_json = json.dumps(plugin['config'], indent=2)
fields["config.json"] = mark_safe(
'<pre style="max-height: 600px; overflow-y: auto; background: #f5f5f5; '
f'padding: 10px; border-radius: 4px;"><code>{config_json}</code></pre>'
)
# Also extract and display individual config properties for easier viewing
if 'properties' in plugin['config']:
config_properties = plugin['config']['properties']
properties_summary = []
for prop_name, prop_info in config_properties.items():
prop_type = prop_info.get('type', 'unknown')
prop_desc = prop_info.get('description', '')
properties_summary.append(f"{prop_name} ({prop_type}): {prop_desc}")
if properties_summary:
fields["Config Properties"] = mark_safe('<br/>'.join(properties_summary))
section: SectionData = {
sections: list[SectionData] = [{
"name": plugin['name'],
"description": plugin['path'],
"description": format_html(
'<code>{}</code><br/><a href="{}" target="_blank" rel="noopener noreferrer">ABX Plugin Docs</a>',
plugin['path'],
docs_url,
),
"fields": fields,
"help_texts": {},
}
}]
if plugin['hooks']:
sections.append({
"name": "Hooks",
"description": mark_safe(render_hook_links_html(plugin['name'], plugin['hooks'], plugin['source'])),
"fields": {},
"help_texts": {},
})
if plugin.get('config'):
sections.append({
"name": "Plugin Metadata",
"description": mark_safe(render_plugin_metadata_html(plugin['config'])),
"fields": {},
"help_texts": {},
})
sections.append({
"name": "config.json",
"description": mark_safe(render_highlighted_json_block(plugin['config'])),
"fields": {},
"help_texts": {},
})
config_properties = plugin['config'].get('properties', {})
if config_properties:
sections.append({
"name": "Config Properties",
"description": mark_safe(render_config_properties_html(config_properties, machine_admin_url)),
"fields": {},
"help_texts": {},
})
return ItemContext(
slug=key,
title=plugin['name'],
data=[section],
data=sections,
)

View File

@@ -1,14 +1,23 @@
__package__ = 'archivebox.core'
import html
import json
import os
import shlex
from pathlib import Path
from urllib.parse import quote
from functools import reduce
from operator import and_
from django.contrib import admin
from django.db.models import Min, Q, TextField
from django.db.models.functions import Cast
from django.utils.html import format_html
from django.utils.safestring import mark_safe
from django.core.exceptions import ValidationError
from django.urls import reverse, resolve
from django.utils import timezone
from django.utils.text import smart_split
from archivebox.config import DATA_DIR
from archivebox.config.common import SERVER_CONFIG
@@ -16,11 +25,71 @@ from archivebox.misc.paginators import AccelleratedPaginator
from archivebox.base_models.admin import BaseModelAdmin
from archivebox.hooks import get_plugin_icon
from archivebox.core.host_utils import build_snapshot_url
from archivebox.core.widgets import InlineTagEditorWidget
from archivebox.core.views import LIVE_PLUGIN_BASE_URL
from archivebox.core.models import ArchiveResult, Snapshot
def _stringify_env_value(value) -> str:
if value is None:
return ''
if isinstance(value, str):
return value
return json.dumps(value, separators=(',', ':'))
def _quote_shell_string(value: str) -> str:
return "'" + str(value).replace("'", "'\"'\"'") + "'"
def _get_replay_source_url(result: ArchiveResult) -> str:
    """Prefer the SOURCE_URL recorded in the result's process env, falling back
    to the snapshot URL, always as a plain string ('' when neither exists)."""
    process = getattr(result, 'process', None)
    env = getattr(process, 'env', None) or {}
    return str(env.get('SOURCE_URL') or result.snapshot.url or '')
def build_abx_dl_display_command(result: ArchiveResult) -> str:
    """Human-readable `abx-dl` invocation for this result.

    Includes --plugins when a plugin or source URL is known, and the
    shell-quoted source URL when one is available.
    """
    source_url = _get_replay_source_url(result)
    plugin_name = str(result.plugin or '').strip()
    parts = ['abx-dl']
    if plugin_name or source_url:
        parts.append(f'--plugins={plugin_name}')
    if source_url:
        parts.append(_quote_shell_string(source_url))
    return ' '.join(parts)
def build_abx_dl_replay_command(result: ArchiveResult) -> str:
    """Copy-pasteable shell command that replays this result: cd into the
    snapshot dir, re-export the recorded process env (None values skipped,
    keys sorted), then run the display command."""
    base_cmd = build_abx_dl_display_command(result)
    env = getattr(getattr(result, 'process', None), 'env', None) or {}
    exported = ' '.join(
        f'{key}={shlex.quote(_stringify_env_value(val))}'
        for key, val in sorted(env.items())
        if val is not None
    )
    cwd = shlex.quote(str(result.snapshot_dir))
    if exported:
        return f'cd {cwd}; env {exported} {base_cmd}'
    return f'cd {cwd}; {base_cmd}'
def get_plugin_admin_url(plugin_name: str) -> str:
    """Resolve the live-admin URL for *plugin_name*, namespaced as builtin.* or user.*.

    Scans the known plugin dirs for a matching directory name; falls back to
    the builtin namespace when the plugin dir cannot be located or belongs to
    neither root.
    """
    # Imported lazily to avoid import cycles at module load time — TODO confirm.
    from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, iter_plugin_dirs
    plugin_dir = next((path.resolve() for path in iter_plugin_dirs() if path.name == plugin_name), None)
    if plugin_dir:
        builtin_root = BUILTIN_PLUGINS_DIR.resolve()
        if plugin_dir.is_relative_to(builtin_root):
            return f'{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/'
        user_root = USER_PLUGINS_DIR.resolve()
        if plugin_dir.is_relative_to(user_root):
            return f'{LIVE_PLUGIN_BASE_URL}user.{quote(plugin_name)}/'
    return f'{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/'
def render_archiveresults_list(archiveresults_qs, limit=50):
"""Render a nice inline list view of archive results with status, plugin, output, and actions."""
@@ -35,6 +104,9 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
'failed': ('#991b1b', '#fee2e2'), # red
'queued': ('#6b7280', '#f3f4f6'), # gray
'started': ('#92400e', '#fef3c7'), # amber
'backoff': ('#92400e', '#fef3c7'),
'skipped': ('#475569', '#f1f5f9'),
'noresults': ('#475569', '#f1f5f9'),
}
rows = []
@@ -54,8 +126,10 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
if len(full_output) > 60:
output_display += '...'
# Get full command as tooltip
cmd_str = ' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd or '-')
display_cmd = build_abx_dl_display_command(result)
replay_cmd = build_abx_dl_replay_command(result)
cmd_str_escaped = html.escape(display_cmd)
cmd_attr = html.escape(replay_cmd, quote=True)
# Build output link - use embed_path() which checks output_files first
embed_path = result.embed_path() if hasattr(result, 'embed_path') else None
@@ -77,7 +151,7 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
<a href="{reverse('admin:core_archiveresult_change', args=[result.id])}"
style="color: #2563eb; text-decoration: none; font-family: ui-monospace, monospace; font-size: 11px;"
title="View/edit archive result">
<code>{str(result.id)[:8]}</code>
<code>{str(result.id)[-8:]}</code>
</a>
</td>
<td style="padding: 10px 12px; white-space: nowrap;">
@@ -140,7 +214,15 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
<div style="font-size: 11px; color: #64748b; margin-top: 8px;">
<b>Command:</b>
</div>
<pre style="margin: 0; padding: 8px; background: #1e293b; border-radius: 4px; color: #e2e8f0; font-size: 11px; white-space: pre-wrap; word-break: break-all;">{cmd_str}</pre>
<div style="position: relative; margin: 0; padding: 8px 56px 8px 8px; background: #1e293b; border-radius: 4px;">
<button type="button"
data-command="{cmd_attr}"
onclick="(function(btn){{var text=btn.dataset.command||''; if(navigator.clipboard&&navigator.clipboard.writeText){{navigator.clipboard.writeText(text);}} else {{var ta=document.createElement('textarea'); ta.value=text; document.body.appendChild(ta); ta.select(); document.execCommand('copy'); document.body.removeChild(ta);}}}})(this); return false;"
style="position: absolute; top: 6px; right: 6px; padding: 2px 8px; border: 0; border-radius: 4px; background: #334155; color: #e2e8f0; font-size: 11px; cursor: pointer;">
Copy
</button>
<code title="{cmd_attr}" style="display: block; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; color: #e2e8f0; font-size: 11px;">{cmd_str_escaped}</code>
</div>
</div>
</details>
</td>
@@ -165,7 +247,7 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
<table style="width: 100%; border-collapse: collapse; font-size: 14px;">
<thead>
<tr style="background: #f8fafc; border-bottom: 2px solid #e2e8f0;">
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">ID</th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Details</th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Status</th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; width: 32px;"></th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Plugin</th>
@@ -193,7 +275,7 @@ class ArchiveResultInline(admin.TabularInline):
extra = 0
sort_fields = ('end_ts', 'plugin', 'output_str', 'status', 'cmd_version')
readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
fields = ('start_ts', 'end_ts', *readonly_fields, 'plugin', 'cmd', 'cmd_version', 'pwd', 'status', 'retry_at', 'output_str')
fields = ('start_ts', 'end_ts', *readonly_fields, 'plugin', 'cmd', 'cmd_version', 'pwd', 'status', 'output_str')
# exclude = ('id',)
ordering = ('end_ts',)
show_change_link = True
@@ -259,10 +341,11 @@ class ArchiveResultInline(admin.TabularInline):
class ArchiveResultAdmin(BaseModelAdmin):
list_display = ('id', 'created_at', 'snapshot_info', 'tags_str', 'status', 'plugin_with_icon', 'cmd_str', 'output_str')
list_display = ('details_link', 'created_at', 'snapshot_info', 'tags_inline', 'status_badge', 'plugin_with_icon', 'process_link', 'machine_link', 'cmd_str', 'output_str_display')
list_display_links = None
sort_fields = ('id', 'created_at', 'plugin', 'status')
readonly_fields = ('cmd', 'cmd_version', 'pwd', 'cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon')
search_fields = ('id', 'snapshot__url', 'plugin', 'output_str', 'cmd_version', 'cmd', 'snapshot__timestamp')
readonly_fields = ('cmd', 'cmd_version', 'pwd', 'cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon', 'process_link')
search_fields = ()
autocomplete_fields = ['snapshot']
fieldsets = (
@@ -271,7 +354,7 @@ class ArchiveResultAdmin(BaseModelAdmin):
'classes': ('card', 'wide'),
}),
('Plugin', {
'fields': ('plugin', 'plugin_with_icon', 'status', 'retry_at'),
'fields': ('plugin_with_icon', 'process_link', 'status'),
'classes': ('card',),
}),
('Timing', {
@@ -305,8 +388,61 @@ class ArchiveResultAdmin(BaseModelAdmin):
self.request = request
return super().change_view(request, object_id, form_url, extra_context)
def get_queryset(self, request):
    """Changelist queryset with related rows prefetched to avoid N+1 queries.

    The `snapshot_first_tag` annotation supplies the ordering used by the
    `tags_inline` column.
    """
    return (
        super()
        .get_queryset(request)
        .select_related('snapshot', 'process')
        .prefetch_related('snapshot__tags')
        .annotate(snapshot_first_tag=Min('snapshot__tags__name'))
    )
def get_search_results(self, request, queryset, search_term):
    """Custom admin search across snapshot/crawl ids, URLs, tags, plugin,
    hook name, outputs, and the recorded process command.

    Returns (queryset, may_have_duplicates); tag joins can duplicate rows,
    hence the distinct() and the True flag.
    """
    if not search_term:
        return queryset, False
    # UUID/JSON columns are cast to text so icontains works on them.
    queryset = queryset.annotate(
        snapshot_id_text=Cast('snapshot__id', output_field=TextField()),
        snapshot_crawl_id_text=Cast('snapshot__crawl_id', output_field=TextField()),
        output_json_text=Cast('output_json', output_field=TextField()),
        cmd_text=Cast('process__cmd', output_field=TextField()),
    )
    # Split on whitespace respecting quotes, then strip matching surrounding quotes
    # so a quoted phrase is searched as one term.
    search_bits = [
        bit[1:-1] if len(bit) >= 2 and bit[0] == bit[-1] and bit[0] in {'"', "'"} else bit
        for bit in smart_split(search_term)
    ]
    search_bits = [bit.strip() for bit in search_bits if bit.strip()]
    if not search_bits:
        return queryset, False
    # Every term must match at least one field (AND of per-term OR filters).
    filters = []
    for bit in search_bits:
        filters.append(
            Q(snapshot_id_text__icontains=bit)
            | Q(snapshot__url__icontains=bit)
            | Q(snapshot__tags__name__icontains=bit)
            | Q(snapshot_crawl_id_text__icontains=bit)
            | Q(plugin__icontains=bit)
            | Q(hook_name__icontains=bit)
            | Q(output_str__icontains=bit)
            | Q(output_json_text__icontains=bit)
            | Q(cmd_text__icontains=bit)
        )
    return queryset.filter(reduce(and_, filters)).distinct(), True
@admin.display(description='Details', ordering='id')
def details_link(self, result):
    """Link to the result's change page, labeled with the last 8 chars of its id."""
    return format_html(
        '<a href="{}"><code>{}</code></a>',
        reverse('admin:core_archiveresult_change', args=[result.id]),
        str(result.id)[-8:],
    )
@admin.display(
description='Snapshot Info'
description='Snapshot',
ordering='snapshot__url',
)
def snapshot_info(self, result):
snapshot_id = str(result.snapshot_id)
@@ -325,20 +461,83 @@ class ArchiveResultAdmin(BaseModelAdmin):
def tags_str(self, result):
    """Delegate to the parent snapshot's tags_str() representation."""
    return result.snapshot.tags_str()
@admin.display(description='Tags', ordering='snapshot_first_tag')
def tags_inline(self, result):
    """Render a read-only inline tag editor for the result's snapshot tags.

    Ordering relies on the `snapshot_first_tag` annotation added in get_queryset.
    """
    widget = InlineTagEditorWidget(snapshot_id=str(result.snapshot_id), editable=False)
    tags_html = widget.render(
        name=f'tags_{result.snapshot_id}',
        value=result.snapshot.tags.all(),
        attrs={'id': f'tags_{result.snapshot_id}'},
        snapshot_id=str(result.snapshot_id),
    )
    return mark_safe(f'<span class="tags-inline-editor">{tags_html}</span>')
@admin.display(description='Status', ordering='status')
def status_badge(self, result):
    """Render the result status as a colored CSS badge (defaults to queued)."""
    current = result.status or ArchiveResult.StatusChoices.QUEUED
    label = result.get_status_display() or current
    return format_html(
        '<span class="status-badge {} status-{}">{}</span>',
        current,
        current,
        label,
    )
@admin.display(description='Plugin', ordering='plugin')
def plugin_with_icon(self, result):
icon = get_plugin_icon(result.plugin)
return format_html(
'<span title="{}">{}</span> {}',
'<a href="{}" title="{}">{}</a> <a href="{}"><code>{}</code></a>',
get_plugin_admin_url(result.plugin),
result.plugin,
icon,
get_plugin_admin_url(result.plugin),
result.plugin,
)
def cmd_str(self, result):
@admin.display(description='Process', ordering='process__pid')
def process_link(self, result):
if not result.process_id:
return '-'
process_label = result.process.pid if result.process and result.process.pid else '-'
return format_html(
'<pre>{}</pre>',
' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
'<a href="{}"><code>{}</code></a>',
reverse('admin:machine_process_change', args=[result.process_id]),
process_label,
)
@admin.display(description='Machine', ordering='process__machine__hostname')
def machine_link(self, result):
    """Link to the machine that ran this result's process, or '-' when unknown."""
    if not result.process_id or not result.process or not result.process.machine_id:
        return '-'
    machine = result.process.machine
    return format_html(
        '<a href="{}"><code>{}</code> {}</a>',
        reverse('admin:machine_machine_change', args=[machine.id]),
        str(machine.id)[:8],
        machine.hostname,
    )
@admin.display(description='Command')
def cmd_str(self, result):
    """Render the abx-dl command cell: a short display command with a Copy
    button that puts the full cd+env replay command on the clipboard.

    The onclick falls back to a temporary textarea + execCommand('copy') when
    the async Clipboard API is unavailable.
    """
    display_cmd = build_abx_dl_display_command(result)
    replay_cmd = build_abx_dl_replay_command(result)
    return format_html(
        '''
        <div style="position: relative; width: 300px; min-width: 300px; max-width: 300px; overflow: hidden; box-sizing: border-box;">
            <button type="button"
                    data-command="{}"
                    onclick="(function(btn){{var text=btn.dataset.command||''; if(navigator.clipboard&&navigator.clipboard.writeText){{navigator.clipboard.writeText(text);}} else {{var ta=document.createElement('textarea'); ta.value=text; document.body.appendChild(ta); ta.select(); document.execCommand('copy'); document.body.removeChild(ta);}}}})(this); return false;"
                    style="position: absolute; top: 6px; right: 6px; z-index: 1; padding: 2px 8px; border: 0; border-radius: 4px; background: #e2e8f0; color: #334155; font-size: 11px; cursor: pointer;">
                Copy
            </button>
            <code title="{}" style="display: block; width: 100%; max-width: 100%; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; padding: 8px 56px 8px 8px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 6px; font-size: 11px; box-sizing: border-box;">
                {}
            </code>
        </div>
        ''',
        replay_cmd,
        replay_cmd,
        display_cmd,
    )
def output_display(self, result):
@@ -352,6 +551,27 @@ class ArchiveResultAdmin(BaseModelAdmin):
result.output_str,
)
@admin.display(description='Output', ordering='output_str')
def output_str_display(self, result):
    """Show the result output string, linked to its live embed path when one exists.

    Returns '-' for empty output.
    """
    output_text = str(result.output_str or '').strip()
    if not output_text:
        return '-'
    # embed_path() resolves the primary output file for this result, when available.
    live_path = result.embed_path() if hasattr(result, 'embed_path') else None
    if live_path:
        return format_html(
            '<a href="{}" title="{}"><code>{}</code></a>',
            build_snapshot_url(str(result.snapshot_id), live_path),
            output_text,
            output_text,
        )
    return format_html(
        '<span title="{}">{}</span>',
        output_text,
        output_text,
    )
def output_summary(self, result):
snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
output_html = format_html(

View File

@@ -61,12 +61,14 @@ def register_admin_site():
from archivebox.crawls.admin import register_admin as register_crawls_admin
from archivebox.api.admin import register_admin as register_api_admin
from archivebox.machine.admin import register_admin as register_machine_admin
from archivebox.personas.admin import register_admin as register_personas_admin
from archivebox.workers.admin import register_admin as register_workers_admin
register_core_admin(archivebox_admin)
register_crawls_admin(archivebox_admin)
register_api_admin(archivebox_admin)
register_machine_admin(archivebox_admin)
register_personas_admin(archivebox_admin)
register_workers_admin(archivebox_admin)
return archivebox_admin

View File

@@ -6,6 +6,7 @@ from pathlib import Path
from django.contrib import admin, messages
from django.urls import path
from django.shortcuts import get_object_or_404, redirect
from django.utils.html import format_html
from django.utils.safestring import mark_safe
from django.utils import timezone
@@ -14,6 +15,7 @@ from django.db.models.functions import Coalesce
from django import forms
from django.template import Template, RequestContext
from django.contrib.admin.helpers import ActionForm
from django.middleware.csrf import get_token
from archivebox.config import DATA_DIR
from archivebox.config.common import SERVER_CONFIG
@@ -24,7 +26,7 @@ from archivebox.search.admin import SearchResultsAdminMixin
from archivebox.core.host_utils import build_snapshot_url, build_web_url
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
from archivebox.workers.tasks import bg_archive_snapshots, bg_add
from archivebox.workers.tasks import bg_archive_snapshot, bg_archive_snapshots, bg_add
from archivebox.core.models import Tag, Snapshot, ArchiveResult
from archivebox.core.admin_archiveresults import render_archiveresults_list
@@ -215,10 +217,23 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
def get_urls(self):
urls = super().get_urls()
custom_urls = [
path('grid/', self.admin_site.admin_view(self.grid_view), name='grid')
path('grid/', self.admin_site.admin_view(self.grid_view), name='grid'),
path('<path:object_id>/redo-failed/', self.admin_site.admin_view(self.redo_failed_view), name='core_snapshot_redo_failed'),
]
return custom_urls + urls
def redo_failed_view(self, request, object_id):
snapshot = get_object_or_404(Snapshot, pk=object_id)
if request.method == 'POST':
queued = bg_archive_snapshot(snapshot, overwrite=False)
messages.success(
request,
f"Queued {queued} snapshot for re-archiving. The background runner will process it.",
)
return redirect(snapshot.admin_change_url)
# def get_queryset(self, request):
# # tags_qs = SnapshotTag.objects.all().select_related('tag')
# # prefetch = Prefetch('snapshottag_set', queryset=tags_qs)
@@ -312,6 +327,8 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
def admin_actions(self, obj):
summary_url = build_web_url(f'/{obj.archive_path}')
results_url = build_web_url(f'/{obj.archive_path}/index.html#all')
redo_failed_url = f'/admin/core/snapshot/{obj.pk}/redo-failed/'
csrf_token = get_token(self.request)
return format_html(
'''
<div style="display: flex; flex-wrap: wrap; gap: 12px; align-items: center;">
@@ -344,13 +361,15 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
onmouseout="this.style.background='#eff6ff';">
🆕 Archive Now
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #ecfdf5; border: 1px solid #a7f3d0; border-radius: 8px; color: #065f46; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
title="Redo failed extractors (missing outputs)"
onmouseover="this.style.background='#d1fae5';"
onmouseout="this.style.background='#ecfdf5';">
🔁 Redo Failed
</a>
<form action="{}" method="post" style="display: inline-flex; margin: 0;">
<input type="hidden" name="csrfmiddlewaretoken" value="{}">
<button type="submit" class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #ecfdf5; border: 1px solid #a7f3d0; border-radius: 8px; color: #065f46; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s; cursor: pointer;"
title="Redo failed extractors (missing outputs)"
onmouseover="this.style.background='#d1fae5';"
onmouseout="this.style.background='#ecfdf5';">
🔁 Redo Failed
</button>
</form>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #fffbeb; border: 1px solid #fde68a; border-radius: 8px; color: #92400e; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
title="Re-run all extractors (overwrite existing)"
@@ -367,14 +386,15 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
</a>
</div>
<p style="margin-top: 12px; font-size: 12px; color: #64748b;">
<b>Tip:</b> Action buttons link to the list view with this snapshot pre-selected. Select it and use the action dropdown to execute.
<b>Tip:</b> Redo Failed runs immediately. The other action buttons link to the list view with this snapshot pre-selected.
</p>
''',
summary_url,
results_url,
obj.url,
obj.pk,
obj.pk,
redo_failed_url,
csrf_token,
obj.pk,
obj.pk,
)

View File

@@ -1,63 +1,74 @@
__package__ = 'archivebox.core'
from django.contrib import admin
from urllib.parse import quote
from django import forms
from django.contrib import admin, messages
from django.contrib.admin.options import IS_POPUP_VAR
from django.http import HttpRequest, HttpResponseRedirect
from django.urls import reverse
from django.utils.html import format_html
from django.utils.safestring import mark_safe
from archivebox.misc.paginators import AccelleratedPaginator
from archivebox.base_models.admin import BaseModelAdmin
from archivebox.core.models import SnapshotTag, Tag
from archivebox.core.tag_utils import (
TAG_HAS_SNAPSHOTS_CHOICES,
TAG_SORT_CHOICES,
build_tag_cards,
get_tag_creator_choices,
get_tag_year_choices,
normalize_created_by_filter,
normalize_created_year_filter,
normalize_has_snapshots_filter,
normalize_tag_sort,
)
from archivebox.core.host_utils import build_snapshot_url
class TagInline(admin.TabularInline):
model = SnapshotTag
# fk_name = 'snapshot'
fields = ('id', 'tag')
extra = 1
# min_num = 1
max_num = 1000
autocomplete_fields = (
'tag',
)
# class AutocompleteTags:
# model = Tag
# search_fields = ['name']
# name = 'name'
# # source_field = 'name'
# remote_field = Tag._meta.get_field('name')
# class AutocompleteTagsAdminStub:
# name = 'admin'
# class TaggedItemInline(admin.TabularInline):
# readonly_fields = ('object_link',)
# fields = ('id', 'tag', 'content_type', 'object_id', *readonly_fields)
# model = TaggedItem
# extra = 1
# show_change_link = True
# @admin.display(description='object')
# def object_link(self, obj):
# obj = obj.content_type.get_object_for_this_type(pk=obj.object_id)
# return format_html('<a href="/admin/{}/{}/{}/change"><b>[{}]</b></a>', obj._meta.app_label, obj._meta.model_name, obj.pk, str(obj))
class TagAdminForm(forms.ModelForm):
class Meta:
model = Tag
fields = '__all__'
widgets = {
'name': forms.TextInput(attrs={
'placeholder': 'research, receipts, product-design...',
'autocomplete': 'off',
'spellcheck': 'false',
'data-tag-name-input': '1',
}),
}
def clean_name(self):
name = (self.cleaned_data.get('name') or '').strip()
if not name:
raise forms.ValidationError('Tag name is required.')
return name
class TagAdmin(BaseModelAdmin):
list_display = ('created_at', 'created_by', 'id', 'name', 'num_snapshots', 'snapshots')
form = TagAdminForm
change_list_template = 'admin/core/tag/change_list.html'
change_form_template = 'admin/core/tag/change_form.html'
list_display = ('name', 'num_snapshots', 'created_at', 'created_by')
list_filter = ('created_at', 'created_by')
sort_fields = ('name', 'slug', 'id', 'created_by', 'created_at')
readonly_fields = ('slug', 'id', 'created_at', 'modified_at', 'snapshots')
search_fields = ('id', 'name', 'slug')
actions = ['delete_selected', 'merge_tags']
ordering = ['-created_at']
# inlines = [TaggedItemInline]
readonly_fields = ('slug', 'id', 'created_at', 'modified_at', 'snapshots')
actions = ['delete_selected']
ordering = ['name', 'id']
fieldsets = (
('Tag Info', {
('Tag', {
'fields': ('name', 'slug'),
'classes': ('card',),
}),
@@ -65,112 +76,137 @@ class TagAdmin(BaseModelAdmin):
'fields': ('id', 'created_by', 'created_at', 'modified_at'),
'classes': ('card',),
}),
('Snapshots', {
('Recent Snapshots', {
'fields': ('snapshots',),
'classes': ('card', 'wide'),
}),
)
paginator = AccelleratedPaginator
add_fieldsets = (
('Tag', {
'fields': ('name',),
'classes': ('card', 'wide'),
}),
('Metadata', {
'fields': ('created_by',),
'classes': ('card',),
}),
)
def get_fieldsets(self, request: HttpRequest, obj: Tag | None = None):
return self.fieldsets if obj else self.add_fieldsets
def num_snapshots(self, tag):
def changelist_view(self, request: HttpRequest, extra_context=None):
query = (request.GET.get('q') or '').strip()
sort = normalize_tag_sort((request.GET.get('sort') or 'created_desc').strip())
created_by = normalize_created_by_filter((request.GET.get('created_by') or '').strip())
year = normalize_created_year_filter((request.GET.get('year') or '').strip())
has_snapshots = normalize_has_snapshots_filter((request.GET.get('has_snapshots') or 'all').strip())
extra_context = {
**(extra_context or {}),
'initial_query': query,
'initial_sort': sort,
'initial_created_by': created_by,
'initial_year': year,
'initial_has_snapshots': has_snapshots,
'tag_sort_choices': TAG_SORT_CHOICES,
'tag_has_snapshots_choices': TAG_HAS_SNAPSHOTS_CHOICES,
'tag_created_by_choices': get_tag_creator_choices(),
'tag_year_choices': get_tag_year_choices(),
'initial_tag_cards': build_tag_cards(
query=query,
request=request,
sort=sort,
created_by=created_by,
year=year,
has_snapshots=has_snapshots,
),
'tag_search_api_url': reverse('api-1:search_tags'),
'tag_create_api_url': reverse('api-1:tags_create'),
}
return super().changelist_view(request, extra_context=extra_context)
def render_change_form(self, request, context, add=False, change=False, form_url='', obj=None):
current_name = (request.POST.get('name') or '').strip()
if not current_name and obj:
current_name = obj.name
similar_tag_cards = build_tag_cards(query=current_name, request=request, limit=12) if current_name else build_tag_cards(request=request, limit=12)
if obj:
similar_tag_cards = [card for card in similar_tag_cards if card['id'] != obj.pk]
context.update({
'tag_search_api_url': reverse('api-1:search_tags'),
'tag_similar_cards': similar_tag_cards,
'tag_similar_query': current_name,
})
return super().render_change_form(request, context, add=add, change=change, form_url=form_url, obj=obj)
def response_add(self, request: HttpRequest, obj: Tag, post_url_continue=None):
if IS_POPUP_VAR in request.POST or '_continue' in request.POST or '_addanother' in request.POST:
return super().response_add(request, obj, post_url_continue=post_url_continue)
self.message_user(request, f'Tag "{obj.name}" saved.', level=messages.SUCCESS)
return self._redirect_to_changelist(obj.name)
def response_change(self, request: HttpRequest, obj: Tag):
if IS_POPUP_VAR in request.POST or '_continue' in request.POST or '_addanother' in request.POST or '_saveasnew' in request.POST:
return super().response_change(request, obj)
self.message_user(request, f'Tag "{obj.name}" updated.', level=messages.SUCCESS)
return self._redirect_to_changelist(obj.name)
def _redirect_to_changelist(self, query: str = '') -> HttpResponseRedirect:
changelist_url = reverse('admin:core_tag_changelist')
if query:
changelist_url = f'{changelist_url}?q={quote(query)}'
return HttpResponseRedirect(changelist_url)
@admin.display(description='Snapshots')
def snapshots(self, tag: Tag):
snapshots = tag.snapshot_set.select_related('crawl__created_by').order_by('-downloaded_at', '-created_at', '-pk')[:10]
total_count = tag.snapshot_set.count()
if not snapshots:
return mark_safe(
f'<p style="margin:0;color:#64748b;">No snapshots use this tag yet. '
f'<a href="/admin/core/snapshot/?tags__id__exact={tag.id}">Open filtered snapshot list</a>.</p>'
)
cards = []
for snapshot in snapshots:
title = (snapshot.title or '').strip() or snapshot.url
cards.append(format_html(
'''
<a href="{}" style="display:flex;align-items:center;gap:10px;padding:10px 12px;border:1px solid #e2e8f0;border-radius:12px;background:#fff;text-decoration:none;color:#0f172a;">
<img src="{}" alt="" style="width:18px;height:18px;border-radius:4px;flex:0 0 auto;" onerror="this.style.display='none'">
<span style="min-width:0;">
<strong style="display:block;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;">{}</strong>
<code style="display:block;color:#64748b;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;">{}</code>
</span>
</a>
''',
reverse('admin:core_snapshot_change', args=[snapshot.pk]),
build_snapshot_url(str(snapshot.pk), 'favicon.ico'),
title[:120],
snapshot.url[:120],
))
cards.append(format_html(
'<a href="/admin/core/snapshot/?tags__id__exact={}" style="display:inline-flex;margin-top:10px;font-weight:600;">View all {} tagged snapshots</a>',
tag.id,
total_count,
))
return mark_safe('<div style="display:grid;gap:10px;">' + ''.join(cards) + '</div>')
@admin.display(description='Snapshots', ordering='num_snapshots')
def num_snapshots(self, tag: Tag):
count = getattr(tag, 'num_snapshots', tag.snapshot_set.count())
return format_html(
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
tag.id,
tag.snapshot_set.count(),
count,
)
def snapshots(self, tag):
total_count = tag.snapshot_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> {}',
snap.pk,
snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
snap.url[:64],
)
for snap in tag.snapshot_set.order_by('-downloaded_at')[:10]
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">{total_count} total snapshots...<a>'))
# def get_urls(self):
# urls = super().get_urls()
# custom_urls = [
# path(
# "merge-tags/",
# self.admin_site.admin_view(self.merge_tags_view),
# name="taggit_tag_merge_tags",
# ),
# ]
# return custom_urls + urls
# @admin.action(description="Merge selected tags")
# def merge_tags(self, request, queryset):
# selected = request.POST.getlist(admin.helpers.ACTION_CHECKBOX_NAME)
# if not selected:
# self.message_user(request, "Please select at least one tag.")
# return redirect(request.get_full_path())
# selected_tag_ids = ",".join(selected)
# redirect_url = f"{request.get_full_path()}merge-tags/"
# request.session["selected_tag_ids"] = selected_tag_ids
# return redirect(redirect_url)
# def merge_tags_view(self, request):
# selected_tag_ids = request.session.get("selected_tag_ids", "").split(",")
# if request.method == "POST":
# form = MergeTagsForm(request.POST)
# if form.is_valid():
# new_tag_name = form.cleaned_data["new_tag_name"]
# new_tag, created = Tag.objects.get_or_create(name=new_tag_name)
# with transaction.atomic():
# for tag_id in selected_tag_ids:
# tag = Tag.objects.get(id=tag_id)
# tagged_items = TaggedItem.objects.filter(tag=tag)
# for tagged_item in tagged_items:
# if TaggedItem.objects.filter(
# tag=new_tag,
# content_type=tagged_item.content_type,
# object_id=tagged_item.object_id,
# ).exists():
# # we have the new tag as well, so we can just
# # remove the tag association
# tagged_item.delete()
# else:
# # point this taggedItem to the new one
# tagged_item.tag = new_tag
# tagged_item.save()
# # delete the old tag
# if tag.id != new_tag.id:
# tag.delete()
# self.message_user(request, "Tags have been merged", level="success")
# # clear the selected_tag_ids from session after merge is complete
# request.session.pop("selected_tag_ids", None)
# return redirect("..")
# else:
# self.message_user(request, "Form is invalid.", level="error")
# context = {
# "form": MergeTagsForm(),
# "selected_tag_ids": selected_tag_ids,
# }
# return render(request, "admin/taggit/merge_tags_form.html", context)
# @admin.register(SnapshotTag, site=archivebox_admin)
# class SnapshotTagAdmin(BaseModelAdmin):
# list_display = ('id', 'snapshot', 'tag')
# sort_fields = ('id', 'snapshot', 'tag')
# search_fields = ('id', 'snapshot_id', 'tag_id')
# fields = ('snapshot', 'id')
# actions = ['delete_selected']
# ordering = ['-id']
def register_admin(admin_site):
admin_site.register(Tag, TagAdmin)

View File

@@ -1,12 +1,16 @@
__package__ = 'archivebox.core'
from django import forms
from django.utils.html import format_html
from archivebox.misc.util import URL_REGEX
from archivebox.misc.util import URL_REGEX, find_all_urls
from taggit.utils import edit_string_for_tags, parse_tags
from archivebox.base_models.admin import KeyValueWidget
from archivebox.crawls.schedule_utils import validate_schedule
from archivebox.hooks import get_plugins
from archivebox.config.common import SEARCH_BACKEND_CONFIG
from archivebox.core.widgets import TagEditorWidget, URLFiltersWidget
from archivebox.hooks import get_plugins, discover_plugin_configs, get_plugin_icon
from archivebox.personas.models import Persona
DEPTH_CHOICES = (
('0', 'depth = 0 (archive just these URLs)'),
@@ -22,6 +26,22 @@ def get_plugin_choices():
return [(name, name) for name in get_plugins()]
def get_plugin_choice_label(plugin_name: str, plugin_configs: dict[str, dict]) -> str:
schema = plugin_configs.get(plugin_name, {})
description = str(schema.get('description') or '').strip()
if not description:
return plugin_name
icon_html = get_plugin_icon(plugin_name)
return format_html(
'<span class="plugin-choice-icon">{}</span><span class="plugin-choice-name">{}</span><a class="plugin-choice-description" href="https://archivebox.github.io/abx-plugins/#{}" target="_blank" rel="noopener noreferrer">{}</a>',
icon_html,
plugin_name,
plugin_name,
description,
)
def get_choice_field(form: forms.Form, name: str) -> forms.ChoiceField:
field = form.fields[name]
if not isinstance(field, forms.ChoiceField):
@@ -31,22 +51,19 @@ def get_choice_field(form: forms.Form, name: str) -> forms.ChoiceField:
class AddLinkForm(forms.Form):
# Basic fields
url = forms.RegexField(
label="URLs (one per line)",
regex=URL_REGEX,
min_length=6,
url = forms.CharField(
label="URLs",
strip=True,
widget=forms.Textarea,
widget=forms.Textarea(attrs={
'data-url-regex': URL_REGEX.pattern,
}),
required=True
)
tag = forms.CharField(
label="Tags (comma separated tag1,tag2,tag3)",
label="Tags",
strip=True,
required=False,
widget=forms.TextInput(attrs={
'list': 'tag-datalist',
'autocomplete': 'off',
})
widget=TagEditorWidget(),
)
depth = forms.ChoiceField(
label="Archive depth",
@@ -58,11 +75,15 @@ class AddLinkForm(forms.Form):
label="Notes",
strip=True,
required=False,
widget=forms.Textarea(attrs={
'rows': 3,
'placeholder': 'Optional notes about this crawl (e.g., purpose, project name, context...)',
widget=forms.TextInput(attrs={
'placeholder': 'Optional notes about this crawl',
})
)
url_filters = forms.Field(
label="URL allowlist / denylist",
required=False,
widget=URLFiltersWidget(source_selector='textarea[name="url"]'),
)
# Plugin groups
chrome_plugins = forms.MultipleChoiceField(
@@ -111,24 +132,15 @@ class AddLinkForm(forms.Form):
'placeholder': 'e.g., daily, weekly, 0 */6 * * * (every 6 hours)',
})
)
persona = forms.CharField(
persona = forms.ModelChoiceField(
label="Persona (authentication profile)",
max_length=100,
initial='Default',
required=False,
)
overwrite = forms.BooleanField(
label="Overwrite existing snapshots",
initial=False,
required=False,
)
update = forms.BooleanField(
label="Update/retry previously failed URLs",
initial=False,
required=False,
queryset=Persona.objects.none(),
empty_label=None,
to_field_name='name',
)
index_only = forms.BooleanField(
label="Index only (don't archive yet)",
label="Index only dry run (add crawl but don't archive yet)",
initial=False,
required=False,
)
@@ -142,11 +154,13 @@ class AddLinkForm(forms.Form):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Import at runtime to avoid circular imports
from archivebox.config.common import ARCHIVING_CONFIG
default_persona = Persona.get_or_create_default()
self.fields['persona'].queryset = Persona.objects.order_by('name')
self.fields['persona'].initial = default_persona.name
# Get all plugins
all_plugins = get_plugins()
plugin_configs = discover_plugin_configs()
# Define plugin groups
chrome_dependent = {
@@ -170,26 +184,28 @@ class AddLinkForm(forms.Form):
# Populate plugin field choices
get_choice_field(self, 'chrome_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in chrome_dependent
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in chrome_dependent
]
get_choice_field(self, 'archiving_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in archiving
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in archiving
]
get_choice_field(self, 'parsing_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in parsing
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in parsing
]
get_choice_field(self, 'search_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in search
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in search
]
get_choice_field(self, 'binary_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in binary
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in binary
]
get_choice_field(self, 'extension_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in extensions
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in extensions
]
# Set update default from config
self.fields['update'].initial = not ARCHIVING_CONFIG.ONLY_NEW
required_search_plugin = f'search_backend_{SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}'.strip()
search_choices = [choice[0] for choice in get_choice_field(self, 'search_plugins').choices]
if required_search_plugin in search_choices:
get_choice_field(self, 'search_plugins').initial = [required_search_plugin]
def clean(self):
cleaned_data = super().clean() or {}
@@ -207,6 +223,23 @@ class AddLinkForm(forms.Form):
return cleaned_data
def clean_url(self):
value = self.cleaned_data.get('url') or ''
urls = '\n'.join(find_all_urls(value))
if not urls:
raise forms.ValidationError('Enter at least one valid URL.')
return urls
def clean_url_filters(self):
from archivebox.crawls.models import Crawl
value = self.cleaned_data.get('url_filters') or {}
return {
'allowlist': '\n'.join(Crawl.split_filter_patterns(value.get('allowlist', ''))),
'denylist': '\n'.join(Crawl.split_filter_patterns(value.get('denylist', ''))),
'same_domain_only': bool(value.get('same_domain_only')),
}
def clean_schedule(self):
schedule = (self.cleaned_data.get('schedule') or '').strip()
if not schedule:

View File

@@ -163,6 +163,10 @@ def get_api_base_url(request=None) -> str:
return _build_base_url_for_host(get_api_host(), request=request)
def get_public_base_url(request=None) -> str:
return _build_base_url_for_host(get_public_host(), request=request)
# Backwards-compat aliases (archive == web)
def get_archive_base_url(request=None) -> str:
return get_web_base_url(request=request)

View File

@@ -0,0 +1,15 @@
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("core", "0031_add_archiveresult_snapshot_status_index"),
]
operations = [
migrations.RemoveField(
model_name="archiveresult",
name="retry_at",
),
]

View File

@@ -36,7 +36,7 @@ from archivebox.base_models.models import (
from archivebox.workers.models import ModelWithStateMachine, BaseStateMachine
from archivebox.workers.tasks import bg_archive_snapshot
from archivebox.crawls.models import Crawl
from archivebox.machine.models import NetworkInterface, Binary
from archivebox.machine.models import Binary
@@ -60,32 +60,41 @@ class Tag(ModelWithUUID):
def __str__(self):
return self.name
def _generate_unique_slug(self) -> str:
base_slug = slugify(self.name) or 'tag'
existing = Tag.objects.filter(slug__startswith=base_slug)
if self.pk:
existing = existing.exclude(pk=self.pk)
existing_slugs = set(existing.values_list("slug", flat=True))
slug = base_slug
i = 1
while slug in existing_slugs:
slug = f"{base_slug}_{i}"
i += 1
return slug
def save(self, *args, **kwargs):
is_new = self._state.adding
if is_new:
self.slug = slugify(self.name)
existing = set(Tag.objects.filter(slug__startswith=self.slug).values_list("slug", flat=True))
i = None
while True:
slug = f"{slugify(self.name)}_{i}" if i else slugify(self.name)
if slug not in existing:
self.slug = slug
break
i = (i or 0) + 1
existing_name = None
if self.pk:
existing_name = Tag.objects.filter(pk=self.pk).values_list('name', flat=True).first()
if not self.slug or existing_name != self.name:
self.slug = self._generate_unique_slug()
super().save(*args, **kwargs)
if is_new:
from archivebox.misc.logging_util import log_worker_event
log_worker_event(
worker_type='DB',
event='Created Tag',
indent_level=0,
metadata={
'id': self.id,
'name': self.name,
'slug': self.slug,
},
)
# if is_new:
# from archivebox.misc.logging_util import log_worker_event
# log_worker_event(
# worker_type='DB',
# event='Created Tag',
# indent_level=0,
# metadata={
# 'id': self.id,
# 'name': self.name,
# 'slug': self.slug,
# },
# )
@property
def api_url(self) -> str:
@@ -364,7 +373,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
return Binary.objects.filter(process_set__archiveresult__snapshot_id=self.id).distinct()
def save(self, *args, **kwargs):
is_new = self._state.adding
if not self.bookmarked_at:
self.bookmarked_at = self.created_at or timezone.now()
if not self.timestamp:
@@ -393,24 +401,25 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
super().save(*args, **kwargs)
self.ensure_legacy_archive_symlink()
if self.url not in self.crawl.urls:
existing_urls = {url for _raw_line, url in self.crawl._iter_url_lines() if url}
if self.crawl.url_passes_filters(self.url, snapshot=self) and self.url not in existing_urls:
self.crawl.urls += f'\n{self.url}'
self.crawl.save()
if is_new:
from archivebox.misc.logging_util import log_worker_event
log_worker_event(
worker_type='DB',
event='Created Snapshot',
indent_level=2,
url=self.url,
metadata={
'id': str(self.id),
'crawl_id': str(self.crawl_id),
'depth': self.depth,
'status': self.status,
},
)
# if is_new:
# from archivebox.misc.logging_util import log_worker_event
# log_worker_event(
# worker_type='DB',
# event='Created Snapshot',
# indent_level=2,
# url=self.url,
# metadata={
# 'id': str(self.id),
# 'crawl_id': str(self.crawl_id),
# 'depth': self.depth,
# 'status': self.status,
# },
# )
# =========================================================================
# Filesystem Migration Methods
@@ -1528,16 +1537,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
"""
Execute snapshot by creating pending ArchiveResults for all enabled hooks.
Called by: SnapshotMachine.enter_started()
Hook Lifecycle:
1. discover_hooks('Snapshot') → finds all plugin hooks
2. For each hook:
- Create ArchiveResult with status=QUEUED
- Store hook_name (e.g., 'on_Snapshot__50_wget.py')
3. ArchiveResults execute independently via ArchiveResultMachine
4. Hook execution happens in ArchiveResult.run(), NOT here
Returns:
list[ArchiveResult]: Newly created pending results
"""
@@ -1602,7 +1601,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
'url': self.url,
'title': self.title,
'tags': self.tags_str(),
'tags_str': self.tags_str(),
'bookmarked_at': self.bookmarked_at.isoformat() if self.bookmarked_at else None,
'created_at': self.created_at.isoformat() if self.created_at else None,
'timestamp': self.timestamp,
@@ -1672,7 +1670,9 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
# ID not found, fall through to create-by-URL logic
pass
url = record.get('url')
from archivebox.misc.util import fix_url_from_markdown
url = fix_url_from_markdown(str(record.get('url') or '').strip())
if not url:
return None
@@ -1807,7 +1807,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
defaults={
'plugin': plugin,
'status': ArchiveResult.INITIAL_STATE,
'retry_at': timezone.now(),
},
)
if archiveresult.status == ArchiveResult.INITIAL_STATE:
@@ -1853,11 +1852,12 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
failed = results.filter(status='failed').count()
running = results.filter(status='started').count()
skipped = results.filter(status='skipped').count()
noresults = results.filter(status='noresults').count()
total = results.count()
pending = total - succeeded - failed - running - skipped
pending = total - succeeded - failed - running - skipped - noresults
# Calculate percentage (succeeded + failed + skipped as completed)
completed = succeeded + failed + skipped
# Calculate percentage (succeeded + failed + skipped + noresults as completed)
completed = succeeded + failed + skipped + noresults
percent = int((completed / total * 100) if total > 0 else 0)
# Sum output sizes
@@ -1875,47 +1875,38 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
'running': running,
'pending': pending,
'skipped': skipped,
'noresults': noresults,
'percent': percent,
'output_size': output_size,
'is_sealed': is_sealed,
}
def retry_failed_archiveresults(self, retry_at: Optional[datetime] = None) -> int:
def retry_failed_archiveresults(self) -> int:
"""
Reset failed/skipped ArchiveResults to queued for retry.
This enables seamless retry of the entire extraction pipeline:
- Resets FAILED and SKIPPED results to QUEUED
- Sets retry_at so workers pick them up
- Plugins run in order (numeric prefix)
- Each plugin checks its dependencies at runtime
Dependency handling (e.g., chrome → screenshot):
- Plugins check if required outputs exist before running
- If dependency output missing → plugin returns 'skipped'
- On retry, if dependency now succeeds → dependent can run
Returns count of ArchiveResults reset.
"""
retry_at = retry_at or timezone.now()
count = self.archiveresult_set.filter(
status__in=[
ArchiveResult.StatusChoices.FAILED,
ArchiveResult.StatusChoices.SKIPPED,
ArchiveResult.StatusChoices.NORESULTS,
]
).update(
status=ArchiveResult.StatusChoices.QUEUED,
retry_at=retry_at,
output=None,
output_str='',
output_json=None,
output_files={},
output_size=0,
output_mimetypes='',
start_ts=None,
end_ts=None,
)
# Also reset the snapshot and current_step so it gets re-checked from the beginning
if count > 0:
self.status = self.StatusChoices.STARTED
self.retry_at = retry_at
self.retry_at = timezone.now()
self.current_step = 0 # Reset to step 0 for retry
self.save(update_fields=['status', 'retry_at', 'current_step', 'modified_at'])
@@ -2228,6 +2219,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
best_result = outputs[0]
context = {
**self.to_dict(extended=True),
'snapshot': self,
'title': htmlencode(self.title or (self.base_url if self.is_archived else TITLE_LOADING_MSG)),
'url_str': htmlencode(urldecode(self.base_url)),
'archive_url': urlencode(f'warc/{self.timestamp}' or (self.domain if self.is_archived else '')) or 'about:blank',
@@ -2275,8 +2267,8 @@ class SnapshotMachine(BaseStateMachine):
│ • discover_hooks('Snapshot') → finds all plugin hooks │
│ • create_pending_archiveresults() → creates ONE │
│ ArchiveResult per hook (NO execution yet) │
│ 2. ArchiveResults process independently with their own
state machines (see ArchiveResultMachine)
│ 2. The shared abx-dl runner executes hooks and the
projector updates ArchiveResult rows from events
│ 3. Advance through steps 0-9 as foreground hooks complete │
└─────────────────────────────────────────────────────────────┘
↓ tick() when is_finished()
@@ -2358,7 +2350,7 @@ class SnapshotMachine(BaseStateMachine):
cast(Any, crawl).sm.seal()
class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithStateMachine):
class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes):
class StatusChoices(models.TextChoices):
QUEUED = 'queued', 'Queued'
STARTED = 'started', 'Started'
@@ -2366,6 +2358,17 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
SUCCEEDED = 'succeeded', 'Succeeded'
FAILED = 'failed', 'Failed'
SKIPPED = 'skipped', 'Skipped'
NORESULTS = 'noresults', 'No Results'
INITIAL_STATE = StatusChoices.QUEUED
ACTIVE_STATE = StatusChoices.STARTED
FINAL_STATES = (
StatusChoices.SUCCEEDED,
StatusChoices.FAILED,
StatusChoices.SKIPPED,
StatusChoices.NORESULTS,
)
FINAL_OR_ACTIVE_STATES = (*FINAL_STATES, ACTIVE_STATE)
@classmethod
def get_plugin_choices(cls):
@@ -2404,16 +2407,10 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
start_ts = models.DateTimeField(default=None, null=True, blank=True)
end_ts = models.DateTimeField(default=None, null=True, blank=True)
status = ModelWithStateMachine.StatusField(choices=StatusChoices.choices, default=StatusChoices.QUEUED)
retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
status = models.CharField(max_length=16, choices=StatusChoices.choices, default=StatusChoices.QUEUED, db_index=True)
notes = models.TextField(blank=True, null=False, default='')
# output_dir is computed via @property from snapshot.output_dir / plugin
state_machine_name = 'archivebox.core.models.ArchiveResultMachine'
retry_at_field_name = 'retry_at'
state_field_name = 'status'
active_state = StatusChoices.STARTED
snapshot_id: uuid.UUID
process_id: uuid.UUID | None
@@ -2421,7 +2418,6 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
ModelWithOutputDir.Meta,
ModelWithConfig.Meta,
ModelWithNotes.Meta,
ModelWithStateMachine.Meta,
):
app_label = 'core'
verbose_name = 'Archive Result'
@@ -2516,40 +2512,24 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
return None
def save(self, *args, **kwargs):
    """Persist this ArchiveResult, lazily creating its backing Process row on first save.

    On the first save (``self._state.adding``) with no process attached yet, a
    queued Process row is created pointing at the plugin's output directory;
    its ``cmd`` is left empty here and filled in later by run().
    """
    is_new = self._state.adding
    # Create Process record if this is a new ArchiveResult and no process exists yet
    if is_new and not self.process_id:
        from archivebox.machine.models import Process, Machine
        process = Process.objects.create(
            machine=Machine.current(),
            pwd=str(Path(self.snapshot.output_dir) / self.plugin),
            cmd=[],  # Will be set by run()
            status='queued',
            timeout=120,
            env={},
        )
        self.process = process
    # Skip ModelWithOutputDir.save() to avoid creating index.json in plugin directories
    # Call the Django Model.save() directly instead
    models.Model.save(self, *args, **kwargs)
    if is_new:
        from archivebox.misc.logging_util import log_worker_event
        log_worker_event(
            worker_type='DB',
            event='Created ArchiveResult',
            indent_level=3,
            plugin=self.plugin,
            metadata={
                'id': str(self.id),
                'snapshot_id': str(self.snapshot_id),
                'snapshot_url': str(self.snapshot.url)[:64],
                'status': self.status,
            },
        )
    # NOTE(review): the commented block below duplicates the active logging call
    # above — looks like merge/diff residue; one of the two should be removed.
    # if is_new:
    #     from archivebox.misc.logging_util import log_worker_event
    #     log_worker_event(
    #         worker_type='DB',
    #         event='Created ArchiveResult',
    #         indent_level=3,
    #         plugin=self.plugin,
    #         metadata={
    #             'id': str(self.id),
    #             'snapshot_id': str(self.snapshot_id),
    #             'snapshot_url': str(self.snapshot.url)[:64],
    #             'status': self.status,
    #         },
    #     )
@cached_property
def snapshot_dir(self):
@@ -2566,6 +2546,28 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
def get_absolute_url(self):
return f'/{self.snapshot.archive_path}/{self.plugin}'
def reset_for_retry(self, *, save: bool = True) -> None:
    """Return this ArchiveResult to QUEUED and wipe all output/timing fields.

    Args:
        save: when True (default), immediately persist the cleared fields
            (plus modified_at) via update_fields; when False, only mutate
            the in-memory instance.
    """
    cleared_fields = {
        'status': self.StatusChoices.QUEUED,
        'output_str': '',
        'output_json': None,
        'output_files': {},
        'output_size': 0,
        'output_mimetypes': '',
        'start_ts': None,
        'end_ts': None,
    }
    for field_name, blank_value in cleared_fields.items():
        setattr(self, field_name, blank_value)
    if save:
        self.save(update_fields=[*cleared_fields, 'modified_at'])
@property
def plugin_module(self) -> Any | None:
# Hook scripts are now used instead of Python plugin modules
@@ -2723,11 +2725,6 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
return None
def create_output_dir(self):
    """Ensure the plugin's output directory exists under the snapshot dir and return it."""
    target = Path(self.snapshot_dir) / self.plugin
    target.mkdir(parents=True, exist_ok=True)
    return target
@property
def output_dir_name(self) -> str:
    """Subdirectory name (relative to the snapshot dir) holding this result's output — the plugin name."""
    return self.plugin
@@ -2782,134 +2779,17 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
def save_search_index(self):
pass
def cascade_health_update(self, success: bool):
    """Update health stats for parent Snapshot, Crawl, and execution infrastructure (Binary, Machine, NetworkInterface)."""
    # Archival hierarchy first: snapshot, then its crawl.
    snap = self.snapshot
    snap.increment_health_stats(success)
    snap.crawl.increment_health_stats(success)
    # Then the execution infrastructure, when linked.
    binary = self.binary
    if binary:
        binary.increment_health_stats(success)
        if binary.machine:
            binary.machine.increment_health_stats(success)
    if self.iface:
        self.iface.increment_health_stats(success)
def run(self):
    """
    Execute this ArchiveResult's hook and update status.

    If self.hook_name is set, runs only that specific hook.
    If self.hook_name is empty, discovers and runs all hooks for self.plugin (backwards compat).
    Updates status/output fields, queues discovered URLs, and triggers indexing.
    """
    from django.utils import timezone
    from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, run_hook
    from archivebox.config.configset import get_config
    # Get merged config with proper context
    config = get_config(
        crawl=self.snapshot.crawl,
        snapshot=self.snapshot,
    )
    # Determine which hook(s) to run
    hooks = []
    if self.hook_name:
        # SPECIFIC HOOK MODE: Find the specific hook by name
        # (builtin dir is checked before user dir; first match wins)
        for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
            if not base_dir.exists():
                continue
            plugin_dir = base_dir / self.plugin
            if plugin_dir.exists():
                hook_path = plugin_dir / self.hook_name
                if hook_path.exists():
                    hooks.append(hook_path)
                    break
    else:
        # LEGACY MODE: Discover all hooks for this plugin (backwards compatibility)
        for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
            if not base_dir.exists():
                continue
            plugin_dir = base_dir / self.plugin
            if plugin_dir.exists():
                matches = list(plugin_dir.glob('on_Snapshot__*.*'))
                if matches:
                    hooks.extend(sorted(matches))
    if not hooks:
        # Nothing to execute: mark failed with an explanatory message.
        self.status = self.StatusChoices.FAILED
        if self.hook_name:
            self.output_str = f'Hook not found: {self.plugin}/{self.hook_name}'
        else:
            self.output_str = f'No hooks found for plugin: {self.plugin}'
        # NOTE(review): retry_at appears to be removed from this model elsewhere
        # in this changeset — confirm this attribute still exists.
        self.retry_at = None
        self.save()
        return
    # Output directory is plugin_dir for the hook output
    plugin_dir = Path(self.snapshot.output_dir) / self.plugin
    start_ts = timezone.now()
    process = None
    for hook in hooks:
        # Run hook using Process.launch() - returns Process model
        process = run_hook(
            hook,
            output_dir=plugin_dir,
            config=config,
            url=self.snapshot.url,
            snapshot_id=str(self.snapshot.id),
            crawl_id=str(self.snapshot.crawl.id),
            depth=self.snapshot.depth,
        )
        # Link ArchiveResult to Process
        # NOTE(review): in legacy multi-hook mode, `process` (and self.process)
        # is overwritten each iteration, so only the LAST hook's process is
        # inspected below — confirm this is intentional.
        self.process = process
        self.start_ts = start_ts
        self.save(update_fields=['process_id', 'start_ts', 'modified_at'])
    if not process:
        # No hooks ran
        self.status = self.StatusChoices.FAILED
        self.output_str = 'No hooks executed'
        self.save()
        return
    # Update status based on hook execution
    if process.status == process.StatusChoices.RUNNING:
        # BACKGROUND HOOK - still running, return immediately
        # Status is already STARTED from enter_started(), will be finalized by Snapshot.cleanup()
        return
    # FOREGROUND HOOK - completed, update from filesystem
    self.update_from_output()
    # Clean up empty output directory if no files were created
    if plugin_dir.exists() and not self.output_files:
        try:
            if not any(plugin_dir.iterdir()):
                plugin_dir.rmdir()
        except (OSError, RuntimeError):
            pass
def update_from_output(self):
"""
Update this ArchiveResult from filesystem logs and output files.
Used for:
- Foreground hooks that completed (called from ArchiveResult.run())
- Background hooks that completed (called from Snapshot.cleanup())
Used for Snapshot cleanup / orphan recovery when a hook's output exists
on disk but the projector did not finalize the row in the database.
Updates:
- status, output_str, output_json from ArchiveResult JSONL record
- output_files, output_size, output_mimetypes by walking filesystem
- end_ts, retry_at, cmd, cmd_version, binary FK
- end_ts, cmd, cmd_version, binary FK
- Processes side-effect records (Snapshot, Tag, etc.) via process_hook_records()
"""
import mimetypes
@@ -2924,7 +2804,6 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
self.status = self.StatusChoices.FAILED
self.output_str = 'Output directory not found'
self.end_ts = timezone.now()
self.retry_at = None
self.save()
return
@@ -2948,6 +2827,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
'succeeded': self.StatusChoices.SUCCEEDED,
'failed': self.StatusChoices.FAILED,
'skipped': self.StatusChoices.SKIPPED,
'noresults': self.StatusChoices.NORESULTS,
}
self.status = status_map.get(hook_data.get('status', 'failed'), self.StatusChoices.FAILED)
@@ -3011,7 +2891,6 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
# Update timestamps
self.end_ts = timezone.now()
self.retry_at = None
self.save()
@@ -3095,340 +2974,13 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
Uses proper config hierarchy: defaults -> file -> env -> machine -> user -> crawl -> snapshot
"""
import re
from archivebox.config.configset import get_config
# Get merged config with proper hierarchy
config = get_config(
user=self.created_by,
crawl=self.snapshot.crawl,
snapshot=self.snapshot,
)
# Get allowlist/denylist (can be string or list)
allowlist_raw = config.get('URL_ALLOWLIST', '')
denylist_raw = config.get('URL_DENYLIST', '')
# Normalize to list of patterns
def to_pattern_list(value):
if isinstance(value, list):
return value
if isinstance(value, str):
return [p.strip() for p in value.split(',') if p.strip()]
return []
allowlist = to_pattern_list(allowlist_raw)
denylist = to_pattern_list(denylist_raw)
# Denylist takes precedence
if denylist:
for pattern in denylist:
try:
if re.search(pattern, url):
return False
except re.error:
continue # Skip invalid regex patterns
# If allowlist exists, URL must match at least one pattern
if allowlist:
for pattern in allowlist:
try:
if re.search(pattern, url):
return True
except re.error:
continue # Skip invalid regex patterns
return False # No allowlist patterns matched
return True # No filters or passed filters
return self.snapshot.crawl.url_passes_filters(url, snapshot=self.snapshot)
@property
def output_dir(self) -> Path:
    """Absolute path where this plugin writes its results: <snapshot output_dir>/<plugin>."""
    return Path(self.snapshot.output_dir) / self.plugin
def is_background_hook(self) -> bool:
    """True when this ArchiveResult runs as a background hook (a hook.pid file exists in its pwd)."""
    if not self.pwd:
        # No working directory recorded → cannot be a background hook.
        return False
    return (Path(self.pwd) / 'hook.pid').exists()
# =============================================================================
# ArchiveResult State Machine
# =============================================================================
class ArchiveResultMachine(BaseStateMachine):
    """
    State machine for managing ArchiveResult (single plugin execution) lifecycle.

    Hook Lifecycle:
    ┌─────────────────────────────────────────────────────────────┐
    │ QUEUED State                                                │
    │ • Waiting for its turn to run                               │
    └─────────────────────────────────────────────────────────────┘
        ↓ tick() when can_start()
    ┌─────────────────────────────────────────────────────────────┐
    │ STARTED State → enter_started()                             │
    │ 1. archiveresult.run()                                      │
    │    • Find specific hook by hook_name                        │
    │    • run_hook(script, output_dir, ...) → subprocess         │
    │                                                             │
    │ 2a. FOREGROUND hook (returns HookResult):                   │
    │    • update_from_output() immediately                       │
    │      - Read stdout.log                                      │
    │      - Parse JSONL records                                  │
    │      - Extract 'ArchiveResult' record → update status       │
    │      - Walk output_dir → populate output_files              │
    │      - Call process_hook_records() for side effects         │
    │                                                             │
    │ 2b. BACKGROUND hook (returns None):                         │
    │    • Status stays STARTED                                   │
    │    • Continues running in background                        │
    │    • Killed by Snapshot.cleanup() when sealed               │
    └─────────────────────────────────────────────────────────────┘
        ↓ tick() checks status
    ┌─────────────────────────────────────────────────────────────┐
    │ SUCCEEDED / FAILED / SKIPPED / BACKOFF                      │
    │ • Set by hook's JSONL output during update_from_output()    │
    │ • Health stats incremented (num_uses_succeeded/failed)      │
    │ • Parent Snapshot health stats also updated                 │
    └─────────────────────────────────────────────────────────────┘

    https://github.com/ArchiveBox/ArchiveBox/wiki/ArchiveBox-Architecture-Diagrams
    """
    # Name of the attribute on this machine that holds the bound model instance.
    model_attr_name = 'archiveresult'

    # States
    # NOTE(review): assumes ArchiveResult.StatusChoices defines BACKOFF — confirm
    # it still exists on the model after this changeset.
    queued = State(value=ArchiveResult.StatusChoices.QUEUED, initial=True)
    started = State(value=ArchiveResult.StatusChoices.STARTED)
    backoff = State(value=ArchiveResult.StatusChoices.BACKOFF)
    succeeded = State(value=ArchiveResult.StatusChoices.SUCCEEDED, final=True)
    failed = State(value=ArchiveResult.StatusChoices.FAILED, final=True)
    skipped = State(value=ArchiveResult.StatusChoices.SKIPPED, final=True)

    # Tick Event - transitions based on conditions
    # Flow: queued → started → (succeeded|failed|skipped)
    #       queued → skipped (if exceeded max attempts)
    #       started → backoff → started (retry)
    tick = (
        queued.to(skipped, cond='is_exceeded_max_attempts')  # Check skip first
        | queued.to.itself(unless='can_start')
        | queued.to(started, cond='can_start')
        | started.to(succeeded, cond='is_succeeded')
        | started.to(failed, cond='is_failed')
        | started.to(skipped, cond='is_skipped')
        | started.to(backoff, cond='is_backoff')
        | backoff.to(skipped, cond='is_exceeded_max_attempts')  # Check skip from backoff too
        | backoff.to.itself(unless='can_start')
        | backoff.to(started, cond='can_start')
        # Removed redundant transitions: backoff.to(succeeded/failed/skipped)
        # Reason: backoff should always retry→started, then started→final states
    )

    archiveresult: ArchiveResult

    def can_start(self) -> bool:
        """Pure function - check if AR can start (has valid URL)."""
        return bool(self.archiveresult.snapshot.url)

    def is_exceeded_max_attempts(self) -> bool:
        """Check if snapshot has exceeded MAX_URL_ATTEMPTS failed results."""
        from archivebox.config.configset import get_config
        config = get_config(
            crawl=self.archiveresult.snapshot.crawl,
            snapshot=self.archiveresult.snapshot,
        )
        max_attempts = config.get('MAX_URL_ATTEMPTS', 50)
        # Count failed ArchiveResults for this snapshot (any plugin type)
        failed_count = self.archiveresult.snapshot.archiveresult_set.filter(
            status=ArchiveResult.StatusChoices.FAILED
        ).count()
        return failed_count >= max_attempts

    def is_succeeded(self) -> bool:
        """Check if extractor plugin succeeded (status was set by run())."""
        return self.archiveresult.status == ArchiveResult.StatusChoices.SUCCEEDED

    def is_failed(self) -> bool:
        """Check if extractor plugin failed (status was set by run())."""
        return self.archiveresult.status == ArchiveResult.StatusChoices.FAILED

    def is_skipped(self) -> bool:
        """Check if extractor plugin was skipped (status was set by run())."""
        return self.archiveresult.status == ArchiveResult.StatusChoices.SKIPPED

    def is_backoff(self) -> bool:
        """Check if we should backoff and retry later."""
        # Backoff if status is still started (plugin didn't complete) and output_str is empty
        return (
            self.archiveresult.status == ArchiveResult.StatusChoices.STARTED
            and not self.archiveresult.output_str
        )

    def is_finished(self) -> bool:
        """
        Check if extraction has completed (success, failure, or skipped).

        For background hooks in STARTED state, checks if their Process has
        finished and reaps them via update_from_output().
        """
        # If already in final state, return True
        if self.archiveresult.status in (
            ArchiveResult.StatusChoices.SUCCEEDED,
            ArchiveResult.StatusChoices.FAILED,
            ArchiveResult.StatusChoices.SKIPPED,
        ):
            return True
        # If in STARTED state with a Process, check if Process has finished running
        if self.archiveresult.status == ArchiveResult.StatusChoices.STARTED:
            if self.archiveresult.process_id:
                process = self.archiveresult.process
                # If process is NOT running anymore, reap the background hook
                if not process.is_running:
                    self.archiveresult.update_from_output()
                    # Check if now in final state after reaping
                    return self.archiveresult.status in (
                        ArchiveResult.StatusChoices.SUCCEEDED,
                        ArchiveResult.StatusChoices.FAILED,
                        ArchiveResult.StatusChoices.SKIPPED,
                    )
        return False

    @queued.enter
    def enter_queued(self):
        # Clear start_ts and bump retry_at so workers re-examine this AR promptly.
        self.archiveresult.update_and_requeue(
            retry_at=timezone.now(),
            status=ArchiveResult.StatusChoices.QUEUED,
            start_ts=None,
        )  # bump the snapshot's retry_at so they pickup any new changes

    @started.enter
    def enter_started(self):
        # Update Process with network interface
        if self.archiveresult.process_id:
            self.archiveresult.process.iface = NetworkInterface.current()
            self.archiveresult.process.save()
        # Lock the object and mark start time
        self.archiveresult.update_and_requeue(
            retry_at=timezone.now() + timedelta(seconds=120),  # 2 min timeout for plugin
            status=ArchiveResult.StatusChoices.STARTED,
            start_ts=timezone.now(),
        )
        # Run the plugin - this updates status, output, timestamps, etc.
        self.archiveresult.run()
        # Save the updated result
        self.archiveresult.save()

    @backoff.enter
    def enter_backoff(self):
        # Defer the next attempt by 60s and clear end_ts so the retry looks fresh.
        self.archiveresult.update_and_requeue(
            retry_at=timezone.now() + timedelta(seconds=60),
            status=ArchiveResult.StatusChoices.BACKOFF,
            end_ts=None,
        )

    def _check_and_seal_parent_snapshot(self):
        """
        Check if this is the last ArchiveResult to finish - if so, seal the parent Snapshot.

        Note: In the new architecture, the shared runner handles step advancement and sealing.
        This method is kept for direct model-driven edge cases.
        """
        import sys
        snapshot = self.archiveresult.snapshot
        # Check if all archiveresults are finished (in final states)
        remaining_active = snapshot.archiveresult_set.exclude(
            status__in=[
                ArchiveResult.StatusChoices.SUCCEEDED,
                ArchiveResult.StatusChoices.FAILED,
                ArchiveResult.StatusChoices.SKIPPED,
            ]
        ).count()
        if remaining_active == 0:
            print(f'[cyan] 🔒 All archiveresults finished for snapshot {snapshot.url}, sealing snapshot[/cyan]', file=sys.stderr)
            # Seal the parent snapshot
            cast(Any, snapshot).sm.seal()

    @succeeded.enter
    def enter_succeeded(self):
        import sys
        self.archiveresult.update_and_requeue(
            retry_at=None,
            status=ArchiveResult.StatusChoices.SUCCEEDED,
            end_ts=timezone.now(),
        )
        # Update health stats for ArchiveResult, Snapshot, and Crawl cascade
        self.archiveresult.cascade_health_update(success=True)
        print(f'[cyan] ✅ ArchiveResult succeeded: {self.archiveresult.plugin} for {self.archiveresult.snapshot.url}[/cyan]', file=sys.stderr)
        # Check if this is the last AR to finish - seal parent snapshot if so
        self._check_and_seal_parent_snapshot()

    @failed.enter
    def enter_failed(self):
        import sys
        print(f'[red] ❌ ArchiveResult.enter_failed() called for {self.archiveresult.plugin}[/red]', file=sys.stderr)
        self.archiveresult.update_and_requeue(
            retry_at=None,
            status=ArchiveResult.StatusChoices.FAILED,
            end_ts=timezone.now(),
        )
        # Update health stats for ArchiveResult, Snapshot, and Crawl cascade
        self.archiveresult.cascade_health_update(success=False)
        print(f'[red] ❌ ArchiveResult failed: {self.archiveresult.plugin} for {self.archiveresult.snapshot.url}[/red]', file=sys.stderr)
        # Check if this is the last AR to finish - seal parent snapshot if so
        self._check_and_seal_parent_snapshot()

    @skipped.enter
    def enter_skipped(self):
        import sys
        # Set output_str if not already set (e.g., when skipped due to max attempts)
        if not self.archiveresult.output_str and self.is_exceeded_max_attempts():
            from archivebox.config.configset import get_config
            config = get_config(
                crawl=self.archiveresult.snapshot.crawl,
                snapshot=self.archiveresult.snapshot,
            )
            max_attempts = config.get('MAX_URL_ATTEMPTS', 50)
            self.archiveresult.output_str = f'Skipped: snapshot exceeded MAX_URL_ATTEMPTS ({max_attempts} failures)'
        self.archiveresult.update_and_requeue(
            retry_at=None,
            status=ArchiveResult.StatusChoices.SKIPPED,
            end_ts=timezone.now(),
        )
        print(f'[dim] ⏭️ ArchiveResult skipped: {self.archiveresult.plugin} for {self.archiveresult.snapshot.url}[/dim]', file=sys.stderr)
        # Check if this is the last AR to finish - seal parent snapshot if so
        self._check_and_seal_parent_snapshot()
# =============================================================================
# State Machine Registration
# =============================================================================
@@ -3436,4 +2988,3 @@ class ArchiveResultMachine(BaseStateMachine):
# Manually register state machines with python-statemachine registry
# (normally auto-discovered from statemachines.py, but we define them here for clarity)
registry.register(SnapshotMachine)
registry.register(ArchiveResultMachine)

View File

@@ -232,11 +232,12 @@ SQLITE_CONNECTION_OPTIONS = {
# https://gcollazo.com/optimal-sqlite-settings-for-django/
# https://litestream.io/tips/#busy-timeout
# https://docs.djangoproject.com/en/5.1/ref/databases/#setting-pragma-options
"timeout": 10,
"timeout": 30,
"check_same_thread": False,
"transaction_mode": "IMMEDIATE",
"init_command": (
"PRAGMA foreign_keys=ON;"
"PRAGMA busy_timeout = 30000;"
"PRAGMA journal_mode = WAL;"
"PRAGMA synchronous = NORMAL;"
"PRAGMA temp_store = MEMORY;"

View File

@@ -0,0 +1,271 @@
from __future__ import annotations
import json
from collections import defaultdict
from typing import Any
from django.contrib.auth.models import User
from django.db.models import Count, F, Q, QuerySet
from django.db.models.functions import Lower
from django.http import HttpRequest
from django.urls import reverse
from archivebox.core.host_utils import build_snapshot_url, build_web_url
from archivebox.core.models import Snapshot, SnapshotTag, Tag
# Maximum number of snapshot previews embedded in each tag card.
TAG_SNAPSHOT_PREVIEW_LIMIT = 10
# (value, human label) pairs accepted by normalize_tag_sort() / the sort dropdown.
TAG_SORT_CHOICES = (
    ('name_asc', 'Name A-Z'),
    ('name_desc', 'Name Z-A'),
    ('created_desc', 'Created newest'),
    ('created_asc', 'Created oldest'),
    ('snapshots_desc', 'Most snapshots'),
    ('snapshots_asc', 'Fewest snapshots'),
)
# (value, human label) pairs accepted by normalize_has_snapshots_filter().
TAG_HAS_SNAPSHOTS_CHOICES = (
    ('all', 'All'),
    ('yes', 'Has snapshots'),
    ('no', 'No snapshots'),
)
def normalize_tag_name(name: str) -> str:
    """Strip surrounding whitespace from a tag name; None/falsy becomes ''."""
    if not name:
        return ''
    return name.strip()
def normalize_tag_sort(sort: str = 'created_desc') -> str:
    """Return sort unchanged if it is a recognized sort key, else the default 'created_desc'."""
    for key, _label in TAG_SORT_CHOICES:
        if sort == key:
            return sort
    return 'created_desc'
def normalize_has_snapshots_filter(value: str = 'all') -> str:
    """Return value unchanged if it is a recognized has-snapshots filter, else 'all'."""
    for key, _label in TAG_HAS_SNAPSHOTS_CHOICES:
        if value == key:
            return value
    return 'all'
def normalize_created_by_filter(created_by: str = '') -> str:
    """Validate a creator-id filter value.

    Returns the value only when it is a plain base-10 integer string that is
    safe to pass to int(); anything else collapses to '' (meaning: no filter).

    Bug fix: uses str.isdecimal() instead of str.isdigit(), because isdigit()
    also accepts characters like superscripts ('²') that int() rejects, which
    made the downstream int(created_by) in get_matching_tags() raise ValueError.
    """
    return created_by if str(created_by).isdecimal() else ''
def normalize_created_year_filter(year: str = '') -> str:
    """Validate a 4-digit year filter value; return '' for anything else.

    Bug fix: uses str.isdecimal() instead of str.isdigit() so only characters
    int() actually accepts can pass, preventing a ValueError in the downstream
    int(year) call inside get_matching_tags() (e.g. for '²²²²').
    """
    year = (year or '').strip()
    if len(year) == 4 and year.isdecimal():
        return year
    return ''
def get_matching_tags(
    query: str = '',
    sort: str = 'created_desc',
    created_by: str = '',
    year: str = '',
    has_snapshots: str = 'all',
) -> QuerySet[Tag]:
    """Return Tags matching the given search/filter params, annotated with num_snapshots.

    All params are normalized via the normalize_* helpers; unrecognized values
    fall back to their defaults rather than raising.
    """
    tags = Tag.objects.select_related('created_by').annotate(
        num_snapshots=Count('snapshot_set', distinct=True),
    )

    search = normalize_tag_name(query)
    if search:
        tags = tags.filter(Q(name__icontains=search) | Q(slug__icontains=search))

    creator_id = normalize_created_by_filter(created_by)
    if creator_id:
        tags = tags.filter(created_by_id=int(creator_id))

    created_year = normalize_created_year_filter(year)
    if created_year:
        tags = tags.filter(created_at__year=int(created_year))

    snapshot_filter = normalize_has_snapshots_filter(has_snapshots)
    if snapshot_filter == 'yes':
        tags = tags.filter(num_snapshots__gt=0)
    elif snapshot_filter == 'no':
        tags = tags.filter(num_snapshots=0)

    # One ordering tuple per sort key; normalize_tag_sort guarantees the key exists.
    orderings = {
        'name_asc': (Lower('name'), 'id'),
        'name_desc': (Lower('name').desc(), '-id'),
        'created_asc': (F('created_at').asc(nulls_first=True), 'id', Lower('name')),
        'snapshots_desc': (F('num_snapshots').desc(nulls_last=True), F('created_at').desc(nulls_last=True), '-id', Lower('name')),
        'snapshots_asc': (F('num_snapshots').asc(nulls_first=True), Lower('name'), 'id'),
        'created_desc': (F('created_at').desc(nulls_last=True), '-id', Lower('name')),
    }
    return tags.order_by(*orderings[normalize_tag_sort(sort)])
def get_tag_creator_choices() -> list[tuple[str, str]]:
    """Return (user_id, username) dropdown choices for users who have created tags."""
    rows = (
        Tag.objects
        .filter(created_by__isnull=False)
        .values_list('created_by_id', 'created_by__username')
        .order_by(Lower('created_by__username'), 'created_by_id')
        .distinct()
    )
    choices: list[tuple[str, str]] = []
    for user_id, username in rows:
        # Fall back to a synthetic label when the username is blank.
        choices.append((str(user_id), username or f'User {user_id}'))
    return choices
def get_tag_year_choices() -> list[str]:
    """Distinct years (newest first) in which tags were created, as strings."""
    creation_dates = Tag.objects.exclude(created_at__isnull=True).dates('created_at', 'year', order='DESC')
    return [str(d.year) for d in creation_dates]
def get_tag_by_ref(tag_ref: str | int) -> Tag:
    """Resolve a Tag by numeric pk or by slug.

    Slug lookup is case-insensitive exact match first, then substring match.
    Raises Tag.DoesNotExist when nothing matches.
    """
    if not isinstance(tag_ref, int):
        ref = str(tag_ref).strip()
        if not ref.isdigit():
            try:
                return Tag.objects.get(slug__iexact=ref)
            except Tag.DoesNotExist:
                return Tag.objects.get(slug__icontains=ref)
        tag_ref = int(ref)
    return Tag.objects.get(pk=tag_ref)
def get_or_create_tag(name: str, created_by: User | None = None) -> tuple[Tag, bool]:
    """Fetch a Tag by case-insensitive name, creating it if absent.

    Returns (tag, created); raises ValueError for an empty/blank name.
    """
    cleaned = normalize_tag_name(name)
    if not cleaned:
        raise ValueError('Tag name is required')
    match = Tag.objects.filter(name__iexact=cleaned).first()
    if match is not None:
        return match, False
    return Tag.objects.create(name=cleaned, created_by=created_by), True
def rename_tag(tag: Tag, name: str) -> Tag:
    """Rename a tag, rejecting empty names and case-insensitive duplicates.

    Saves only when the name actually changed; returns the (possibly updated) tag.
    Raises ValueError on a blank name or an existing conflicting tag.
    """
    cleaned = normalize_tag_name(name)
    if not cleaned:
        raise ValueError('Tag name is required')
    duplicate = Tag.objects.filter(name__iexact=cleaned).exclude(pk=tag.pk).first()
    if duplicate:
        raise ValueError(f'Tag "{duplicate.name}" already exists')
    if tag.name != cleaned:
        tag.name = cleaned
        tag.save()
    return tag
def delete_tag(tag: Tag) -> tuple[int, dict[str, int]]:
    """Delete the tag via the ORM; returns Django's (total_deleted, per-model counts)."""
    deletion_result = tag.delete()
    return deletion_result
def export_tag_urls(tag: Tag) -> str:
    """Newline-joined URLs of the tag's snapshots, newest first."""
    ordered = tag.snapshot_set.order_by('-downloaded_at', '-created_at', '-pk')
    return '\n'.join(ordered.values_list('url', flat=True))
def export_tag_snapshots_jsonl(tag: Tag) -> str:
    """One JSON object per line for each of the tag's snapshots, newest first."""
    ordered = tag.snapshot_set.order_by('-downloaded_at', '-created_at', '-pk').prefetch_related('tags')
    lines = [json.dumps(snap.to_json()) for snap in ordered]
    return '\n'.join(lines)
def _display_snapshot_title(snapshot: Snapshot) -> str:
title = (snapshot.title or '').strip()
url = (snapshot.url or '').strip()
if not title:
return url
normalized_title = title.lower()
if normalized_title == 'pending...' or normalized_title == url.lower():
return url
return title
def _build_snapshot_preview(snapshot: Snapshot, request: HttpRequest | None = None) -> dict[str, Any]:
    """Serialize a single snapshot into the small preview dict shown on a tag card."""
    snapshot_id = str(snapshot.pk)
    downloaded = snapshot.downloaded_at
    return {
        'id': snapshot_id,
        'title': _display_snapshot_title(snapshot),
        'url': snapshot.url,
        'favicon_url': build_snapshot_url(snapshot_id, 'favicon.ico', request=request),
        'admin_url': reverse('admin:core_snapshot_change', args=[snapshot.pk]),
        'archive_url': build_web_url(f'/{snapshot.archive_path_from_db}/index.html', request=request),
        'downloaded_at': downloaded.isoformat() if downloaded else None,
    }
def _build_snapshot_preview_map(tags: list[Tag], request: HttpRequest | None = None, preview_limit: int = TAG_SNAPSHOT_PREVIEW_LIMIT) -> dict[int, list[dict[str, Any]]]:
    """Map each tag's pk to up to preview_limit serialized snapshot previews (newest first)."""
    if not tags:
        return {}
    rows = (
        SnapshotTag.objects
        .filter(tag_id__in=[tag.pk for tag in tags])
        .select_related('snapshot__crawl__created_by')
        .order_by(
            'tag_id',
            F('snapshot__downloaded_at').desc(nulls_last=True),
            F('snapshot__created_at').desc(nulls_last=True),
            F('snapshot_id').desc(),
        )
    )
    preview_map: dict[int, list[dict[str, Any]]] = defaultdict(list)
    for row in rows:
        bucket = preview_map[row.tag_id]
        # Rows arrive grouped by tag and pre-sorted, so keep only the first N per tag.
        if len(bucket) < preview_limit:
            bucket.append(_build_snapshot_preview(row.snapshot, request=request))
    return preview_map
def build_tag_card(tag: Tag, snapshot_previews: list[dict[str, Any]] | None = None) -> dict[str, Any]:
    """Serialize a Tag into the dict consumed by the tags admin UI.

    Args:
        tag: the Tag row, ideally annotated with ``num_snapshots`` by
            get_matching_tags() so no extra COUNT query is needed.
        snapshot_previews: optional pre-built preview dicts for the card.

    Returns:
        dict with identity fields, snapshot count, admin/API URLs, and previews.
    """
    # Perf fix: getattr(tag, 'num_snapshots', tag.snapshot_set.count()) ran the
    # COUNT query eagerly even when the annotation was present (Python evaluates
    # the default argument before getattr runs). Only hit the DB when the
    # annotation is actually missing.
    count = getattr(tag, 'num_snapshots', None)
    if count is None:
        count = tag.snapshot_set.count()
    return {
        'id': tag.pk,
        'name': tag.name,
        'slug': tag.slug,
        'num_snapshots': count,
        'filter_url': f"{reverse('admin:core_snapshot_changelist')}?tags__id__exact={tag.pk}",
        'edit_url': reverse('admin:core_tag_change', args=[tag.pk]),
        'export_urls_url': reverse('api-1:tag_urls_export', args=[tag.pk]),
        'export_jsonl_url': reverse('api-1:tag_snapshots_export', args=[tag.pk]),
        'rename_url': reverse('api-1:rename_tag', args=[tag.pk]),
        'delete_url': reverse('api-1:delete_tag', args=[tag.pk]),
        'snapshots': snapshot_previews or [],
    }
def build_tag_cards(
    query: str = '',
    request: HttpRequest | None = None,
    limit: int | None = None,
    preview_limit: int = TAG_SNAPSHOT_PREVIEW_LIMIT,
    sort: str = 'created_desc',
    created_by: str = '',
    year: str = '',
    has_snapshots: str = 'all',
) -> list[dict[str, Any]]:
    """Fetch matching tags and serialize each into a card dict with snapshot previews."""
    matching = get_matching_tags(
        query=query,
        sort=sort,
        created_by=created_by,
        year=year,
        has_snapshots=has_snapshots,
    )
    if limit is not None:
        matching = matching[:limit]
    tags = list(matching)
    preview_map = _build_snapshot_preview_map(tags, request=request, preview_limit=preview_limit)
    cards = []
    for tag in tags:
        cards.append(build_tag_card(tag, snapshot_previews=preview_map.get(tag.pk, [])))
    return cards

View File

@@ -11,6 +11,7 @@ from archivebox.hooks import (
)
from archivebox.core.host_utils import (
get_admin_base_url,
get_public_base_url,
get_web_base_url,
get_snapshot_base_url,
build_snapshot_url,
@@ -166,6 +167,11 @@ def web_base_url(context) -> str:
return get_web_base_url(request=context.get('request'))
@register.simple_tag(takes_context=True)
def public_base_url(context) -> str:
    """Template tag: public-facing base URL derived from the current request (if any)."""
    return get_public_base_url(request=context.get('request'))
@register.simple_tag(takes_context=True)
def snapshot_base_url(context, snapshot) -> str:
snapshot_id = getattr(snapshot, 'id', snapshot)

View File

@@ -1,5 +1,6 @@
__package__ = 'archivebox.core'
import json
import os
import posixpath
from glob import glob, escape
@@ -7,7 +8,7 @@ from django.utils import timezone
import inspect
from typing import Callable, cast, get_type_hints
from pathlib import Path
from urllib.parse import urlparse
from urllib.parse import quote, urlparse
from django.shortcuts import render, redirect
from django.http import JsonResponse, HttpRequest, HttpResponse, Http404, HttpResponseForbidden
@@ -26,7 +27,7 @@ from admin_data_views.typing import TableContext, ItemContext, SectionData
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
from archivebox.config import CONSTANTS, CONSTANTS_CONFIG, DATA_DIR, VERSION
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG
from archivebox.config.configset import get_flat_config, get_config, get_all_configs
from archivebox.misc.util import base_url, htmlencode, ts_to_date_str, urldecode
from archivebox.misc.serve_static import serve_static_with_byterange_support
@@ -37,7 +38,18 @@ from archivebox.core.models import Snapshot
from archivebox.core.host_utils import build_snapshot_url
from archivebox.core.forms import AddLinkForm
from archivebox.crawls.models import Crawl
from archivebox.hooks import get_enabled_plugins, get_plugin_name
from archivebox.hooks import (
BUILTIN_PLUGINS_DIR,
USER_PLUGINS_DIR,
discover_plugin_configs,
get_enabled_plugins,
get_plugin_name,
iter_plugin_dirs,
)
ABX_PLUGINS_GITHUB_BASE_URL = 'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/'
LIVE_PLUGIN_BASE_URL = '/admin/environment/plugins/'
def _files_index_target(snapshot: Snapshot, archivefile: str | None) -> str:
@@ -699,6 +711,9 @@ def _serve_responses_path(request, responses_root: Path, rel_path: str, show_ind
def _serve_snapshot_replay(request: HttpRequest, snapshot: Snapshot, path: str = ""):
rel_path = path or ""
show_indexes = bool(request.GET.get("files"))
if not show_indexes and (not rel_path or rel_path == "index.html"):
return SnapshotView.render_live_index(request, snapshot)
if not rel_path or rel_path.endswith("/"):
if show_indexes:
rel_path = rel_path.rstrip("/")
@@ -784,7 +799,6 @@ class SnapshotHostView(View):
raise Http404
return _serve_snapshot_replay(request, snapshot, path)
class SnapshotReplayView(View):
"""Serve snapshot directory contents on a one-domain replay path."""
@@ -915,8 +929,17 @@ class AddView(UserPassesTestMixin, FormView):
return custom_config
def get_context_data(self, **kwargs):
from archivebox.core.models import Tag
required_search_plugin = f'search_backend_{SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}'.strip()
plugin_configs = discover_plugin_configs()
plugin_dependency_map = {
plugin_name: [
str(required_plugin).strip()
for required_plugin in (schema.get('required_plugins') or [])
if str(required_plugin).strip()
]
for plugin_name, schema in plugin_configs.items()
if isinstance(schema.get('required_plugins'), list) and schema.get('required_plugins')
}
return {
**super().get_context_data(**kwargs),
'title': "Create Crawl",
@@ -924,8 +947,9 @@ class AddView(UserPassesTestMixin, FormView):
'absolute_add_path': self.request.build_absolute_uri(self.request.path),
'VERSION': VERSION,
'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
'required_search_plugin': required_search_plugin,
'plugin_dependency_map_json': json.dumps(plugin_dependency_map, sort_keys=True),
'stdout': '',
'available_tags': list(Tag.objects.all().order_by('name').values_list('name', flat=True)),
}
def _create_crawl_from_form(self, form, *, created_by_id=None) -> Crawl:
@@ -937,11 +961,10 @@ class AddView(UserPassesTestMixin, FormView):
depth = int(form.cleaned_data["depth"])
plugins = ','.join(form.cleaned_data.get("plugins", []))
schedule = form.cleaned_data.get("schedule", "").strip()
persona = form.cleaned_data.get("persona", "Default")
overwrite = form.cleaned_data.get("overwrite", False)
update = form.cleaned_data.get("update", False)
persona = form.cleaned_data.get("persona")
index_only = form.cleaned_data.get("index_only", False)
notes = form.cleaned_data.get("notes", "")
url_filters = form.cleaned_data.get("url_filters") or {}
custom_config = self._get_custom_config_overrides(form)
from archivebox.config.permissions import HOSTNAME
@@ -957,6 +980,7 @@ class AddView(UserPassesTestMixin, FormView):
# 1. save the provided urls to sources/2024-11-05__23-59-59__web_ui_add_by_user_<user_pk>.txt
sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__web_ui_add_by_user_{created_by_id}.txt'
sources_file.parent.mkdir(parents=True, exist_ok=True)
sources_file.write_text(urls if isinstance(urls, str) else '\n'.join(urls))
# 2. create a new Crawl with the URLs from the file
@@ -964,16 +988,18 @@ class AddView(UserPassesTestMixin, FormView):
urls_content = sources_file.read_text()
# Build complete config
config = {
'ONLY_NEW': not update,
'INDEX_ONLY': index_only,
'OVERWRITE': overwrite,
'DEPTH': depth,
'PLUGINS': plugins or '',
'DEFAULT_PERSONA': persona or 'Default',
'DEFAULT_PERSONA': (persona.name if persona else 'Default'),
}
# Merge custom config overrides
config.update(custom_config)
if url_filters.get('allowlist'):
config['URL_ALLOWLIST'] = url_filters['allowlist']
if url_filters.get('denylist'):
config['URL_DENYLIST'] = url_filters['denylist']
crawl = Crawl.objects.create(
urls=urls_content,
@@ -999,6 +1025,8 @@ class AddView(UserPassesTestMixin, FormView):
crawl.schedule = crawl_schedule
crawl.save(update_fields=['schedule'])
crawl.create_snapshots_from_urls()
# 4. start the Orchestrator & wait until it completes
# ... orchestrator will create the root Snapshot, which creates pending ArchiveResults, which gets run by the ArchiveResultActors ...
# from archivebox.crawls.actors import CrawlActor
@@ -1011,7 +1039,7 @@ class AddView(UserPassesTestMixin, FormView):
urls = form.cleaned_data["url"]
schedule = form.cleaned_data.get("schedule", "").strip()
rough_url_count = urls.count('://')
rough_url_count = len([url for url in urls.splitlines() if url.strip()])
# Build success message with schedule link if created
schedule_msg = ""
@@ -1080,10 +1108,6 @@ class WebAddView(AddView):
'persona': defaults_form.fields['persona'].initial or 'Default',
'config': {},
}
if defaults_form.fields['update'].initial:
form_data['update'] = 'on'
if defaults_form.fields['overwrite'].initial:
form_data['overwrite'] = 'on'
if defaults_form.fields['index_only'].initial:
form_data['index_only'] = 'on'
@@ -1118,6 +1142,41 @@ def live_progress_view(request):
from archivebox.core.models import Snapshot, ArchiveResult
from archivebox.machine.models import Process, Machine
def hook_details(hook_name: str, plugin: str = "setup") -> tuple[str, str, str, str]:
normalized_hook_name = Path(hook_name).name if hook_name else ""
if not normalized_hook_name:
return (plugin, plugin, "unknown", "")
phase = "unknown"
if normalized_hook_name.startswith("on_Crawl__"):
phase = "crawl"
elif normalized_hook_name.startswith("on_Snapshot__"):
phase = "snapshot"
elif normalized_hook_name.startswith("on_Binary__"):
phase = "binary"
label = normalized_hook_name
if "__" in normalized_hook_name:
label = normalized_hook_name.split("__", 1)[1]
label = label.rsplit(".", 1)[0]
if len(label) > 3 and label[:2].isdigit() and label[2] == "_":
label = label[3:]
label = label.replace("_", " ").strip() or plugin
return (plugin, label, phase, normalized_hook_name)
def process_label(cmd: list[str] | None) -> tuple[str, str, str, str]:
hook_path = ""
if isinstance(cmd, list) and cmd:
first = cmd[0]
if isinstance(first, str):
hook_path = first
if not hook_path:
return ("", "setup", "unknown", "")
return hook_details(Path(hook_path).name, plugin=Path(hook_path).parent.name or "setup")
machine = Machine.current()
orchestrator_proc = Process.objects.filter(
machine=machine,
@@ -1188,8 +1247,19 @@ def live_progress_view(request):
Process.TypeChoices.BINARY,
],
)
recent_processes = Process.objects.filter(
machine=machine,
process_type__in=[
Process.TypeChoices.HOOK,
Process.TypeChoices.BINARY,
],
modified_at__gte=timezone.now() - timedelta(minutes=10),
).order_by("-modified_at")
crawl_process_pids: dict[str, int] = {}
snapshot_process_pids: dict[str, int] = {}
process_records_by_crawl: dict[str, list[dict[str, object]]] = {}
process_records_by_snapshot: dict[str, list[dict[str, object]]] = {}
seen_process_records: set[str] = set()
for proc in running_processes:
env = proc.env or {}
if not isinstance(env, dict):
@@ -1197,11 +1267,48 @@ def live_progress_view(request):
crawl_id = env.get('CRAWL_ID')
snapshot_id = env.get('SNAPSHOT_ID')
_plugin, _label, phase, _hook_name = process_label(proc.cmd)
if crawl_id and proc.pid:
crawl_process_pids.setdefault(str(crawl_id), proc.pid)
if snapshot_id and proc.pid:
if phase == "snapshot" and snapshot_id and proc.pid:
snapshot_process_pids.setdefault(str(snapshot_id), proc.pid)
for proc in recent_processes:
env = proc.env or {}
if not isinstance(env, dict):
env = {}
crawl_id = env.get("CRAWL_ID")
snapshot_id = env.get("SNAPSHOT_ID")
if not crawl_id and not snapshot_id:
continue
plugin, label, phase, hook_name = process_label(proc.cmd)
record_scope = str(snapshot_id) if phase == "snapshot" and snapshot_id else str(crawl_id)
proc_key = f"{record_scope}:{plugin}:{label}:{proc.status}:{proc.exit_code}"
if proc_key in seen_process_records:
continue
seen_process_records.add(proc_key)
status = "started" if proc.status == Process.StatusChoices.RUNNING else ("failed" if proc.exit_code not in (None, 0) else "succeeded")
payload: dict[str, object] = {
"id": str(proc.id),
"plugin": plugin,
"label": label,
"hook_name": hook_name,
"status": status,
"phase": phase,
"source": "process",
"process_id": str(proc.id),
}
if status == "started" and proc.pid:
payload["pid"] = proc.pid
if phase == "snapshot" and snapshot_id:
process_records_by_snapshot.setdefault(str(snapshot_id), []).append(payload)
elif crawl_id:
process_records_by_crawl.setdefault(str(crawl_id), []).append(payload)
active_crawls_qs = Crawl.objects.filter(
status__in=[Crawl.StatusChoices.QUEUED, Crawl.StatusChoices.STARTED]
).prefetch_related(
@@ -1234,6 +1341,11 @@ def live_progress_view(request):
# Calculate crawl progress
crawl_progress = int((completed_snapshots / total_snapshots) * 100) if total_snapshots > 0 else 0
crawl_setup_plugins = list(process_records_by_crawl.get(str(crawl.id), []))
crawl_setup_total = len(crawl_setup_plugins)
crawl_setup_completed = sum(1 for item in crawl_setup_plugins if item.get("status") == "succeeded")
crawl_setup_failed = sum(1 for item in crawl_setup_plugins if item.get("status") == "failed")
crawl_setup_pending = sum(1 for item in crawl_setup_plugins if item.get("status") == "queued")
# Get active snapshots for this crawl (already prefetched)
active_snapshots_for_crawl = []
@@ -1241,28 +1353,21 @@ def live_progress_view(request):
# Get archive results for this snapshot (already prefetched)
snapshot_results = snapshot.archiveresult_set.all()
# Count in memory instead of DB queries
total_plugins = len(snapshot_results)
completed_plugins = sum(1 for ar in snapshot_results if ar.status == ArchiveResult.StatusChoices.SUCCEEDED)
failed_plugins = sum(1 for ar in snapshot_results if ar.status == ArchiveResult.StatusChoices.FAILED)
pending_plugins = sum(1 for ar in snapshot_results if ar.status == ArchiveResult.StatusChoices.QUEUED)
# Calculate snapshot progress using per-plugin progress
now = timezone.now()
plugin_progress_values: list[int] = []
all_plugins: list[dict[str, object]] = []
seen_plugin_keys: set[str] = set()
# Get all extractor plugins for this snapshot (already prefetched, sort in Python)
# Order: started first, then queued, then completed
def plugin_sort_key(ar):
status_order = {
ArchiveResult.StatusChoices.STARTED: 0,
ArchiveResult.StatusChoices.QUEUED: 1,
ArchiveResult.StatusChoices.SUCCEEDED: 2,
ArchiveResult.StatusChoices.FAILED: 3,
ArchiveResult.StatusChoices.NORESULTS: 3,
ArchiveResult.StatusChoices.FAILED: 4,
}
return (status_order.get(ar.status, 4), ar.plugin)
return (status_order.get(ar.status, 5), ar.plugin, ar.hook_name or "")
all_plugins = []
for ar in sorted(snapshot_results, key=plugin_sort_key):
status = ar.status
progress_value = 0
@@ -1270,6 +1375,7 @@ def live_progress_view(request):
ArchiveResult.StatusChoices.SUCCEEDED,
ArchiveResult.StatusChoices.FAILED,
ArchiveResult.StatusChoices.SKIPPED,
ArchiveResult.StatusChoices.NORESULTS,
):
progress_value = 100
elif status == ArchiveResult.StatusChoices.STARTED:
@@ -1284,20 +1390,49 @@ def live_progress_view(request):
progress_value = 0
plugin_progress_values.append(progress_value)
plugin, label, phase, hook_name = hook_details(ar.hook_name or ar.plugin, plugin=ar.plugin)
plugin_payload = {
'id': str(ar.id),
'plugin': ar.plugin,
'label': label,
'hook_name': hook_name,
'phase': phase,
'status': status,
'process_id': str(ar.process_id) if ar.process_id else None,
}
if status == ArchiveResult.StatusChoices.STARTED and ar.process_id and ar.process:
plugin_payload['pid'] = ar.process.pid
if status == ArchiveResult.StatusChoices.STARTED:
plugin_payload['progress'] = progress_value
plugin_payload['timeout'] = ar.timeout or 120
plugin_payload['source'] = 'archiveresult'
all_plugins.append(plugin_payload)
seen_plugin_keys.add(
str(ar.process_id) if ar.process_id else f"{ar.plugin}:{hook_name}"
)
snapshot_progress = int(sum(plugin_progress_values) / total_plugins) if total_plugins > 0 else 0
for proc_payload in process_records_by_snapshot.get(str(snapshot.id), []):
proc_key = str(proc_payload.get("process_id") or f"{proc_payload.get('plugin')}:{proc_payload.get('hook_name')}")
if proc_key in seen_plugin_keys:
continue
seen_plugin_keys.add(proc_key)
all_plugins.append(proc_payload)
proc_status = proc_payload.get("status")
if proc_status in ("succeeded", "failed", "skipped"):
plugin_progress_values.append(100)
elif proc_status == "started":
plugin_progress_values.append(1)
else:
plugin_progress_values.append(0)
total_plugins = len(all_plugins)
completed_plugins = sum(1 for item in all_plugins if item.get("status") == "succeeded")
failed_plugins = sum(1 for item in all_plugins if item.get("status") == "failed")
pending_plugins = sum(1 for item in all_plugins if item.get("status") == "queued")
snapshot_progress = int(sum(plugin_progress_values) / len(plugin_progress_values)) if plugin_progress_values else 0
active_snapshots_for_crawl.append({
'id': str(snapshot.id),
@@ -1334,6 +1469,11 @@ def live_progress_view(request):
'started_snapshots': started_snapshots,
'failed_snapshots': 0,
'pending_snapshots': pending_snapshots,
'setup_plugins': crawl_setup_plugins,
'setup_total_plugins': crawl_setup_total,
'setup_completed_plugins': crawl_setup_completed,
'setup_failed_plugins': crawl_setup_failed,
'setup_pending_plugins': crawl_setup_pending,
'active_snapshots': active_snapshots_for_crawl,
'can_start': can_start,
'urls_preview': urls_preview,
@@ -1461,7 +1601,11 @@ def find_config_source(key: str, merged_config: dict) -> str:
"""Determine where a config value comes from."""
from archivebox.machine.models import Machine
# Check if it's from archivebox.machine.config
# Environment variables override all persistent config sources.
if key in os.environ:
return 'Environment'
# Machine.config overrides ArchiveBox.conf.
try:
machine = Machine.current()
if machine.config and key in machine.config:
@@ -1469,10 +1613,6 @@ def find_config_source(key: str, merged_config: dict) -> str:
except Exception:
pass
# Check if it's from environment variable
if key in os.environ:
return 'Environment'
# Check if it's from archivebox.config.file
from archivebox.config.configset import BaseConfigSet
file_config = BaseConfigSet.load_from_file(CONSTANTS.CONFIG_FILE)
@@ -1483,6 +1623,43 @@ def find_config_source(key: str, merged_config: dict) -> str:
return 'Default'
def find_plugin_for_config_key(key: str) -> str | None:
for plugin_name, schema in discover_plugin_configs().items():
if key in (schema.get('properties') or {}):
return plugin_name
return None
def get_config_definition_link(key: str) -> tuple[str, str]:
plugin_name = find_plugin_for_config_key(key)
if not plugin_name:
return (
f'https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{quote(key)}&type=code',
'archivebox/config',
)
plugin_dir = next((path.resolve() for path in iter_plugin_dirs() if path.name == plugin_name), None)
if plugin_dir:
builtin_root = BUILTIN_PLUGINS_DIR.resolve()
if plugin_dir.is_relative_to(builtin_root):
return (
f'{ABX_PLUGINS_GITHUB_BASE_URL}{quote(plugin_name)}/config.json',
f'abx_plugins/plugins/{plugin_name}/config.json',
)
user_root = USER_PLUGINS_DIR.resolve()
if plugin_dir.is_relative_to(user_root):
return (
f'{LIVE_PLUGIN_BASE_URL}user.{quote(plugin_name)}/',
f'data/custom_plugins/{plugin_name}/config.json',
)
return (
f'{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/',
f'abx_plugins/plugins/{plugin_name}/config.json',
)
@render_with_table_view
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
CONFIGS = get_all_configs()
@@ -1566,17 +1743,6 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
# Determine all sources for this config value
sources_info = []
# Default value
default_val = find_config_default(key)
if default_val:
sources_info.append(('Default', default_val, 'gray'))
# Config file value
if CONSTANTS.CONFIG_FILE.exists():
file_config = BaseConfigSet.load_from_file(CONSTANTS.CONFIG_FILE)
if key in file_config:
sources_info.append(('Config File', file_config[key], 'green'))
# Environment variable
if key in os.environ:
sources_info.append(('Environment', os.environ[key] if key_is_safe(key) else '********', 'blue'))
@@ -1592,6 +1758,17 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
except Exception:
pass
# Config file value
if CONSTANTS.CONFIG_FILE.exists():
file_config = BaseConfigSet.load_from_file(CONSTANTS.CONFIG_FILE)
if key in file_config:
sources_info.append(('Config File', file_config[key], 'green'))
# Default value
default_val = find_config_default(key)
if default_val:
sources_info.append(('Default', default_val, 'gray'))
# Final computed value
final_value = merged_config.get(key, FLAT_CONFIG.get(key, CONFIGS.get(key, None)))
if not key_is_safe(key):
@@ -1614,6 +1791,8 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
section_header = mark_safe(f'[DYNAMIC CONFIG] &nbsp; <b><code style="color: lightgray">{key}</code></b> &nbsp; <small>(read-only, calculated at runtime)</small>')
definition_url, definition_label = get_config_definition_link(key)
section_data = cast(SectionData, {
"name": section_header,
"description": None,
@@ -1621,7 +1800,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
'Key': key,
'Type': find_config_type(key),
'Value': final_value,
'Source': find_config_source(key, merged_config),
'Currently read from': find_config_source(key, merged_config),
},
"help_texts": {
'Key': mark_safe(f'''
@@ -1631,14 +1810,14 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
</span>
'''),
'Type': mark_safe(f'''
<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code">
See full definition in <code>archivebox/config</code>...
<a href="{definition_url}" target="_blank" rel="noopener noreferrer">
See full definition in <code>{definition_label}</code>...
</a>
'''),
'Value': mark_safe(f'''
{'<b style="color: red">Value is redacted for your security. (Passwords, secrets, API tokens, etc. cannot be viewed in the Web UI)</b><br/><br/>' if not key_is_safe(key) else ''}
<br/><hr/><br/>
<b>Configuration Sources (in priority order):</b><br/><br/>
<b>Configuration Sources (highest priority first):</b><br/><br/>
{sources_html}
<br/><br/>
<p style="display: {"block" if key in FLAT_CONFIG and key not in CONSTANTS_CONFIG else "none"}">
@@ -1651,15 +1830,15 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
}"</code>
</p>
'''),
'Source': mark_safe(f'''
'Currently read from': mark_safe(f'''
The value shown in the "Value" field comes from the <b>{find_config_source(key, merged_config)}</b> source.
<br/><br/>
Priority order (highest to lowest):
<ol>
<li><b style="color: blue">Environment</b> - Environment variables</li>
<li><b style="color: purple">Machine</b> - Machine-specific overrides (e.g., resolved binary paths)
{f'<br/><a href="{machine_admin_url}">→ Edit <code>{key}</code> in Machine.config for this server</a>' if machine_admin_url else ''}
</li>
<li><b style="color: blue">Environment</b> - Environment variables</li>
<li><b style="color: green">Config File</b> - data/ArchiveBox.conf</li>
<li><b style="color: gray">Default</b> - Default value from code</li>
</ol>

View File

@@ -131,7 +131,46 @@ class TagEditorWidget(forms.Widget):
}};
window.updateHiddenInput_{widget_id} = function() {{
document.getElementById('{widget_id}').value = currentTags_{widget_id}.join(',');
var hiddenInput = document.getElementById('{widget_id}');
if (!hiddenInput) {{
return;
}}
hiddenInput.value = currentTags_{widget_id}.join(',');
hiddenInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
hiddenInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
}};
function normalizeTags_{widget_id}(value) {{
var rawTags = Array.isArray(value) ? value : String(value || '').split(',');
var seen = {{}};
return rawTags
.map(function(tag) {{ return String(tag || '').trim(); }})
.filter(function(tag) {{
if (!tag) return false;
var normalized = tag.toLowerCase();
if (seen[normalized]) return false;
seen[normalized] = true;
return true;
}})
.sort(function(a, b) {{
return a.toLowerCase().localeCompare(b.toLowerCase());
}});
}}
window.setTags_{widget_id} = function(value, options) {{
currentTags_{widget_id} = normalizeTags_{widget_id}(value);
rebuildPills_{widget_id}();
if (!(options && options.skipHiddenUpdate)) {{
updateHiddenInput_{widget_id}();
}}
}};
window.syncTagEditorFromHidden_{widget_id} = function() {{
var hiddenInput = document.getElementById('{widget_id}');
if (!hiddenInput) {{
return;
}}
setTags_{widget_id}(hiddenInput.value, {{ skipHiddenUpdate: true }});
}};
function computeTagStyle_{widget_id}(tagName) {{
@@ -190,9 +229,7 @@ class TagEditorWidget(forms.Widget):
// Add to current tags
currentTags_{widget_id}.push(tagName);
currentTags_{widget_id}.sort(function(a, b) {{
return a.toLowerCase().localeCompare(b.toLowerCase());
}});
currentTags_{widget_id} = normalizeTags_{widget_id}(currentTags_{widget_id});
// Rebuild pills
rebuildPills_{widget_id}();
@@ -252,6 +289,14 @@ class TagEditorWidget(forms.Widget):
}}
}});
document.getElementById('{widget_id}').addEventListener('change', function() {{
syncTagEditorFromHidden_{widget_id}();
}});
document.getElementById('{widget_id}').addEventListener('archivebox:sync-tags', function() {{
syncTagEditorFromHidden_{widget_id}();
}});
window.handleTagKeydown_{widget_id} = function(event) {{
var input = event.target;
var value = input.value.trim();
@@ -320,6 +365,8 @@ class TagEditorWidget(forms.Widget):
var input = document.querySelector('input[name="csrfmiddlewaretoken"]');
return input ? input.value : '';
}}
syncTagEditorFromHidden_{widget_id}();
}})();
</script>
'''
@@ -327,15 +374,232 @@ class TagEditorWidget(forms.Widget):
return mark_safe(html)
class URLFiltersWidget(forms.Widget):
"""Render URL allowlist / denylist controls with same-domain autofill."""
template_name = ""
def __init__(self, attrs=None, *, source_selector='textarea[name="url"]'):
self.source_selector = source_selector
super().__init__(attrs)
def render(self, name, value, attrs=None, renderer=None):
value = value if isinstance(value, dict) else {}
widget_id_raw = attrs.get('id', name) if attrs else name
widget_id = re.sub(r'[^A-Za-z0-9_]', '_', str(widget_id_raw)) or name
allowlist = escape(value.get('allowlist', '') or '')
denylist = escape(value.get('denylist', '') or '')
return mark_safe(f'''
<div id="{widget_id}_container" class="url-filters-widget">
<input type="hidden" name="{name}" value="">
<div class="url-filters-grid">
<div class="url-filters-column">
<div class="url-filter-label-row">
<label for="{widget_id}_allowlist" class="url-filter-label"><span class="url-filter-label-main">🟢 URL_ALLOWLIST</span></label>
<span class="url-filter-label-note">Regex patterns or domains to exclude, one pattern per line.</span>
</div>
<textarea id="{widget_id}_allowlist"
name="{name}_allowlist"
rows="2"
placeholder="^https?://([^/]+\\.)?(example\\.com|example\\.org)([:/]|$)">{allowlist}</textarea>
</div>
<div class="url-filters-column">
<div class="url-filter-label-row">
<label for="{widget_id}_denylist" class="url-filter-label"><span class="url-filter-label-main">⛔ URL_DENYLIST</span></label>
<span class="url-filter-label-note">Regex patterns or domains to exclude, one pattern per line.</span>
</div>
<textarea id="{widget_id}_denylist"
name="{name}_denylist"
rows="2"
placeholder="^https?://([^/]+\\.)?(cdn\\.example\\.com|analytics\\.example\\.org)([:/]|$)">{denylist}</textarea>
</div>
</div>
<label class="url-filters-toggle" for="{widget_id}_same_domain_only">
<input type="checkbox" id="{widget_id}_same_domain_only" name="{name}_same_domain_only" value="1">
<span>Same domain only</span>
</label>
<div class="help-text">These values can be one regex pattern or domain per line. URL_DENYLIST takes precedence over URL_ALLOWLIST.</div>
<script>
(function() {{
var allowlistField = document.getElementById('{widget_id}_allowlist');
var denylistField = document.getElementById('{widget_id}_denylist');
var sameDomainOnly = document.getElementById('{widget_id}_same_domain_only');
var sourceField = document.querySelector({json.dumps(self.source_selector)});
var lastAutoGeneratedAllowlist = '';
if (!allowlistField || !sameDomainOnly || !sourceField) {{
return;
}}
function extractUrl(line) {{
var trimmed = String(line || '').trim();
if (!trimmed || trimmed.charAt(0) === '#') {{
return '';
}}
if (trimmed.charAt(0) === '{{') {{
try {{
var record = JSON.parse(trimmed);
return String(record.url || '').trim();
}} catch (error) {{
return '';
}}
}}
return trimmed;
}}
function escapeRegex(text) {{
return String(text || '').replace(/[.*+?^${{}}()|[\\]\\\\]/g, '\\\\$&');
}}
function buildHostRegex(domains) {{
if (!domains.length) {{
return '';
}}
return '^https?://(' + domains.map(escapeRegex).join('|') + ')([:/]|$)';
}}
function getConfigEditorRows() {{
return document.getElementById('id_config_rows');
}}
function getConfigUpdater() {{
return window.updateHiddenField_id_config || null;
}}
function findConfigRow(key) {{
var rows = getConfigEditorRows();
if (!rows) {{
return null;
}}
var matches = Array.prototype.filter.call(rows.querySelectorAll('.key-value-row'), function(row) {{
var keyInput = row.querySelector('.kv-key');
return keyInput && keyInput.value.trim() === key;
}});
return matches.length ? matches[0] : null;
}}
function addConfigRow() {{
if (typeof window.addKeyValueRow_id_config === 'function') {{
window.addKeyValueRow_id_config();
var rows = getConfigEditorRows();
return rows ? rows.lastElementChild : null;
}}
return null;
}}
function setConfigRow(key, value) {{
var rows = getConfigEditorRows();
var updater = getConfigUpdater();
if (!rows || !updater) {{
return;
}}
var row = findConfigRow(key);
if (!value) {{
if (row) {{
row.remove();
updater();
}}
return;
}}
if (!row) {{
row = addConfigRow();
}}
if (!row) {{
return;
}}
var keyInput = row.querySelector('.kv-key');
var valueInput = row.querySelector('.kv-value');
if (!keyInput || !valueInput) {{
return;
}}
keyInput.value = key;
valueInput.value = value;
keyInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
valueInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
updater();
}}
function syncConfigEditor() {{
setConfigRow('URL_ALLOWLIST', allowlistField.value.trim());
setConfigRow('URL_DENYLIST', denylistField ? denylistField.value.trim() : '');
}}
function syncAllowlistFromUrls() {{
if (!sameDomainOnly.checked) {{
if (allowlistField.value.trim() === lastAutoGeneratedAllowlist) {{
allowlistField.value = '';
syncConfigEditor();
}}
lastAutoGeneratedAllowlist = '';
return;
}}
var seen = Object.create(null);
var domains = [];
sourceField.value.split(/\\n+/).forEach(function(line) {{
var url = extractUrl(line);
if (!url) {{
return;
}}
try {{
var parsed = new URL(url);
var domain = String(parsed.hostname || '').toLowerCase();
if (!domain || seen[domain]) {{
return;
}}
seen[domain] = true;
domains.push(domain);
}} catch (error) {{
return;
}}
}});
lastAutoGeneratedAllowlist = buildHostRegex(domains);
allowlistField.value = lastAutoGeneratedAllowlist;
syncConfigEditor();
}}
sameDomainOnly.addEventListener('change', syncAllowlistFromUrls);
sourceField.addEventListener('input', syncAllowlistFromUrls);
sourceField.addEventListener('change', syncAllowlistFromUrls);
allowlistField.addEventListener('input', syncConfigEditor);
allowlistField.addEventListener('change', syncConfigEditor);
if (denylistField) {{
denylistField.addEventListener('input', syncConfigEditor);
denylistField.addEventListener('change', syncConfigEditor);
}}
if (document.readyState === 'loading') {{
document.addEventListener('DOMContentLoaded', syncConfigEditor, {{ once: true }});
}} else {{
syncConfigEditor();
}}
}})();
</script>
</div>
''')
def value_from_datadict(self, data, files, name):
return {
'allowlist': data.get(f'{name}_allowlist', ''),
'denylist': data.get(f'{name}_denylist', ''),
'same_domain_only': data.get(f'{name}_same_domain_only') in ('1', 'on', 'true'),
}
class InlineTagEditorWidget(TagEditorWidget):
"""
Inline version of TagEditorWidget for use in list views.
Includes AJAX save functionality for immediate persistence.
"""
def __init__(self, attrs=None, snapshot_id=None):
def __init__(self, attrs=None, snapshot_id=None, editable=True):
super().__init__(attrs, snapshot_id)
self.snapshot_id = snapshot_id
self.editable = editable
def render(self, name, value, attrs=None, renderer=None, snapshot_id=None):
"""Render inline tag editor with AJAX save."""
@@ -361,20 +625,24 @@ class InlineTagEditorWidget(TagEditorWidget):
# Build pills HTML with filter links
pills_html = ''
for td in tag_data:
remove_button = ''
if self.editable:
remove_button = (
f'<button type="button" class="tag-remove-btn" '
f'data-tag-id="{td["id"]}" data-tag-name="{self._escape(td["name"])}">&times;</button>'
)
pills_html += f'''
<span class="tag-pill" data-tag="{self._escape(td['name'])}" data-tag-id="{td['id']}" style="{self._tag_style(td['name'])}">
<a href="/admin/core/snapshot/?tags__id__exact={td['id']}" class="tag-link">{self._escape(td['name'])}</a>
<button type="button" class="tag-remove-btn" data-tag-id="{td['id']}" data-tag-name="{self._escape(td['name'])}">&times;</button>
{remove_button}
</span>
'''
tags_json = escape(json.dumps(tag_data))
html = f'''
<span id="{widget_id}_container" class="tag-editor-inline" data-snapshot-id="{snapshot_id}" data-tags="{tags_json}">
<span id="{widget_id}_pills" class="tag-pills-inline">
{pills_html}
</span>
input_html = ''
readonly_class = ' readonly' if not self.editable else ''
if self.editable:
input_html = f'''
<input type="text"
id="{widget_id}_input"
class="tag-inline-input-sm"
@@ -384,6 +652,14 @@ class InlineTagEditorWidget(TagEditorWidget):
data-inline-tag-input="1"
>
<datalist id="{widget_id}_datalist"></datalist>
'''
html = f'''
<span id="{widget_id}_container" class="tag-editor-inline{readonly_class}" data-snapshot-id="{snapshot_id}" data-tags="{tags_json}" data-readonly="{int(not self.editable)}">
<span id="{widget_id}_pills" class="tag-pills-inline">
{pills_html}
</span>
{input_html}
</span>
'''

View File

@@ -1,8 +1,11 @@
__package__ = 'archivebox.crawls'
from django import forms
from django.utils.html import format_html, format_html_join
from django.http import JsonResponse, HttpRequest, HttpResponseNotAllowed
from django.shortcuts import get_object_or_404, redirect
from django.urls import path, reverse
from django.utils.html import escape, format_html, format_html_join
from django.utils import timezone
from django.utils.safestring import mark_safe
from django.contrib import admin, messages
from django.db.models import Count, Q
@@ -13,16 +16,19 @@ from django_object_actions import action
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
from archivebox.core.models import Snapshot
from archivebox.core.widgets import TagEditorWidget
from archivebox.crawls.models import Crawl, CrawlSchedule
def render_snapshots_list(snapshots_qs, limit=20):
def render_snapshots_list(snapshots_qs, limit=20, crawl=None):
"""Render a nice inline list view of snapshots with status, title, URL, and progress."""
snapshots = snapshots_qs.order_by('-created_at')[:limit].annotate(
total_results=Count('archiveresult'),
succeeded_results=Count('archiveresult', filter=Q(archiveresult__status='succeeded')),
failed_results=Count('archiveresult', filter=Q(archiveresult__status='failed')),
started_results=Count('archiveresult', filter=Q(archiveresult__status='started')),
skipped_results=Count('archiveresult', filter=Q(archiveresult__status='skipped')),
)
if not snapshots:
@@ -43,17 +49,57 @@ def render_snapshots_list(snapshots_qs, limit=20):
# Calculate progress
total = snapshot.total_results
done = snapshot.succeeded_results + snapshot.failed_results
succeeded = snapshot.succeeded_results
failed = snapshot.failed_results
running = snapshot.started_results
skipped = snapshot.skipped_results
done = succeeded + failed + skipped
pending = max(total - done - running, 0)
progress_pct = int((done / total) * 100) if total > 0 else 0
progress_text = f'{done}/{total}' if total > 0 else '-'
progress_title = (
f'{succeeded} succeeded, {failed} failed, {running} running, '
f'{pending} pending, {skipped} skipped'
)
progress_color = '#28a745'
if failed:
progress_color = '#dc3545'
elif running:
progress_color = '#17a2b8'
elif pending:
progress_color = '#ffc107'
# Truncate title and URL
title = (snapshot.title or 'Untitled')[:60]
if len(snapshot.title or '') > 60:
snapshot_title = snapshot.title or 'Untitled'
title = snapshot_title[:60]
if len(snapshot_title) > 60:
title += '...'
url_display = snapshot.url[:50]
if len(snapshot.url) > 50:
url_display += '...'
delete_button = ''
exclude_button = ''
if crawl is not None:
delete_url = reverse('admin:crawls_crawl_snapshot_delete', args=[crawl.pk, snapshot.pk])
exclude_url = reverse('admin:crawls_crawl_snapshot_exclude_domain', args=[crawl.pk, snapshot.pk])
delete_button = f'''
<button type="button"
class="crawl-snapshots-action"
data-post-url="{escape(delete_url)}"
data-confirm="Delete this snapshot from the crawl?"
title="Delete this snapshot from the crawl and remove its URL from the crawl queue."
aria-label="Delete snapshot"
style="border: 1px solid #ddd; background: #fff; color: #666; border-radius: 4px; width: 28px; height: 28px; cursor: pointer;">🗑</button>
'''
exclude_button = f'''
<button type="button"
class="crawl-snapshots-action"
data-post-url="{escape(exclude_url)}"
data-confirm="Exclude this domain from the crawl? This removes matching queued URLs, deletes pending matching snapshots, and blocks future matches."
title="Exclude this domain from this crawl. This removes matching URLs from the crawl queue, deletes pending matching snapshots, and blocks future matches."
aria-label="Exclude domain from crawl"
style="border: 1px solid #ddd; background: #fff; color: #666; border-radius: 4px; width: 28px; height: 28px; cursor: pointer;">⊘</button>
'''
# Format date
date_str = snapshot.created_at.strftime('%Y-%m-%d %H:%M') if snapshot.created_at else '-'
@@ -74,18 +120,18 @@ def render_snapshots_list(snapshots_qs, limit=20):
</td>
<td style="padding: 6px 8px; max-width: 300px;">
<a href="{snapshot.admin_change_url}" style="color: #417690; text-decoration: none; font-weight: 500;"
title="{snapshot.title or 'Untitled'}">{title}</a>
title="{escape(snapshot_title)}">{escape(title)}</a>
</td>
<td style="padding: 6px 8px; max-width: 250px;">
<a href="{snapshot.url}" target="_blank"
<a href="{escape(snapshot.url)}" target="_blank"
style="color: #666; text-decoration: none; font-family: monospace; font-size: 11px;"
title="{snapshot.url}">{url_display}</a>
title="{escape(snapshot.url)}">{escape(url_display)}</a>
</td>
<td style="padding: 6px 8px; white-space: nowrap; text-align: center;">
<div style="display: inline-flex; align-items: center; gap: 6px;">
<div style="display: inline-flex; align-items: center; gap: 6px;" title="{escape(progress_title)}">
<div style="width: 60px; height: 6px; background: #eee; border-radius: 3px; overflow: hidden;">
<div style="width: {progress_pct}%; height: 100%;
background: {'#28a745' if snapshot.failed_results == 0 else '#ffc107' if snapshot.succeeded_results > 0 else '#dc3545'};
background: {progress_color};
transition: width 0.3s;"></div>
</div>
<a href="/admin/core/archiveresult/?snapshot__id__exact={snapshot.id}"
@@ -96,6 +142,7 @@ def render_snapshots_list(snapshots_qs, limit=20):
<td style="padding: 6px 8px; white-space: nowrap; color: #888; font-size: 11px;">
{date_str}
</td>
{"<td style=\"padding: 6px 8px; white-space: nowrap; text-align: right;\"><div style=\"display: inline-flex; gap: 6px;\">%s%s</div></td>" % (exclude_button, delete_button) if crawl is not None else ""}
</tr>
''')
@@ -111,7 +158,7 @@ def render_snapshots_list(snapshots_qs, limit=20):
'''
return mark_safe(f'''
<div style="border: 1px solid #ddd; border-radius: 6px; overflow: hidden; max-width: 100%;">
<div data-crawl-snapshots-list style="border: 1px solid #ddd; border-radius: 6px; overflow: hidden; max-width: 100%;">
<table style="width: 100%; border-collapse: collapse; font-size: 13px;">
<thead>
<tr style="background: #f5f5f5; border-bottom: 2px solid #ddd;">
@@ -121,6 +168,7 @@ def render_snapshots_list(snapshots_qs, limit=20):
<th style="padding: 8px; text-align: left; font-weight: 600; color: #333;">URL</th>
<th style="padding: 8px; text-align: center; font-weight: 600; color: #333;">Progress</th>
<th style="padding: 8px; text-align: left; font-weight: 600; color: #333;">Created</th>
{'<th style="padding: 8px; text-align: right; font-weight: 600; color: #333;">Actions</th>' if crawl is not None else ''}
</tr>
</thead>
<tbody>
@@ -129,11 +177,197 @@ def render_snapshots_list(snapshots_qs, limit=20):
</tbody>
</table>
</div>
{'''
<script>
(function() {
if (window.__archiveboxCrawlSnapshotActionsBound) {
return;
}
window.__archiveboxCrawlSnapshotActionsBound = true;
function getCookie(name) {
var cookieValue = null;
if (!document.cookie) {
return cookieValue;
}
var cookies = document.cookie.split(';');
for (var i = 0; i < cookies.length; i++) {
var cookie = cookies[i].trim();
if (cookie.substring(0, name.length + 1) === (name + '=')) {
cookieValue = decodeURIComponent(cookie.substring(name.length + 1));
break;
}
}
return cookieValue;
}
document.addEventListener('click', function(event) {
var button = event.target.closest('.crawl-snapshots-action');
if (!button) {
return;
}
event.preventDefault();
var confirmMessage = button.getAttribute('data-confirm');
if (confirmMessage && !window.confirm(confirmMessage)) {
return;
}
button.disabled = true;
fetch(button.getAttribute('data-post-url'), {
method: 'POST',
credentials: 'same-origin',
headers: {
'X-CSRFToken': getCookie('csrftoken') || '',
'X-Requested-With': 'XMLHttpRequest'
}
}).then(function(response) {
return response.json().then(function(data) {
if (!response.ok) {
throw new Error(data.error || 'Request failed');
}
return data;
});
}).then(function() {
window.location.reload();
}).catch(function(error) {
button.disabled = false;
window.alert(error.message || 'Request failed');
});
});
})();
</script>
''' if crawl is not None else ''}
''')
class URLFiltersWidget(forms.Widget):
    """Paired allowlist/denylist editor for per-crawl URL filters.

    Renders two <textarea>s plus a "Same domain only" checkbox. Inline JS
    mirrors the hostnames of URLs typed into the separate #id_urls field into
    the allowlist textarea while the checkbox is checked. The posted value is
    reassembled into a dict by value_from_datadict().
    """

    def render(self, name, value, attrs=None, renderer=None):
        # value is normally the dict built by value_from_datadict(); any other
        # initial value (e.g. '' or None) is treated as empty filters.
        value = value if isinstance(value, dict) else {}
        widget_id = (attrs or {}).get('id', name)
        # escape(): pattern text is user input rendered into HTML textareas.
        allowlist = escape(value.get('allowlist', '') or '')
        denylist = escape(value.get('denylist', '') or '')
        # NOTE: the hidden input posts an empty value under the bare field
        # name; value_from_datadict() only reads the *_allowlist/_denylist/
        # _same_domain_only keys.
        return mark_safe(f'''
            <div id="{widget_id}_container" style="min-width: 420px;">
                <input type="hidden" name="{name}" value="">
                <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;">
                    <div>
                        <label for="{widget_id}_allowlist" style="display: block; font-weight: 600; margin-bottom: 4px;">Allowlist</label>
                        <textarea id="{widget_id}_allowlist" name="{name}_allowlist" rows="3"
                                  style="width: 100%; font-family: monospace; font-size: 12px;"
                                  placeholder="example.com&#10;*.example.com">{allowlist}</textarea>
                    </div>
                    <div>
                        <label for="{widget_id}_denylist" style="display: block; font-weight: 600; margin-bottom: 4px;">Denylist</label>
                        <textarea id="{widget_id}_denylist" name="{name}_denylist" rows="3"
                                  style="width: 100%; font-family: monospace; font-size: 12px;"
                                  placeholder="static.example.com">{denylist}</textarea>
                    </div>
                </div>
                <label style="display: inline-flex; align-items: center; gap: 6px; margin-top: 8px; font-weight: 500;">
                    <input type="checkbox" id="{widget_id}_same_domain_only" name="{name}_same_domain_only" value="1">
                    Same domain only
                </label>
                <p style="color: #666; font-size: 11px; margin: 6px 0 0 0;">
                    Enter domains, wildcards, or regex patterns. Denylist takes precedence over allowlist.
                </p>
                <script>
                (function() {{
                    if (window.__archiveboxUrlFilterEditors && window.__archiveboxUrlFilterEditors['{widget_id}']) {{
                        return;
                    }}
                    window.__archiveboxUrlFilterEditors = window.__archiveboxUrlFilterEditors || {{}};
                    window.__archiveboxUrlFilterEditors['{widget_id}'] = true;
                    var urlsField = document.getElementById('id_urls');
                    var allowlistField = document.getElementById('{widget_id}_allowlist');
                    var sameDomainOnly = document.getElementById('{widget_id}_same_domain_only');
                    function extractUrl(line) {{
                        var trimmed = (line || '').trim();
                        if (!trimmed || trimmed.charAt(0) === '#') {{
                            return '';
                        }}
                        if (trimmed.charAt(0) === '{{') {{
                            try {{
                                var record = JSON.parse(trimmed);
                                return String(record.url || '').trim();
                            }} catch (error) {{
                                return '';
                            }}
                        }}
                        return trimmed;
                    }}
                    function syncAllowlistFromUrls() {{
                        if (!urlsField || !allowlistField || !sameDomainOnly || !sameDomainOnly.checked) {{
                            return;
                        }}
                        var domains = [];
                        var seen = Object.create(null);
                        urlsField.value.split(/\\n+/).forEach(function(line) {{
                            var url = extractUrl(line);
                            if (!url) {{
                                return;
                            }}
                            try {{
                                var parsed = new URL(url);
                                var domain = (parsed.hostname || '').toLowerCase();
                                if (domain && !seen[domain]) {{
                                    seen[domain] = true;
                                    domains.push(domain);
                                }}
                            }} catch (error) {{
                                return;
                            }}
                        }});
                        allowlistField.value = domains.join('\\n');
                    }}
                    if (sameDomainOnly) {{
                        sameDomainOnly.addEventListener('change', syncAllowlistFromUrls);
                    }}
                    if (urlsField) {{
                        urlsField.addEventListener('input', syncAllowlistFromUrls);
                        urlsField.addEventListener('change', syncAllowlistFromUrls);
                    }}
                }})();
                </script>
            </div>
        ''')

    def value_from_datadict(self, data, files, name):
        """Reassemble the three sub-inputs into the dict this field works with."""
        return {
            'allowlist': data.get(f'{name}_allowlist', ''),
            'denylist': data.get(f'{name}_denylist', ''),
            # Checkbox posts '1' when checked; browsers may also send 'on'.
            'same_domain_only': data.get(f'{name}_same_domain_only') in ('1', 'on', 'true'),
        }
class URLFiltersField(forms.Field):
    """Form field backed by URLFiltersWidget; its value is always a dict."""

    widget = URLFiltersWidget

    def to_python(self, value):
        """Pass dict values through; coerce anything else to empty filters."""
        if not isinstance(value, dict):
            return {'allowlist': '', 'denylist': '', 'same_domain_only': False}
        return value
class CrawlAdminForm(forms.ModelForm):
"""Custom form for Crawl admin to render urls field as textarea."""
tags_editor = forms.CharField(
label='Tags',
required=False,
widget=TagEditorWidget(),
help_text='Type tag names and press Enter or Space to add. Click × to remove.',
)
url_filters = URLFiltersField(
label='URL Filters',
required=False,
help_text='Set URL_ALLOWLIST / URL_DENYLIST for this crawl.',
)
class Meta:
model = Crawl
@@ -144,8 +378,62 @@ class CrawlAdminForm(forms.ModelForm):
'style': 'width: 100%; font-family: monospace; font-size: 13px;',
'placeholder': 'https://example.com\nhttps://example2.com\n# Comments start with #',
}),
'notes': forms.Textarea(attrs={
'rows': 1,
'style': 'width: 100%; min-height: 0; resize: vertical;',
}),
}
def __init__(self, *args, **kwargs):
    """Seed the custom tags/url-filter form fields from a saved instance."""
    super().__init__(*args, **kwargs)
    # Only a persisted instance has config/tags worth mirroring into initial.
    config = dict(self.instance.config or {}) if self.instance and self.instance.pk else {}
    if self.instance and self.instance.pk:
        self.initial['tags_editor'] = self.instance.tags_str
        self.initial['url_filters'] = {
            'allowlist': config.get('URL_ALLOWLIST', ''),
            'denylist': config.get('URL_DENYLIST', ''),
            # UI-only toggle; always starts unchecked on the edit form.
            'same_domain_only': False,
        }
def clean_tags_editor(self):
    """Dedupe the comma-separated tag list case-insensitively, keeping order.

    The first spelling of each tag wins (e.g. 'Foo' beats a later 'foo').
    """
    raw = self.cleaned_data.get('tags_editor', '')
    unique: list[str] = []
    lowered_seen: set[str] = set()
    for candidate in raw.split(','):
        candidate = candidate.strip()
        if not candidate:
            continue
        key = candidate.lower()
        if key not in lowered_seen:
            lowered_seen.add(key)
            unique.append(candidate)
    return ','.join(unique)
def clean_url_filters(self):
    """Normalize the widget dict into newline-joined, deduped pattern strings."""
    value = self.cleaned_data.get('url_filters') or {}
    return {
        'allowlist': '\n'.join(Crawl.split_filter_patterns(value.get('allowlist', ''))),
        'denylist': '\n'.join(Crawl.split_filter_patterns(value.get('denylist', ''))),
        # UI-only flag; save() does not pass it to set_url_filters().
        'same_domain_only': bool(value.get('same_domain_only')),
    }
def save(self, commit=True):
    """Persist the crawl plus the custom tags/url-filter fields.

    Mirrors ModelForm.save() semantics: with commit=False the caller must
    save the instance (and m2m relations) later.
    """
    instance = super().save(commit=False)
    instance.tags_str = self.cleaned_data.get('tags_editor', '')
    url_filters = self.cleaned_data.get('url_filters') or {}
    instance.set_url_filters(
        url_filters.get('allowlist', ''),
        url_filters.get('denylist', ''),
    )
    if commit:
        instance.save()
        # Re-apply filters so already-queued URLs/snapshots honor the new lists.
        instance.apply_crawl_config_filters()
        # _save_m2m is set by super().save(commit=False); guard in case it isn't.
        save_m2m = getattr(self, '_save_m2m', None)
        if callable(save_m2m):
            save_m2m()
    return instance
class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
form = CrawlAdminForm
@@ -161,11 +449,11 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
'classes': ('card', 'wide'),
}),
('Info', {
'fields': ('label', 'notes', 'tags_str'),
'fields': ('label', 'notes', 'tags_editor'),
'classes': ('card',),
}),
('Settings', {
'fields': ('max_depth', 'config'),
'fields': (('max_depth', 'url_filters'), 'config'),
'classes': ('card',),
}),
('Status', {
@@ -185,6 +473,28 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
'classes': ('card', 'wide'),
}),
)
add_fieldsets = (
('URLs', {
'fields': ('urls',),
'classes': ('card', 'wide'),
}),
('Info', {
'fields': ('label', 'notes', 'tags_editor'),
'classes': ('card',),
}),
('Settings', {
'fields': (('max_depth', 'url_filters'), 'config'),
'classes': ('card',),
}),
('Status', {
'fields': ('status', 'retry_at'),
'classes': ('card',),
}),
('Relations', {
'fields': ('schedule', 'created_by'),
'classes': ('card',),
}),
)
list_filter = ('max_depth', 'schedule', 'created_by', 'status', 'retry_at')
ordering = ['-created_at', '-retry_at']
@@ -199,6 +509,25 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
num_snapshots_cached=Count('snapshot_set')
)
def get_fieldsets(self, request, obj=None):
    """Use the trimmed add-form layout until the object exists."""
    if obj:
        return self.fieldsets
    return self.add_fieldsets
def get_urls(self):
    """Register per-snapshot delete / exclude-domain admin endpoints.

    Custom URLs are prepended so they take precedence over the default
    '<object_id>/...' admin change patterns.
    """
    urls = super().get_urls()
    custom_urls = [
        path(
            '<path:object_id>/snapshot/<path:snapshot_id>/delete/',
            self.admin_site.admin_view(self.delete_snapshot_view),
            name='crawls_crawl_snapshot_delete',
        ),
        path(
            '<path:object_id>/snapshot/<path:snapshot_id>/exclude-domain/',
            self.admin_site.admin_view(self.exclude_domain_view),
            name='crawls_crawl_snapshot_exclude_domain',
        ),
    ]
    return custom_urls + urls
@admin.action(description='Delete selected crawls')
def delete_selected_batched(self, request, queryset):
"""Delete crawls in a single transaction to avoid SQLite concurrency issues."""
@@ -218,8 +547,6 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
@action(label='Recrawl', description='Create a new crawl with the same settings')
def recrawl(self, request, obj):
"""Duplicate this crawl as a new crawl with the same URLs and settings."""
from django.utils import timezone
from django.shortcuts import redirect
# Validate URLs (required for crawl to start)
if not obj.urls:
@@ -252,7 +579,37 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
return getattr(obj, 'num_snapshots_cached', obj.snapshot_set.count())
def snapshots(self, obj):
return render_snapshots_list(obj.snapshot_set.all())
return render_snapshots_list(obj.snapshot_set.all(), crawl=obj)
def delete_snapshot_view(self, request: HttpRequest, object_id: str, snapshot_id: str):
    """POST-only admin endpoint: remove one snapshot from a crawl.

    Cancels running hooks if the snapshot is STARTED, removes its URL from
    the crawl queue, deletes the snapshot, and returns a JSON summary.
    404s if the snapshot does not belong to this crawl.
    """
    if request.method != 'POST':
        return HttpResponseNotAllowed(['POST'])
    crawl = get_object_or_404(Crawl, pk=object_id)
    snapshot = get_object_or_404(Snapshot, pk=snapshot_id, crawl=crawl)
    if snapshot.status == Snapshot.StatusChoices.STARTED:
        snapshot.cancel_running_hooks()
    removed_urls = crawl.prune_url(snapshot.url)
    # BUGFIX: capture the id before delete() — Django resets the instance's
    # pk attribute to None after deletion, which made the response report
    # 'snapshot_id': 'None'.
    deleted_snapshot_id = str(snapshot.id)
    snapshot.delete()
    return JsonResponse({
        'ok': True,
        'snapshot_id': deleted_snapshot_id,
        'removed_urls': removed_urls,
    })
def exclude_domain_view(self, request: HttpRequest, object_id: str, snapshot_id: str):
    """POST-only admin endpoint: exclude a snapshot's domain from its crawl.

    Delegates to Crawl.exclude_domain(), which adds the domain to the crawl's
    denylist and prunes matching queued URLs/snapshots. 404s if the snapshot
    does not belong to this crawl.
    """
    if request.method != 'POST':
        return HttpResponseNotAllowed(['POST'])
    crawl = get_object_or_404(Crawl, pk=object_id)
    snapshot = get_object_or_404(Snapshot, pk=snapshot_id, crawl=crawl)
    result = crawl.exclude_domain(snapshot.url)
    # result carries domain/created/removed_urls/deleted_snapshots counts.
    return JsonResponse({
        'ok': True,
        **result,
    })
@admin.display(description='Schedule', ordering='schedule')
def schedule_str(self, obj):

View File

@@ -2,9 +2,12 @@ __package__ = 'archivebox.crawls'
from typing import TYPE_CHECKING
import uuid
import json
import re
from datetime import timedelta
from archivebox.uuid_compat import uuid7
from pathlib import Path
from urllib.parse import urlparse
from django.db import models
from django.core.validators import MaxValueValidator, MinValueValidator
@@ -141,22 +144,21 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
return f'[...{short_id}] {first_url[:120]}'
def save(self, *args, **kwargs):
is_new = self._state.adding
super().save(*args, **kwargs)
if is_new:
from archivebox.misc.logging_util import log_worker_event
first_url = self.get_urls_list()[0] if self.get_urls_list() else ''
log_worker_event(
worker_type='DB',
event='Created Crawl',
indent_level=1,
metadata={
'id': str(self.id),
'first_url': first_url[:64],
'max_depth': self.max_depth,
'status': self.status,
},
)
# if is_new:
# from archivebox.misc.logging_util import log_worker_event
# first_url = self.get_urls_list()[0] if self.get_urls_list() else ''
# log_worker_event(
# worker_type='DB',
# event='Created Crawl',
# indent_level=1,
# metadata={
# 'id': str(self.id),
# 'first_url': first_url[:64],
# 'max_depth': self.max_depth,
# 'status': self.status,
# },
# )
@property
def api_url(self) -> str:
@@ -248,6 +250,222 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
if url.strip() and not url.strip().startswith('#')
]
@staticmethod
def normalize_domain(value: str) -> str:
candidate = (value or '').strip().lower()
if not candidate:
return ''
if '://' not in candidate and '/' not in candidate:
candidate = f'https://{candidate.lstrip(".")}'
try:
parsed = urlparse(candidate)
hostname = parsed.hostname or ''
if not hostname:
return ''
if parsed.port:
return f'{hostname}_{parsed.port}'
return hostname
except Exception:
return ''
@staticmethod
def split_filter_patterns(value) -> list[str]:
patterns = []
seen = set()
if isinstance(value, list):
raw_values = value
elif isinstance(value, str):
raw_values = value.splitlines()
else:
raw_values = []
for raw_value in raw_values:
pattern = str(raw_value or '').strip()
if not pattern or pattern in seen:
continue
seen.add(pattern)
patterns.append(pattern)
return patterns
@classmethod
def _pattern_matches_url(cls, url: str, pattern: str) -> bool:
    """Return True if `pattern` matches `url`.

    Patterns made only of word chars / '.' / '*' / ':' / '-' are treated as
    domain patterns matched against the URL's host; anything else is treated
    as a regular expression searched against the full URL. Invalid regexes
    and unparseable hosts never match.
    """
    normalized_pattern = str(pattern or '').strip()
    if not normalized_pattern:
        return False
    # Domain-style pattern (no regex metacharacters beyond '*').
    if re.fullmatch(r'[\w.*:-]+', normalized_pattern):
        # '*.example.com' matches subdomains only, not the apex domain.
        wildcard_only_subdomains = normalized_pattern.startswith('*.')
        normalized_domain = cls.normalize_domain(
            normalized_pattern[2:] if wildcard_only_subdomains else normalized_pattern
        )
        normalized_url_domain = cls.normalize_domain(url)
        if not normalized_domain or not normalized_url_domain:
            return False
        # normalize_domain() encodes ports as 'host_port'; compare hosts
        # separately so 'example.com' still matches 'example.com_8080'.
        pattern_host = normalized_domain.split('_', 1)[0]
        url_host = normalized_url_domain.split('_', 1)[0]
        if wildcard_only_subdomains:
            return url_host.endswith(f'.{pattern_host}')
        # Exact host(+port) match first, then apex/subdomain match ignoring ports.
        if normalized_url_domain == normalized_domain:
            return True
        return url_host == pattern_host or url_host.endswith(f'.{pattern_host}')
    # Fallback: treat the pattern as a regex over the whole URL.
    try:
        return bool(re.search(normalized_pattern, url))
    except re.error:
        return False
def get_url_allowlist(self, *, use_effective_config: bool = False, snapshot=None) -> list[str]:
    """Return URL_ALLOWLIST patterns from this crawl's config.

    With use_effective_config=True, read from the merged effective config
    (global + crawl + snapshot) instead of the crawl's own config dict.
    """
    if use_effective_config:
        from archivebox.config.configset import get_config
        source = get_config(crawl=self, snapshot=snapshot)
    else:
        source = self.config or {}
    return self.split_filter_patterns(source.get('URL_ALLOWLIST', ''))
def get_url_denylist(self, *, use_effective_config: bool = False, snapshot=None) -> list[str]:
    """Return URL_DENYLIST patterns from this crawl's config.

    With use_effective_config=True, read from the merged effective config
    (global + crawl + snapshot) instead of the crawl's own config dict.
    """
    if use_effective_config:
        from archivebox.config.configset import get_config
        source = get_config(crawl=self, snapshot=snapshot)
    else:
        source = self.config or {}
    return self.split_filter_patterns(source.get('URL_DENYLIST', ''))
def url_passes_filters(self, url: str, *, snapshot=None, use_effective_config: bool = True) -> bool:
    """Check `url` against this crawl's deny/allow lists.

    Deny always wins; an empty allowlist means allow-everything-not-denied.
    """
    deny = self.get_url_denylist(use_effective_config=use_effective_config, snapshot=snapshot)
    if any(self._pattern_matches_url(url, pattern) for pattern in deny):
        return False
    allow = self.get_url_allowlist(use_effective_config=use_effective_config, snapshot=snapshot)
    if not allow:
        return True
    return any(self._pattern_matches_url(url, pattern) for pattern in allow)
def set_url_filters(self, allowlist, denylist) -> None:
    """Write URL_ALLOWLIST/URL_DENYLIST into self.config (no DB save here).

    Empty pattern lists remove the corresponding key entirely so the config
    dict stays minimal.
    """
    updated = dict(self.config or {})
    for key, raw_patterns in (('URL_ALLOWLIST', allowlist), ('URL_DENYLIST', denylist)):
        patterns = self.split_filter_patterns(raw_patterns)
        if patterns:
            updated[key] = '\n'.join(patterns)
        else:
            updated.pop(key, None)
    self.config = updated
def apply_crawl_config_filters(self) -> dict[str, int]:
    """Re-apply this crawl's own URL filters to its queue and snapshots.

    Removes queued URLs that no longer pass the filters, then deletes any
    QUEUED/STARTED snapshots whose URL is now filtered out (cancelling
    running hooks first). Finished snapshots are left alone. Uses only the
    crawl's own config (use_effective_config=False), not the merged config.

    Returns:
        {'removed_urls': int, 'deleted_snapshots': int}
    """
    from archivebox.core.models import Snapshot

    removed_urls = self.prune_urls(
        lambda url: not self.url_passes_filters(url, use_effective_config=False)
    )
    # Only queued/started snapshots are candidates for deletion.
    filtered_snapshots = [
        snapshot
        for snapshot in self.snapshot_set.filter(
            status__in=[Snapshot.StatusChoices.QUEUED, Snapshot.StatusChoices.STARTED],
        ).only('pk', 'url', 'status')
        if not self.url_passes_filters(snapshot.url, snapshot=snapshot, use_effective_config=False)
    ]
    deleted_snapshots = 0
    if filtered_snapshots:
        # Stop in-flight work before deleting STARTED snapshots.
        started_snapshots = [
            snapshot for snapshot in filtered_snapshots
            if snapshot.status == Snapshot.StatusChoices.STARTED
        ]
        for snapshot in started_snapshots:
            snapshot.cancel_running_hooks()
        filtered_snapshot_ids = [snapshot.pk for snapshot in filtered_snapshots]
        # BUGFIX: QuerySet.delete() returns (total_rows, per_model_counts);
        # total_rows includes cascade-deleted related rows (ArchiveResults,
        # M2M through rows, ...), which inflated the reported count. Use the
        # per-model count for the Snapshot model itself.
        _total_deleted, per_model_counts = self.snapshot_set.filter(pk__in=filtered_snapshot_ids).delete()
        deleted_snapshots = per_model_counts.get(Snapshot._meta.label, 0)
    return {
        'removed_urls': len(removed_urls),
        'deleted_snapshots': deleted_snapshots,
    }
def _iter_url_lines(self) -> list[tuple[str, str]]:
entries: list[tuple[str, str]] = []
for raw_line in (self.urls or '').splitlines():
stripped = raw_line.strip()
if not stripped:
continue
if stripped.startswith('#'):
entries.append((raw_line.rstrip(), ''))
continue
try:
entry = json.loads(stripped)
entries.append((raw_line.rstrip(), str(entry.get('url', '') or '').strip()))
except json.JSONDecodeError:
entries.append((raw_line.rstrip(), stripped))
return entries
def prune_urls(self, predicate) -> list[str]:
    """Drop URL lines matching `predicate`; return the removed URLs.

    Comment lines and lines without a parseable URL are always kept. The
    rewritten self.urls is only persisted when it actually changed.
    """
    surviving: list[str] = []
    removed: list[str] = []
    for raw_line, url in self._iter_url_lines():
        if url and predicate(url):
            removed.append(url)
        else:
            surviving.append(raw_line)
    rebuilt = '\n'.join(surviving)
    if rebuilt != (self.urls or ''):
        self.urls = rebuilt
        self.save(update_fields=['urls', 'modified_at'])
    return removed
def prune_url(self, url: str) -> int:
    """Remove one exact URL from the crawl queue; return how many were removed."""
    wanted = (url or '').strip()
    return len(self.prune_urls(lambda candidate: candidate == wanted))
def exclude_domain(self, domain: str) -> dict[str, int | str | bool]:
    """Add a domain to this crawl's denylist and prune matching work.

    Accepts a bare domain or a full URL. Returns the normalized domain,
    whether a new denylist entry was created, and the counts of pruned
    queued URLs and deleted snapshots.
    """
    normalized_domain = self.normalize_domain(domain)
    if not normalized_domain:
        # Unparseable input: report a no-op rather than raising.
        return {
            'domain': '',
            'created': False,
            'removed_urls': 0,
            'deleted_snapshots': 0,
        }
    domains = self.get_url_denylist(use_effective_config=False)
    created = normalized_domain not in domains
    if created:
        domains.append(normalized_domain)
        self.set_url_filters(
            self.get_url_allowlist(use_effective_config=False),
            domains,
        )
        self.save(update_fields=['config', 'modified_at'])
    # Prune even when the entry already existed, to catch stragglers.
    filter_result = self.apply_crawl_config_filters()
    return {
        'domain': normalized_domain,
        'created': created,
        'removed_urls': filter_result['removed_urls'],
        'deleted_snapshots': filter_result['deleted_snapshots'],
    }
def get_system_task(self) -> str | None:
urls = self.get_urls_list()
if len(urls) != 1:
@@ -284,11 +502,13 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
Returns:
True if URL was added, False if skipped (duplicate or depth exceeded)
"""
import json
from archivebox.misc.util import fix_url_from_markdown
url = entry.get('url', '')
url = fix_url_from_markdown(str(entry.get('url', '') or '').strip())
if not url:
return False
if not self.url_passes_filters(url):
return False
depth = entry.get('depth', 1)
@@ -301,20 +521,13 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
return False
# Check if already in urls (parse existing JSONL entries)
existing_urls = set()
for line in self.urls.splitlines():
if not line.strip():
continue
try:
existing_entry = json.loads(line)
existing_urls.add(existing_entry.get('url', ''))
except json.JSONDecodeError:
existing_urls.add(line.strip())
existing_urls = {url for _raw_line, url in self._iter_url_lines() if url}
if url in existing_urls:
return False
# Append as JSONL
entry = {**entry, 'url': url}
jsonl_entry = json.dumps(entry)
self.urls = (self.urls.rstrip() + '\n' + jsonl_entry).lstrip('\n')
self.save(update_fields=['urls', 'modified_at'])
@@ -327,15 +540,11 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
Returns:
List of newly created Snapshot objects
"""
import sys
import json
from archivebox.core.models import Snapshot
from archivebox.misc.util import fix_url_from_markdown
created_snapshots = []
print(f'[cyan]DEBUG create_snapshots_from_urls: self.urls={repr(self.urls)}[/cyan]', file=sys.stderr)
print(f'[cyan]DEBUG create_snapshots_from_urls: lines={self.urls.splitlines()}[/cyan]', file=sys.stderr)
for line in self.urls.splitlines():
if not line.strip():
continue
@@ -343,13 +552,13 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
# Parse JSONL or plain URL
try:
entry = json.loads(line)
url = entry.get('url', '')
url = fix_url_from_markdown(str(entry.get('url', '') or '').strip())
depth = entry.get('depth', 0)
title = entry.get('title')
timestamp = entry.get('timestamp')
tags = entry.get('tags', '')
except json.JSONDecodeError:
url = line.strip()
url = fix_url_from_markdown(line.strip())
depth = 0
title = None
timestamp = None
@@ -357,6 +566,8 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
if not url:
continue
if not self.url_passes_filters(url):
continue
# Skip if depth exceeds max_depth
if depth > self.max_depth:

View File

@@ -64,6 +64,7 @@ from abx_plugins import get_plugins_dir
from django.conf import settings
from django.utils.safestring import mark_safe
from archivebox.config.constants import CONSTANTS
from archivebox.misc.util import fix_url_from_markdown
if TYPE_CHECKING:
from archivebox.machine.models import Process
@@ -266,7 +267,7 @@ def run_hook(
if process.status == 'exited':
records = process.get_records() # Get parsed JSONL output
"""
from archivebox.machine.models import Process, Machine
from archivebox.machine.models import Process, Machine, NetworkInterface
from archivebox.config.constants import CONSTANTS
import sys
@@ -280,6 +281,8 @@ def run_hook(
# Get current machine
machine = Machine.current()
iface = NetworkInterface.current(refresh=True)
machine = iface.machine
# Auto-detect parent process if not explicitly provided
# This enables automatic hierarchy tracking: Worker -> Hook
@@ -294,6 +297,7 @@ def run_hook(
# Create a failed Process record for hooks that don't exist
process = Process.objects.create(
machine=machine,
iface=iface,
parent=parent,
process_type=Process.TypeChoices.HOOK,
pwd=str(output_dir),
@@ -449,6 +453,7 @@ def run_hook(
# Create Process record
process = Process.objects.create(
machine=machine,
iface=iface,
parent=parent,
process_type=Process.TypeChoices.HOOK,
pwd=str(output_dir),
@@ -458,6 +463,7 @@ def run_hook(
# Copy the env dict we already built (includes os.environ + all customizations)
process.env = env.copy()
process.hydrate_binary_from_context(plugin_name=script.parent.name, hook_path=str(script))
# Save env before launching
process.save()
@@ -472,6 +478,7 @@ def run_hook(
# Create a failed Process record for exceptions
process = Process.objects.create(
machine=machine,
iface=iface,
process_type=Process.TypeChoices.HOOK,
pwd=str(output_dir),
cmd=cmd,
@@ -544,6 +551,9 @@ def collect_urls_from_plugins(snapshot_dir: Path) -> List[Dict[str, Any]]:
text = urls_file.read_text()
for entry in Process.parse_records_from_text(text):
if entry.get('url'):
entry['url'] = fix_url_from_markdown(str(entry['url']).strip())
if not entry['url']:
continue
# Track which parser plugin found this URL
entry['plugin'] = subdir.name
urls.append(entry)
@@ -615,11 +625,30 @@ def get_enabled_plugins(config: Optional[Dict[str, Any]] = None) -> List[str]:
from archivebox.config.configset import get_config
config = get_config()
def normalize_enabled_plugins(value: Any) -> List[str]:
    """Coerce an ENABLED_PLUGINS config value into a clean list of names.

    Accepts None, a CSV string, a JSON-array string, or an iterable; entries
    are stringified, stripped, and empty ones dropped.
    """
    if value is None:
        return []
    if isinstance(value, str):
        text = value.strip()
        if not text:
            return []
        if text.startswith('['):
            # JSON-style list; fall back to CSV parsing if it doesn't decode
            # to a list.
            try:
                decoded = json.loads(text)
            except json.JSONDecodeError:
                decoded = None
            if isinstance(decoded, list):
                return [str(item).strip() for item in decoded if str(item).strip()]
        return [item.strip() for item in text.split(',') if item.strip()]
    if isinstance(value, (list, tuple, set)):
        return [str(item).strip() for item in value if str(item).strip()]
    fallback = str(value).strip()
    return [fallback] if fallback else []
# Support explicit ENABLED_PLUGINS override (legacy)
if 'ENABLED_PLUGINS' in config:
return config['ENABLED_PLUGINS']
return normalize_enabled_plugins(config['ENABLED_PLUGINS'])
if 'ENABLED_EXTRACTORS' in config:
return config['ENABLED_EXTRACTORS']
return normalize_enabled_plugins(config['ENABLED_EXTRACTORS'])
# Filter all plugins by enabled status
all_plugins = get_plugins()
@@ -1042,6 +1071,14 @@ def process_hook_records(records: List[Dict[str, Any]], overrides: Dict[str, Any
if record_type == 'Snapshot':
from archivebox.core.models import Snapshot
if record.get('url'):
record = {
**record,
'url': fix_url_from_markdown(str(record['url']).strip()),
}
if not record['url']:
continue
# Check if discovered snapshot exceeds crawl max_depth
snapshot_depth = record.get('depth', 0)
crawl = overrides.get('crawl')

View File

@@ -113,7 +113,7 @@ class BinaryAdmin(BaseModelAdmin):
sort_fields = ('id', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'status')
search_fields = ('id', 'machine__id', 'name', 'binprovider', 'version', 'abspath', 'sha256')
readonly_fields = ('created_at', 'modified_at')
readonly_fields = ('created_at', 'modified_at', 'output_dir')
fieldsets = (
('Binary Info', {
@@ -166,7 +166,7 @@ class ProcessAdmin(BaseModelAdmin):
sort_fields = ('id', 'created_at', 'status', 'exit_code', 'pid')
search_fields = ('id', 'machine__id', 'binary__name', 'cmd', 'pwd', 'stdout', 'stderr')
readonly_fields = ('created_at', 'modified_at', 'machine', 'binary', 'iface', 'archiveresult_link')
readonly_fields = ('created_at', 'modified_at', 'machine', 'binary_link', 'iface_link', 'archiveresult_link')
fieldsets = (
('Process Info', {
@@ -178,7 +178,7 @@ class ProcessAdmin(BaseModelAdmin):
'classes': ('card', 'wide'),
}),
('Execution', {
'fields': ('binary', 'iface', 'pid', 'exit_code', 'url'),
'fields': ('binary_link', 'iface_link', 'pid', 'exit_code', 'url'),
'classes': ('card',),
}),
('Timing', {
@@ -216,6 +216,21 @@ class ProcessAdmin(BaseModelAdmin):
process.binary.id, process.binary.name, process.binary.version,
)
@admin.display(description='Binary', ordering='binary__name')
def binary_link(self, process):
    """Alias of binary_info for use as a readonly 'Binary' admin field/column."""
    return self.binary_info(process)
@admin.display(description='Network Interface', ordering='iface__id')
def iface_link(self, process):
    """Render a link to the process's NetworkInterface admin page, or '-' if unset."""
    if not process.iface:
        return '-'
    return format_html(
        '<a href="/admin/machine/networkinterface/{}/change"><code>{}</code> {}</a>',
        process.iface.id,
        str(process.iface.id)[:8],  # shortened id for display
        # Best available human-readable label: iface name, else public IP, else local IP.
        process.iface.iface or process.iface.ip_public or process.iface.ip_local,
    )
@admin.display(description='ArchiveResult')
def archiveresult_link(self, process):
if not hasattr(process, 'archiveresult'):

View File

@@ -49,6 +49,89 @@ BINARY_RECHECK_INTERVAL = 1 * 30 * 60
PROCESS_RECHECK_INTERVAL = 60 # Re-validate every 60 seconds
PID_REUSE_WINDOW = timedelta(hours=24) # Max age for considering a PID match valid
START_TIME_TOLERANCE = 5.0 # Seconds tolerance for start time matching
LEGACY_MACHINE_CONFIG_KEYS = frozenset({"CHROMIUM_VERSION"})
def _find_existing_binary_for_reference(machine: 'Machine', reference: str) -> 'Binary | None':
    """Resolve *reference* (an abspath or a binary name) to the freshest Binary on *machine*.

    Lookup order: exact abspath match, then basename-of-reference as name,
    then the raw reference string as name. Returns None when nothing matches.
    """
    reference = str(reference or '').strip()
    if not reference:
        return None

    machine_binaries = Binary.objects.filter(machine=machine)

    # 1) Exact absolute-path match wins outright.
    by_abspath = machine_binaries.filter(abspath=reference).order_by('-modified_at').first()
    if by_abspath is not None:
        return by_abspath

    # 2) Fall back to matching the basename of the reference against Binary.name.
    basename = Path(reference).name
    if basename:
        by_basename = machine_binaries.filter(name=basename).order_by('-modified_at').first()
        if by_basename is not None:
            return by_basename

    # 3) Last resort: treat the whole reference string as the binary name.
    return machine_binaries.filter(name=reference).order_by('-modified_at').first()
def _get_process_binary_env_keys(plugin_name: str, hook_path: str, env: dict[str, Any] | None) -> list[str]:
env = env or {}
plugin_name = str(plugin_name or '').strip()
hook_path = str(hook_path or '').strip()
plugin_key = plugin_name.upper().replace('-', '_')
keys: list[str] = []
seen: set[str] = set()
def add(key: str) -> None:
if key and key not in seen and env.get(key):
seen.add(key)
keys.append(key)
if plugin_key:
add(f'{plugin_key}_BINARY')
try:
from archivebox.hooks import discover_plugin_configs
plugin_schema = discover_plugin_configs().get(plugin_name, {})
schema_keys = [
key
for key in (plugin_schema.get('properties') or {})
if key.endswith('_BINARY')
]
except Exception:
schema_keys = []
schema_keys.sort(key=lambda key: (
key != f'{plugin_key}_BINARY',
key.endswith('_NODE_BINARY'),
key.endswith('_CHROME_BINARY'),
key,
))
for key in schema_keys:
add(key)
if plugin_name.startswith('search_backend_'):
backend_name = plugin_name.removeprefix('search_backend_').upper().replace('-', '_')
configured_engine = str(env.get('SEARCH_BACKEND_ENGINE') or '').strip().upper().replace('-', '_')
if backend_name and backend_name == configured_engine:
add(f'{backend_name}_BINARY')
hook_suffix = Path(hook_path).suffix.lower()
if hook_suffix == '.js':
if plugin_key:
add(f'{plugin_key}_NODE_BINARY')
add('NODE_BINARY')
return keys
def _sanitize_machine_config(config: dict[str, Any] | None) -> dict[str, Any]:
    """Return a copy of *config* with deprecated machine config keys removed.

    Non-dict input (including None) yields an empty dict; the input is never
    mutated.
    """
    if not isinstance(config, dict):
        return {}
    return {key: value for key, value in config.items() if key not in LEGACY_MACHINE_CONFIG_KEYS}
class MachineManager(models.Manager):
@@ -89,13 +172,13 @@ class Machine(ModelWithHealthStats):
global _CURRENT_MACHINE
if _CURRENT_MACHINE:
if timezone.now() < _CURRENT_MACHINE.modified_at + timedelta(seconds=MACHINE_RECHECK_INTERVAL):
return cls._hydrate_config_from_sibling(_CURRENT_MACHINE)
return cls._sanitize_config(cls._hydrate_config_from_sibling(_CURRENT_MACHINE))
_CURRENT_MACHINE = None
_CURRENT_MACHINE, _ = cls.objects.update_or_create(
guid=get_host_guid(),
defaults={'hostname': socket.gethostname(), **get_os_info(), **get_vm_info(), 'stats': get_host_stats()},
)
return cls._hydrate_config_from_sibling(_CURRENT_MACHINE)
return cls._sanitize_config(cls._hydrate_config_from_sibling(_CURRENT_MACHINE))
@classmethod
def _hydrate_config_from_sibling(cls, machine: 'Machine') -> 'Machine':
@@ -115,6 +198,15 @@ class Machine(ModelWithHealthStats):
machine.save(update_fields=['config', 'modified_at'])
return machine
    @classmethod
    def _sanitize_config(cls, machine: 'Machine') -> 'Machine':
        """Strip legacy keys from machine.config, persisting only when something changed.

        Saving bumps modified_at too, which resets the cached-machine recheck
        window used by Machine.current().
        """
        sanitized = _sanitize_machine_config(machine.config)
        current = machine.config or {}
        if sanitized != current:
            machine.config = sanitized
            machine.save(update_fields=['config', 'modified_at'])
        return machine
def to_json(self) -> dict:
"""
Convert Machine model instance to a JSON-serializable dict.
@@ -152,11 +244,10 @@ class Machine(ModelWithHealthStats):
Returns:
Machine instance or None
"""
config_patch = record.get('config')
if isinstance(config_patch, dict) and config_patch:
config_patch = _sanitize_machine_config(record.get('config'))
if config_patch:
machine = Machine.current()
if not machine.config:
machine.config = {}
machine.config = _sanitize_machine_config(machine.config)
machine.config.update(config_patch)
machine.save(update_fields=['config'])
return machine
@@ -194,13 +285,17 @@ class NetworkInterface(ModelWithHealthStats):
unique_together = (('machine', 'ip_public', 'ip_local', 'mac_address', 'dns_server'),)
@classmethod
def current(cls) -> 'NetworkInterface':
def current(cls, refresh: bool = False) -> 'NetworkInterface':
global _CURRENT_INTERFACE
machine = Machine.current()
if _CURRENT_INTERFACE:
if timezone.now() < _CURRENT_INTERFACE.modified_at + timedelta(seconds=NETWORK_INTERFACE_RECHECK_INTERVAL):
if (
not refresh
and _CURRENT_INTERFACE.machine_id == machine.id
and timezone.now() < _CURRENT_INTERFACE.modified_at + timedelta(seconds=NETWORK_INTERFACE_RECHECK_INTERVAL)
):
return _CURRENT_INTERFACE
_CURRENT_INTERFACE = None
machine = Machine.current()
net_info = get_host_network()
_CURRENT_INTERFACE, _ = cls.objects.update_or_create(
machine=machine, ip_public=net_info.pop('ip_public'), ip_local=net_info.pop('ip_local'),
@@ -747,14 +842,17 @@ class ProcessManager(models.Manager):
Called during migration and when creating new ArchiveResults.
"""
iface = kwargs.get('iface') or NetworkInterface.current()
# Defaults from ArchiveResult if not provided
defaults = {
'machine': Machine.current(),
'machine': iface.machine,
'pwd': kwargs.get('pwd') or str(archiveresult.snapshot.output_dir / archiveresult.plugin),
'cmd': kwargs.get('cmd') or [],
'status': 'queued',
'timeout': kwargs.get('timeout', 120),
'env': kwargs.get('env', {}),
'iface': iface,
}
defaults.update(kwargs)
@@ -971,6 +1069,28 @@ class Process(models.Model):
record['timeout'] = self.timeout
return record
def hydrate_binary_from_context(self, *, plugin_name: str = '', hook_path: str = '') -> 'Binary | None':
machine = self.machine if self.machine_id else Machine.current()
references: list[str] = []
for key in _get_process_binary_env_keys(plugin_name, hook_path, self.env):
value = str(self.env.get(key) or '').strip()
if value and value not in references:
references.append(value)
if self.cmd:
cmd_0 = str(self.cmd[0]).strip()
if cmd_0 and cmd_0 not in references:
references.append(cmd_0)
for reference in references:
binary = _find_existing_binary_for_reference(machine, reference)
if binary:
self.binary = binary
return binary
return None
@classmethod
def parse_records_from_text(cls, text: str) -> list[dict]:
"""Parse JSONL records from raw text using the shared JSONL parser."""
@@ -1044,6 +1164,7 @@ class Process(models.Model):
current_pid = os.getpid()
machine = Machine.current()
iface = NetworkInterface.current()
# Check cache validity
if _CURRENT_PROCESS:
@@ -1053,6 +1174,9 @@ class Process(models.Model):
and _CURRENT_PROCESS.machine_id == machine.id
and timezone.now() < _CURRENT_PROCESS.modified_at + timedelta(seconds=PROCESS_RECHECK_INTERVAL)
):
if _CURRENT_PROCESS.iface_id != iface.id:
_CURRENT_PROCESS.iface = iface
_CURRENT_PROCESS.save(update_fields=['iface', 'modified_at'])
_CURRENT_PROCESS.ensure_log_files()
return _CURRENT_PROCESS
_CURRENT_PROCESS = None
@@ -1080,6 +1204,9 @@ class Process(models.Model):
db_start_time = existing.started_at.timestamp()
if abs(db_start_time - os_start_time) < START_TIME_TOLERANCE:
_CURRENT_PROCESS = existing
if existing.iface_id != iface.id:
existing.iface = iface
existing.save(update_fields=['iface', 'modified_at'])
_CURRENT_PROCESS.ensure_log_files()
return existing
@@ -1112,6 +1239,7 @@ class Process(models.Model):
pid=current_pid,
started_at=started_at,
status=cls.StatusChoices.RUNNING,
iface=iface,
)
_CURRENT_PROCESS.ensure_log_files()
return _CURRENT_PROCESS
@@ -1176,7 +1304,9 @@ class Process(models.Model):
if 'supervisord' in argv_str:
return cls.TypeChoices.SUPERVISORD
elif 'archivebox run' in argv_str or 'runner_watch' in argv_str:
elif 'runner_watch' in argv_str:
return cls.TypeChoices.WORKER
elif 'archivebox run' in argv_str:
return cls.TypeChoices.ORCHESTRATOR
elif 'archivebox' in argv_str:
return cls.TypeChoices.CLI
@@ -1321,14 +1451,17 @@ class Process(models.Model):
if self.cmd:
try:
os_cmdline = os_proc.cmdline()
# Check if first arg (binary) matches
if os_cmdline and self.cmd:
os_binary = os_cmdline[0] if os_cmdline else ''
db_binary = self.cmd[0] if self.cmd else ''
# Match by basename (handles /usr/bin/python3 vs python3)
if os_binary and db_binary:
if Path(os_binary).name != Path(db_binary).name:
return None # Different binary, PID reused
if db_binary:
db_binary_name = Path(db_binary).name
cmd_matches = any(
arg == db_binary or Path(arg).name == db_binary_name
for arg in os_cmdline
if arg
)
if not cmd_matches:
return None # Different command, PID reused
except (psutil.AccessDenied, psutil.ZombieProcess):
pass # Can't check cmdline, trust start time match

View File

@@ -4,6 +4,7 @@ import re
import requests
import json as pyjson
import http.cookiejar
from dateparser import parse as dateparser
from typing import List, Optional, Any, Callable
from pathlib import Path
@@ -13,7 +14,6 @@ from hashlib import sha256
from urllib.parse import urlparse, quote, unquote
from html import escape, unescape
from datetime import datetime, timezone
from dateparser import parse as dateparser
from requests.exceptions import RequestException, ReadTimeout
from base32_crockford import encode as base32_encode
@@ -122,9 +122,35 @@ def fix_url_from_markdown(url_str: str) -> str:
return url_str
def split_comma_separated_urls(url: str):
    """Yield ``(offset, url)`` pieces, splitting a string of comma-joined URLs.

    Splits only where a ``,`` directly precedes the next ``http(s)://`` so that
    commas embedded inside a URL (e.g. in query parameters) are left alone.
    *offset* is the index of each yielded piece within the original input.
    """
    offset = 0
    remaining = url
    while True:
        # Find the next scheme occurrence past position 0 (skip our own scheme).
        candidates = [
            idx
            for idx in (remaining.find('http://', 1), remaining.find('https://', 1))
            if idx != -1
        ]
        if not candidates:
            # No further scheme: everything left is one URL.
            yield offset, remaining
            return
        split_at = min(candidates)
        if remaining[split_at - 1] != ',':
            # Scheme appears mid-URL (e.g. a url= query param), not as a separator.
            yield offset, remaining
            return
        yield offset, remaining[:split_at - 1]
        offset += split_at
        remaining = remaining[split_at:]
def find_all_urls(urls_str: str):
    """Yield every URL found in *urls_str*, splitting comma-joined matches.

    When one regex match contains several comma-separated URLs, each piece is
    yielded individually; the later pieces' start positions are remembered so
    that overlapping regex matches are not yielded twice.
    """
    already_emitted_starts: set[int] = set()
    for match in re.finditer(URL_REGEX, urls_str):
        if match.start() in already_emitted_starts:
            continue
        cleaned = fix_url_from_markdown(match.group(1))
        for offset, piece in split_comma_separated_urls(cleaned):
            if offset:
                already_emitted_starts.add(match.start() + offset)
            yield piece
def is_static_file(url: str):
@@ -214,7 +240,25 @@ def parse_date(date: Any) -> datetime | None:
date = str(date)
if isinstance(date, str):
parsed_date = dateparser(date, settings={'TIMEZONE': 'UTC'})
normalized = date.strip()
if not normalized:
raise ValueError(f'Tried to parse invalid date string! {date}')
try:
return datetime.fromtimestamp(float(normalized), tz=timezone.utc)
except (TypeError, ValueError, OSError):
pass
try:
iso_date = normalized.replace('Z', '+00:00')
parsed_date = datetime.fromisoformat(iso_date)
if parsed_date.tzinfo is None:
return parsed_date.replace(tzinfo=timezone.utc)
return parsed_date.astimezone(timezone.utc)
except ValueError:
pass
parsed_date = dateparser(normalized, settings={'TIMEZONE': 'UTC'})
if parsed_date is None:
raise ValueError(f'Tried to parse invalid date string! {date}')
return parsed_date.astimezone(timezone.utc)
@@ -408,6 +452,7 @@ assert fix_url_from_markdown('https://wikipedia.org/en/some_article_(Disambiguat
URL_REGEX_TESTS = [
('https://example.com', ['https://example.com']),
('https://sweeting.me,https://google.com', ['https://sweeting.me', 'https://google.com']),
('http://abc-file234example.com/abc?def=abc&23423=sdfsdf#abc=234&234=a234', ['http://abc-file234example.com/abc?def=abc&23423=sdfsdf#abc=234&234=a234']),
('https://twitter.com/share?url=https://akaao.success-corp.co.jp&text=ア@サ!ト&hashtags=ア%オ,元+ア.ア-オ_イ*シ$ロ abc', ['https://twitter.com/share?url=https://akaao.success-corp.co.jp&text=ア@サ!ト&hashtags=ア%オ,元+ア.ア-オ_イ*シ$ロ', 'https://akaao.success-corp.co.jp&text=ア@サ!ト&hashtags=ア%オ,元+ア.ア-オ_イ*シ$ロ']),

View File

@@ -1,2 +1,169 @@
__package__ = "archivebox.personas"
# Register your models here.
import shutil
from django.contrib import admin, messages
from django.utils.html import format_html, format_html_join
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
from archivebox.personas.forms import PersonaAdminForm
from archivebox.personas.importers import discover_local_browser_profiles
from archivebox.personas.models import Persona
class PersonaAdmin(ConfigEditorMixin, BaseModelAdmin):
    """Admin for Persona objects.

    Besides standard CRUD, the add/change form can bootstrap a persona by
    importing browser state (profile dir, cookies.txt, auth.json) — the actual
    import is delegated to PersonaAdminForm.apply_import() and runs in
    save_model() after the row has been persisted.
    """

    form = PersonaAdminForm
    change_form_template = "admin/personas/persona/change_form.html"
    list_display = ("name", "created_by", "created_at", "chrome_profile_state", "cookies_state", "auth_state")
    search_fields = ("name", "created_by__username")
    list_filter = ("created_by",)
    ordering = ["name"]
    list_per_page = 100
    readonly_fields = ("id", "created_at", "persona_paths", "import_artifact_status")

    # Fieldsets for the "add" form: persona basics + the one-shot import options.
    add_fieldsets = (
        ("Persona", {
            "fields": ("name", "created_by"),
            "classes": ("card",),
        }),
        ("Browser Import", {
            "fields": (
                "import_mode",
                "import_discovered_profile",
                "import_source",
                "import_profile_name",
                "import_copy_profile",
                "import_extract_cookies",
                "import_capture_storage",
            ),
            "classes": ("card", "wide"),
        }),
        ("Advanced", {
            "fields": ("config",),
            "classes": ("card", "wide"),
        }),
    )
    # Change form shows everything from "add" plus artifact status and metadata.
    change_fieldsets = add_fieldsets + (
        ("Artifacts", {
            "fields": ("persona_paths", "import_artifact_status"),
            "classes": ("card", "wide"),
        }),
        ("Timestamps", {
            "fields": ("id", "created_at"),
            "classes": ("card",),
        }),
    )

    @admin.display(description="Chrome Profile")
    def chrome_profile_state(self, obj: Persona) -> str:
        """List-column flag: does this persona have a chrome_user_data dir on disk?"""
        return "yes" if (obj.path / "chrome_user_data").exists() else "no"

    @admin.display(description="cookies.txt")
    def cookies_state(self, obj: Persona) -> str:
        """List-column flag: does this persona have a cookies.txt file?"""
        return "yes" if obj.COOKIES_FILE else "no"

    @admin.display(description="auth.json")
    def auth_state(self, obj: Persona) -> str:
        """List-column flag: does this persona have an auth.json storage file?"""
        return "yes" if obj.AUTH_STORAGE_FILE else "no"

    @admin.display(description="Persona Paths")
    def persona_paths(self, obj: Persona) -> str:
        """Read-only HTML panel listing every on-disk path belonging to this persona."""
        return format_html(
            "<div class='abx-persona-path-list'>"
            "<div><strong>Persona root</strong><code>{}</code></div>"
            "<div><strong>chrome_user_data</strong><code>{}</code></div>"
            "<div><strong>chrome_extensions</strong><code>{}</code></div>"
            "<div><strong>chrome_downloads</strong><code>{}</code></div>"
            "<div><strong>cookies.txt</strong><code>{}</code></div>"
            "<div><strong>auth.json</strong><code>{}</code></div>"
            "</div>",
            obj.path,
            obj.CHROME_USER_DATA_DIR,
            obj.CHROME_EXTENSIONS_DIR,
            obj.CHROME_DOWNLOADS_DIR,
            obj.COOKIES_FILE or (obj.path / "cookies.txt"),
            obj.AUTH_STORAGE_FILE or (obj.path / "auth.json"),
        )

    @admin.display(description="Import Artifacts")
    def import_artifact_status(self, obj: Persona) -> str:
        """Read-only HTML panel showing present/missing state for each import artifact."""
        entries = [
            ("Browser profile", (obj.path / "chrome_user_data").exists(), obj.CHROME_USER_DATA_DIR),
            ("cookies.txt", bool(obj.COOKIES_FILE), obj.COOKIES_FILE or (obj.path / "cookies.txt")),
            ("auth.json", bool(obj.AUTH_STORAGE_FILE), obj.AUTH_STORAGE_FILE or (obj.path / "auth.json")),
        ]
        return format_html(
            "<div class='abx-persona-artifacts'>{}</div>",
            format_html_join(
                "",
                "<div class='abx-persona-artifact'><strong>{}</strong><span class='{}'>{}</span><code>{}</code></div>",
                (
                    (
                        label,
                        "abx-artifact-state abx-artifact-state--yes" if enabled else "abx-artifact-state abx-artifact-state--no",
                        "present" if enabled else "missing",
                        path,
                    )
                    for label, enabled, path in entries
                ),
            ),
        )

    def get_fieldsets(self, request, obj=None):
        """Use the richer change fieldsets when editing an existing persona."""
        return self.change_fieldsets if obj else self.add_fieldsets

    def render_change_form(self, request, context, add=False, change=False, form_url="", obj=None):
        """Expose the count of auto-discovered local browser profiles to the template."""
        context["detected_profile_count"] = len(discover_local_browser_profiles())
        return super().render_change_form(request, context, add=add, change=change, form_url=form_url, obj=obj)

    def save_model(self, request, obj, form, change):
        """Save the persona, move its directory on rename, then run any requested import.

        Raises:
            FileExistsError: if renaming would clobber an existing persona directory.
        """
        # Detect a rename so the on-disk persona directory can be moved to match.
        old_path = None
        new_path = None
        if change:
            previous = Persona.objects.get(pk=obj.pk)
            if previous.name != obj.name:
                old_path = previous.path
                new_path = obj.path
        super().save_model(request, obj, form, change)
        if old_path and new_path and old_path != new_path and old_path.exists():
            if new_path.exists():
                # Refuse to overwrite another persona's data directory.
                raise FileExistsError(f"Cannot rename Persona directory because the destination already exists: {new_path}")
            shutil.move(str(old_path), str(new_path))
        obj.ensure_dirs()
        # Run the browser-state import chosen on the form (None = no import requested).
        import_result = form.apply_import(obj)
        if import_result is None:
            return
        # Summarize which artifacts were actually produced for the admin message.
        completed_actions = []
        if import_result.profile_copied:
            completed_actions.append("profile copied")
        if import_result.cookies_imported:
            completed_actions.append("cookies.txt generated")
        if import_result.storage_captured:
            completed_actions.append("auth.json captured")
        if import_result.user_agent_imported:
            completed_actions.append("USER_AGENT copied")
        if completed_actions:
            messages.success(
                request,
                f'Imported {", ".join(completed_actions)} from {import_result.source.display_label}.',
            )
        else:
            messages.warning(
                request,
                f"Persona saved, but no browser artifacts were imported from {import_result.source.display_label}.",
            )
        for warning in import_result.warnings:
            messages.warning(request, warning)
def register_admin(admin_site: admin.AdminSite) -> None:
    """Register the Persona model (with PersonaAdmin) on the given admin site."""
    admin_site.register(Persona, PersonaAdmin)

View File

@@ -0,0 +1,210 @@
#!/usr/bin/env node
/**
* Export cookies and open-tab storage from a Chromium profile or live CDP URL.
*
* Environment variables:
* ARCHIVEBOX_ABX_PLUGINS_DIR Absolute path to abx_plugins/plugins
* CHROME_USER_DATA_DIR Local Chromium user-data directory to launch
* CHROME_CDP_URL Existing browser CDP URL to attach to
* COOKIES_OUTPUT_FILE Optional output path for Netscape cookies.txt
* AUTH_STORAGE_OUTPUT_FILE Optional output path for auth.json
* CHROME_BINARY Optional browser binary override
* NODE_MODULES_DIR Optional node_modules path for puppeteer-core
*/
const fs = require('fs');
const os = require('os');
const path = require('path');
// Locate the ArchiveBox plugins dir; required to resolve the shared JS helpers below.
const pluginsDir = process.env.ARCHIVEBOX_ABX_PLUGINS_DIR || process.env.ABX_PLUGINS_DIR;
if (!pluginsDir) {
    console.error('ARCHIVEBOX_ABX_PLUGINS_DIR is required');
    process.exit(1);
}
// Shared base/chrome helpers; puppeteer-core is resolved via the plugins' node_modules
// after ensureNodeModuleResolution() patches this module's resolution paths.
const baseUtils = require(path.join(pluginsDir, 'base', 'utils.js'));
baseUtils.ensureNodeModuleResolution(module);
const chromeUtils = require(path.join(pluginsDir, 'chrome', 'chrome_utils.js'));
const puppeteer = require('puppeteer-core');
/**
 * Serialize a CDP cookie object into one tab-separated Netscape cookies.txt line.
 * Fields: domain, includeSubdomains, path, secure, expiry, name, value.
 * Non-host-only cookies get a leading dot added to their domain.
 */
function cookieToNetscape(cookie) {
    const domain = (!cookie.domain.startsWith('.') && !cookie.hostOnly)
        ? '.' + cookie.domain
        : cookie.domain;
    const fields = [
        domain,
        domain.startsWith('.') ? 'TRUE' : 'FALSE',
        cookie.path || '/',
        cookie.secure ? 'TRUE' : 'FALSE',
        (cookie.expires && cookie.expires > 0) ? Math.floor(cookie.expires).toString() : '0',
        cookie.name,
        cookie.value,
    ];
    return fields.join('\t');
}
/**
 * Write cookies to outputPath in Netscape cookies.txt format,
 * creating parent directories as needed. Always ends with a trailing newline.
 */
function writeCookiesFile(cookies, outputPath) {
    const header = [
        '# Netscape HTTP Cookie File',
        '# https://curl.se/docs/http-cookies.html',
        '# This file was generated by ArchiveBox persona cookie extraction',
        '#',
        '# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue',
        '',
    ];
    const lines = header.concat(cookies.map(cookieToNetscape));
    fs.mkdirSync(path.dirname(outputPath), { recursive: true });
    fs.writeFileSync(outputPath, lines.join('\n') + '\n');
}
/**
 * Collect localStorage/sessionStorage from every inspectable open tab.
 *
 * Skips blank pages and browser-internal URLs (chrome:, edge:, devtools:),
 * plus any page whose evaluate() fails. Returns
 * { localStorage, sessionStorage }, each keyed by page origin and containing
 * only origins with at least one stored entry.
 */
async function collectStorage(browser) {
    const localStorage = {};
    const sessionStorage = {};
    const pages = await browser.pages();
    for (const page of pages) {
        try {
            const url = page.url();
            if (!url || url === 'about:blank') continue;
            if (url.startsWith('chrome:') || url.startsWith('edge:') || url.startsWith('devtools:')) continue;
            // Read both storage areas in one in-page evaluation.
            const payload = await page.evaluate(() => ({
                origin: window.location.origin,
                localStorage: Object.fromEntries(Object.entries(window.localStorage)),
                sessionStorage: Object.fromEntries(Object.entries(window.sessionStorage)),
            }));
            // Opaque origins serialize as the string "null" — skip those.
            if (!payload.origin || payload.origin === 'null') continue;
            if (Object.keys(payload.localStorage || {}).length > 0) {
                localStorage[payload.origin] = payload.localStorage;
            }
            if (Object.keys(payload.sessionStorage || {}).length > 0) {
                sessionStorage[payload.origin] = payload.sessionStorage;
            }
        } catch (error) {
            // Ignore pages that cannot be inspected via evaluate().
        }
    }
    return { localStorage, sessionStorage };
}
/**
 * Open a browser connection for state export.
 *
 * Prefers attaching to an existing browser at CHROME_CDP_URL; otherwise
 * launches a headless Chromium against CHROME_USER_DATA_DIR (binary from
 * CHROME_BINARY or auto-detected).
 *
 * Returns { browser, cleanup, sourceDescription }. cleanup() disconnects,
 * and for a locally-launched browser also kills the process and removes the
 * temporary output dir. Throws if neither env var is usable or launch fails.
 */
async function openBrowser() {
    const cdpUrl = process.env.CHROME_CDP_URL || '';
    if (cdpUrl) {
        // Attach-only mode: never kill a browser we did not launch.
        const browser = await chromeUtils.connectToBrowserEndpoint(puppeteer, cdpUrl, { defaultViewport: null });
        return {
            browser,
            async cleanup() {
                try {
                    await browser.disconnect();
                } catch (error) {}
            },
            sourceDescription: cdpUrl,
        };
    }
    const userDataDir = process.env.CHROME_USER_DATA_DIR;
    if (!userDataDir) {
        throw new Error('Either CHROME_USER_DATA_DIR or CHROME_CDP_URL is required');
    }
    if (!fs.existsSync(userDataDir)) {
        throw new Error(`User data directory does not exist: ${userDataDir}`);
    }
    // Temp dir for launch artifacts (logs, etc.); removed again in cleanup().
    const outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'abx-browser-state-'));
    const binary = process.env.CHROME_BINARY || chromeUtils.findAnyChromiumBinary();
    if (!binary) {
        throw new Error('Could not find a Chromium binary for browser state export');
    }
    const launched = await chromeUtils.launchChromium({
        binary,
        outputDir,
        userDataDir,
        headless: true,
        killZombies: false,
    });
    if (!launched.success) {
        throw new Error(launched.error || 'Chrome launch failed');
    }
    const browser = await chromeUtils.connectToBrowserEndpoint(puppeteer, launched.cdpUrl, { defaultViewport: null });
    return {
        browser,
        async cleanup() {
            // Best-effort teardown: disconnect, kill our launched browser, remove temp dir.
            try {
                await browser.disconnect();
            } catch (error) {}
            try {
                await chromeUtils.killChrome(launched.pid, outputDir);
            } catch (error) {}
            try {
                fs.rmSync(outputDir, { recursive: true, force: true });
            } catch (error) {}
        },
        sourceDescription: userDataDir,
    };
}
/**
 * Entry point: export cookies (Netscape cookies.txt) and/or auth storage
 * (auth.json with cookies + per-origin local/session storage + user agent)
 * from the browser selected by the environment.
 *
 * Requires at least one of COOKIES_OUTPUT_FILE / AUTH_STORAGE_OUTPUT_FILE.
 * Progress is reported on stderr; cleanup always runs.
 */
async function main() {
    const cookiesOutput = process.env.COOKIES_OUTPUT_FILE || '';
    const authOutput = process.env.AUTH_STORAGE_OUTPUT_FILE || '';
    if (!cookiesOutput && !authOutput) {
        throw new Error('COOKIES_OUTPUT_FILE or AUTH_STORAGE_OUTPUT_FILE is required');
    }
    const { browser, cleanup, sourceDescription } = await openBrowser();
    try {
        // Cookies are fetched via CDP Storage.getCookies on a browser-level session.
        const session = await browser.target().createCDPSession();
        const browserVersion = await session.send('Browser.getVersion');
        const cookieResult = await session.send('Storage.getCookies');
        const cookies = cookieResult?.cookies || [];
        const { localStorage, sessionStorage } = await collectStorage(browser);
        const userAgent = browserVersion?.userAgent || '';
        if (cookiesOutput) {
            writeCookiesFile(cookies, cookiesOutput);
        }
        if (authOutput) {
            fs.mkdirSync(path.dirname(authOutput), { recursive: true });
            fs.writeFileSync(
                authOutput,
                JSON.stringify(
                    {
                        TYPE: 'auth',
                        SOURCE: sourceDescription,
                        captured_at: new Date().toISOString(),
                        user_agent: userAgent,
                        cookies,
                        localStorage,
                        sessionStorage,
                    },
                    null,
                    2,
                ) + '\n',
            );
        }
        console.error(
            `[+] Exported ${cookies.length} cookies` +
            `${authOutput ? ` and ${Object.keys(localStorage).length + Object.keys(sessionStorage).length} storage origins` : ''}` +
            `${userAgent ? ' with browser USER_AGENT' : ''}` +
            ` from ${sourceDescription}`,
        );
    } finally {
        await cleanup();
    }
}
// Run, exiting non-zero with the error message on any failure.
main().catch((error) => {
    console.error(`ERROR: ${error.message}`);
    process.exit(1);
});

View File

@@ -0,0 +1,176 @@
__package__ = "archivebox.personas"
from typing import Any
from django import forms
from django.utils.safestring import mark_safe
from archivebox.personas.importers import (
PersonaImportResult,
PersonaImportSource,
discover_local_browser_profiles,
import_persona_from_source,
resolve_custom_import_source,
validate_persona_name,
)
from archivebox.personas.models import Persona
def _mode_label(title: str, description: str) -> str:
    """Render an import-mode radio choice label as a title + description HTML snippet.

    NOTE(review): *title* and *description* are interpolated unescaped into
    mark_safe() output — callers must only pass trusted literal strings.
    """
    html = (
        '<span class="abx-import-mode-option">'
        f'<strong>{title}</strong>'
        f'<span>{description}</span>'
        '</span>'
    )
    return mark_safe(html)
class PersonaAdminForm(forms.ModelForm):
    """ModelForm for Persona with extra one-shot browser-import fields.

    The import_* fields are not model fields: clean() resolves them into a
    PersonaImportSource, and apply_import() (called by the admin after save)
    runs the actual import via the shared backend helpers.
    """

    import_mode = forms.ChoiceField(
        required=False,
        initial="none",
        label="Bootstrap this persona",
        widget=forms.RadioSelect,
        choices=(
            ("none", _mode_label("Blank Persona", "Create the persona without importing browser state yet.")),
            ("discovered", _mode_label("Use a detected profile", "Pick from Chromium profiles auto-discovered on this host.")),
            ("custom", _mode_label("Use a custom path or CDP URL", "Paste an absolute Chromium path or attach to a live browser debugging endpoint.")),
        ),
        help_text="These options run after the Persona row is saved, using the same backend import helpers as the CLI.",
    )
    import_discovered_profile = forms.ChoiceField(
        required=False,
        label="Autodiscovered profiles",
        widget=forms.RadioSelect,
        choices=(),  # populated per-request in __init__ from discovery results
        help_text="Detected from local Chrome, Chromium, Brave, and Edge profile roots.",
    )
    import_source = forms.CharField(
        required=False,
        label="Absolute path or CDP URL",
        widget=forms.TextInput(
            attrs={
                "placeholder": "/Users/alice/Library/Application Support/Google/Chrome or ws://127.0.0.1:9222/devtools/browser/...",
                "style": "width: 100%; font-family: monospace;",
            }
        ),
        help_text="Accepts an absolute Chromium user-data dir, an exact profile dir, or a live HTTP/WS CDP endpoint.",
    )
    import_profile_name = forms.CharField(
        required=False,
        label="Profile directory name",
        widget=forms.TextInput(
            attrs={
                "placeholder": "Default or Profile 1",
                "style": "width: 100%; font-family: monospace;",
            }
        ),
        help_text="Only used when the custom path points at a browser root containing multiple profiles.",
    )
    import_copy_profile = forms.BooleanField(
        required=False,
        initial=True,
        label="Copy browser profile into this persona",
        help_text="Copies the chosen Chromium user-data tree into `chrome_user_data` for future archiving runs.",
    )
    import_extract_cookies = forms.BooleanField(
        required=False,
        initial=True,
        label="Generate `cookies.txt`",
        help_text="Extracts cookies through Chrome DevTools Protocol and writes a Netscape cookie jar for wget/curl-based plugins.",
    )
    import_capture_storage = forms.BooleanField(
        required=False,
        initial=True,
        label="Capture open-tab storage into `auth.json`",
        help_text="Snapshots currently open tab `localStorage` / `sessionStorage` values by origin. This is most useful for live CDP imports.",
    )

    class Meta:
        model = Persona
        fields = ("name", "created_by", "config")

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Discover local browser profiles and build the discovered-profile choices."""
        super().__init__(*args, **kwargs)
        self.discovered_profiles = discover_local_browser_profiles()
        # Resolved by clean(); consumed by apply_import() after save.
        self._resolved_import_source: PersonaImportSource | None = None
        self.fields["import_mode"].widget.attrs["class"] = "abx-import-mode"
        self.fields["import_discovered_profile"].widget.attrs["class"] = "abx-profile-picker"
        if self.discovered_profiles:
            self.fields["import_discovered_profile"].choices = [
                (profile.choice_value, profile.as_choice_label()) for profile in self.discovered_profiles
            ]
        else:
            self.fields["import_discovered_profile"].choices = []
            self.fields["import_discovered_profile"].help_text = (
                "No local Chromium profiles were detected on this host right now. "
                "Use the custom path/CDP option if the browser data lives elsewhere."
            )

    def clean_name(self) -> str:
        """Validate the persona name via the shared backend validator."""
        name = str(self.cleaned_data.get("name") or "").strip()
        is_valid, error_message = validate_persona_name(name)
        if not is_valid:
            raise forms.ValidationError(error_message)
        return name

    def clean(self) -> dict[str, Any]:
        """Resolve the chosen import mode into a PersonaImportSource.

        "none" means no import; "discovered" decodes the selected choice value;
        "custom" resolves a path/CDP URL. Also enforces that at least one
        import action is selected, and that CDP imports do not request a
        profile copy (not possible against a remote endpoint).
        """
        cleaned_data = super().clean()
        self._resolved_import_source = None
        import_mode = str(cleaned_data.get("import_mode") or "none").strip() or "none"
        if import_mode == "none":
            return cleaned_data
        if import_mode == "discovered":
            selection = str(cleaned_data.get("import_discovered_profile") or "").strip()
            if not selection:
                self.add_error("import_discovered_profile", "Choose one of the discovered profiles to import.")
                return cleaned_data
            try:
                self._resolved_import_source = PersonaImportSource.from_choice_value(selection)
            except ValueError as err:
                self.add_error("import_discovered_profile", str(err))
                return cleaned_data
        elif import_mode == "custom":
            raw_value = str(cleaned_data.get("import_source") or "").strip()
            if not raw_value:
                self.add_error("import_source", "Provide an absolute Chromium profile path or a CDP URL.")
                return cleaned_data
            try:
                self._resolved_import_source = resolve_custom_import_source(
                    raw_value,
                    profile_dir=str(cleaned_data.get("import_profile_name") or "").strip() or None,
                )
            except ValueError as err:
                self.add_error("import_source", str(err))
                return cleaned_data
        else:
            self.add_error("import_mode", "Choose how this Persona should be bootstrapped.")
            return cleaned_data
        copy_profile = bool(cleaned_data.get("import_copy_profile"))
        import_cookies = bool(cleaned_data.get("import_extract_cookies"))
        capture_storage = bool(cleaned_data.get("import_capture_storage"))
        if self._resolved_import_source.kind == "cdp":
            if not (import_cookies or capture_storage):
                self.add_error(
                    "import_extract_cookies",
                    "CDP imports can only capture cookies and/or open-tab storage. Profile copying is not available for a remote browser endpoint.",
                )
        elif not (copy_profile or import_cookies or capture_storage):
            raise forms.ValidationError("Select at least one import action.")
        return cleaned_data

    def apply_import(self, persona: Persona) -> PersonaImportResult | None:
        """Run the import resolved during clean(); returns None when no import was requested."""
        if not self._resolved_import_source:
            return None
        return import_persona_from_source(
            persona,
            self._resolved_import_source,
            copy_profile=bool(self.cleaned_data.get("import_copy_profile")),
            import_cookies=bool(self.cleaned_data.get("import_extract_cookies")),
            capture_storage=bool(self.cleaned_data.get("import_capture_storage")),
        )

View File

@@ -0,0 +1,845 @@
"""
Shared persona browser discovery/import helpers.
These helpers are used by both the CLI and the Django admin so Persona import
behavior stays consistent regardless of where it is triggered from.
"""
from __future__ import annotations
import json
import os
import platform
import shutil
import subprocess
import tempfile
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Optional
from urllib.parse import urlparse
from django.utils.html import format_html
from django.utils.safestring import SafeString
if TYPE_CHECKING:
from archivebox.personas.models import Persona
BROWSER_LABELS = {
"chrome": "Google Chrome",
"chromium": "Chromium",
"brave": "Brave",
"edge": "Microsoft Edge",
"custom": "Custom Path",
"persona": "Persona Template",
}
BROWSER_PROFILE_DIR_NAMES = (
"Default",
"Profile ",
"Guest Profile",
)
VOLATILE_PROFILE_COPY_PATTERNS = (
"Cache",
"Code Cache",
"GPUCache",
"ShaderCache",
"Service Worker",
"GCM Store",
"*.log",
"Crashpad",
"BrowserMetrics",
"BrowserMetrics-spare.pma",
"SingletonLock",
"SingletonSocket",
"SingletonCookie",
)
PERSONA_PROFILE_DIR_CANDIDATES = (
"chrome_profile",
"chrome_user_data",
)
@dataclass(frozen=True)
class PersonaImportSource:
    """Immutable description of where persona browser state is imported from.

    Two kinds are used by the visible code: "browser-profile" (a local
    Chromium user-data dir plus an optional profile subdir) and "cdp"
    (a live DevTools endpoint URL).
    """

    kind: str
    browser: str = "custom"            # key into BROWSER_LABELS
    source_name: str | None = None     # human-readable origin name, if any
    user_data_dir: Path | None = None  # Chromium user-data root (browser-profile kind)
    profile_dir: str | None = None     # profile subdir name, e.g. "Default"
    browser_binary: str | None = None  # explicit browser binary override
    cdp_url: str | None = None         # DevTools endpoint URL (cdp kind)

    @property
    def browser_label(self) -> str:
        """Human-readable browser name, falling back to a title-cased key."""
        return BROWSER_LABELS.get(self.browser, self.browser.title())

    @property
    def profile_path(self) -> Path | None:
        """Full path to the specific profile dir, or None if either part is missing."""
        if not self.user_data_dir or not self.profile_dir:
            return None
        return self.user_data_dir / self.profile_dir

    @property
    def display_label(self) -> str:
        """Short label for admin messages, e.g. 'Google Chrome: Work / Default'."""
        if self.kind == "cdp":
            return self.cdp_url or "CDP URL"
        profile_suffix = f" / {self.profile_dir}" if self.profile_dir else ""
        source_prefix = f": {self.source_name}" if self.source_name else ""
        return f"{self.browser_label}{source_prefix}{profile_suffix}"

    @property
    def choice_value(self) -> str:
        """Stable JSON encoding used as the form <option> value; decoded by from_choice_value()."""
        return json.dumps(
            {
                "kind": self.kind,
                "browser": self.browser,
                "source_name": self.source_name or "",
                "user_data_dir": str(self.user_data_dir) if self.user_data_dir else "",
                "profile_dir": self.profile_dir or "",
                "browser_binary": self.browser_binary or "",
                "cdp_url": self.cdp_url or "",
            },
            sort_keys=True,
        )

    def as_choice_label(self) -> SafeString:
        """HTML label for the discovered-profile radio choice (label, binary note, path)."""
        path_str = str(self.profile_path or self.user_data_dir or self.cdp_url or "")
        binary_suffix = f"Using {self.browser_binary}" if self.browser_binary else "Will auto-detect a Chromium binary"
        return format_html(
            '<span class="abx-profile-option">'
            '<strong>{}</strong>'
            '<span class="abx-profile-option__meta">{}</span>'
            '<code>{}</code>'
            "</span>",
            self.display_label,
            binary_suffix,
            path_str,
        )

    @classmethod
    def from_choice_value(cls, value: str) -> "PersonaImportSource":
        """Decode a choice_value JSON payload back into a resolved source.

        Only "browser-profile" payloads are accepted here; the result is
        re-resolved through resolve_browser_profile_source() rather than
        trusted verbatim.

        Raises:
            ValueError: on malformed JSON or a non-browser-profile payload.
        """
        try:
            payload = json.loads(value)
        except json.JSONDecodeError as err:
            raise ValueError("Invalid discovered profile selection.") from err
        if payload.get("kind") != "browser-profile":
            raise ValueError("Invalid discovered profile selection.")
        user_data_dir = Path(str(payload.get("user_data_dir") or "")).expanduser()
        profile_dir = str(payload.get("profile_dir") or "").strip()
        browser = str(payload.get("browser") or "custom").strip().lower() or "custom"
        source_name = str(payload.get("source_name") or "").strip() or None
        browser_binary = str(payload.get("browser_binary") or "").strip() or None
        return resolve_browser_profile_source(
            browser=browser,
            source_name=source_name,
            user_data_dir=user_data_dir,
            profile_dir=profile_dir,
            browser_binary=browser_binary,
        )
@dataclass
class PersonaImportResult:
    """Mutable record of what a persona import run accomplished.

    Non-fatal problems are accumulated in `warnings` rather than raised.
    """

    source: PersonaImportSource  # where the browser state came from
    profile_copied: bool = False
    cookies_imported: bool = False
    storage_captured: bool = False
    user_agent_imported: bool = False
    warnings: list[str] = field(default_factory=list)

    @property
    def did_work(self) -> bool:
        """True when at least one import step actually produced something."""
        return any(
            (
                self.profile_copied,
                self.cookies_imported,
                self.storage_captured,
                self.user_agent_imported,
            )
        )
def get_chrome_user_data_dir() -> Optional[Path]:
    """Locate the default Chrome/Chromium user data dir that contains profiles.

    Returns None when no known location exists (or none has any profiles).
    """
    home = Path.home()
    local_app_data = Path(os.environ.get("LOCALAPPDATA", home / "AppData" / "Local"))
    candidates_by_system: dict[str, list[Path]] = {
        "Darwin": [
            home / "Library" / "Application Support" / "Google" / "Chrome",
            home / "Library" / "Application Support" / "Chromium",
        ],
        "Linux": [
            home / ".config" / "google-chrome",
            home / ".config" / "chromium",
            home / ".config" / "chrome",
            home / "snap" / "chromium" / "common" / "chromium",
        ],
        "Windows": [
            local_app_data / "Google" / "Chrome" / "User Data",
            local_app_data / "Chromium" / "User Data",
        ],
    }
    for root in candidates_by_system.get(platform.system(), []):
        # Only return a directory that actually holds at least one profile.
        if root.exists() and _list_profile_names(root):
            return root
    return None
def get_brave_user_data_dir() -> Optional[Path]:
    """Locate the default Brave user data dir that contains profiles, or None."""
    home = Path.home()
    local_app_data = Path(os.environ.get("LOCALAPPDATA", home / "AppData" / "Local"))
    candidates_by_system: dict[str, list[Path]] = {
        "Darwin": [
            home / "Library" / "Application Support" / "BraveSoftware" / "Brave-Browser",
        ],
        "Linux": [
            home / ".config" / "BraveSoftware" / "Brave-Browser",
        ],
        "Windows": [
            local_app_data / "BraveSoftware" / "Brave-Browser" / "User Data",
        ],
    }
    for root in candidates_by_system.get(platform.system(), []):
        # Only return a directory that actually holds at least one profile.
        if root.exists() and _list_profile_names(root):
            return root
    return None
def get_edge_user_data_dir() -> Optional[Path]:
    """Locate the default Microsoft Edge user data dir that contains profiles, or None."""
    home = Path.home()
    local_app_data = Path(os.environ.get("LOCALAPPDATA", home / "AppData" / "Local"))
    candidates_by_system: dict[str, list[Path]] = {
        "Darwin": [
            home / "Library" / "Application Support" / "Microsoft Edge",
        ],
        "Linux": [
            home / ".config" / "microsoft-edge",
            home / ".config" / "microsoft-edge-beta",
            home / ".config" / "microsoft-edge-dev",
        ],
        "Windows": [
            local_app_data / "Microsoft" / "Edge" / "User Data",
        ],
    }
    for root in candidates_by_system.get(platform.system(), []):
        # Only return a directory that actually holds at least one profile.
        if root.exists() and _list_profile_names(root):
            return root
    return None
def get_browser_binary(browser: str) -> Optional[str]:
    """Return the first existing known binary path for *browser*, or None.

    *browser* is matched case-insensitively against chrome/chromium/brave/edge;
    unknown browsers (and unknown platforms) yield None.
    """
    home = Path.home()
    local_app_data = Path(os.environ.get("LOCALAPPDATA", home / "AppData" / "Local"))
    binaries_by_system: dict[str, dict[str, list[str]]] = {
        "Darwin": {
            "chrome": ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"],
            "chromium": ["/Applications/Chromium.app/Contents/MacOS/Chromium"],
            "brave": ["/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"],
            "edge": ["/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"],
        },
        "Linux": {
            "chrome": ["/usr/bin/google-chrome", "/usr/bin/google-chrome-stable", "/usr/bin/google-chrome-beta", "/usr/bin/google-chrome-unstable"],
            "chromium": ["/usr/bin/chromium", "/usr/bin/chromium-browser"],
            "brave": ["/usr/bin/brave-browser", "/usr/bin/brave-browser-beta", "/usr/bin/brave-browser-nightly"],
            "edge": ["/usr/bin/microsoft-edge", "/usr/bin/microsoft-edge-stable", "/usr/bin/microsoft-edge-beta", "/usr/bin/microsoft-edge-dev"],
        },
        "Windows": {
            "chrome": [
                str(local_app_data / "Google" / "Chrome" / "Application" / "chrome.exe"),
                "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
                "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
            ],
            "chromium": [str(local_app_data / "Chromium" / "Application" / "chrome.exe")],
            "brave": [
                str(local_app_data / "BraveSoftware" / "Brave-Browser" / "Application" / "brave.exe"),
                "C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe",
                "C:\\Program Files (x86)\\BraveSoftware\\Brave-Browser\\Application\\brave.exe",
            ],
            "edge": [
                str(local_app_data / "Microsoft" / "Edge" / "Application" / "msedge.exe"),
                "C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe",
                "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe",
            ],
        },
    }
    candidates = binaries_by_system.get(platform.system(), {}).get(browser.lower(), [])
    for candidate in candidates:
        if candidate and Path(candidate).exists():
            return candidate
    return None
# Maps browser key -> zero-arg function that locates its default user data dir.
# "chrome" and "chromium" share one finder (it checks both install locations).
BROWSER_PROFILE_FINDERS = {
    "chrome": get_chrome_user_data_dir,
    "chromium": get_chrome_user_data_dir,
    "brave": get_brave_user_data_dir,
    "edge": get_edge_user_data_dir,
}
# All supported Chromium-based browser keys, in finder order.
CHROMIUM_BROWSERS = tuple(BROWSER_PROFILE_FINDERS.keys())
# Header lines written at the top of generated cookies.txt files
# (Netscape/curl-compatible cookie jar format); trailing "" yields a blank line.
NETSCAPE_COOKIE_HEADER = [
    "# Netscape HTTP Cookie File",
    "# https://curl.se/docs/http-cookies.html",
    "# This file was generated by ArchiveBox persona cookie extraction",
    "#",
    "# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue",
    "",
]
def validate_persona_name(name: str) -> tuple[bool, str]:
    """Reject persona names that are empty or could escape the personas dir.

    Returns (True, "") when valid, otherwise (False, reason).
    """
    checks = (
        (not name or not name.strip(), "Persona name cannot be empty"),
        ("/" in name or "\\" in name, "Persona name cannot contain path separators (/ or \\)"),
        (".." in name, "Persona name cannot contain parent directory references (..)"),
        (name.startswith("."), "Persona name cannot start with a dot (.)"),
        (any(ch in name for ch in "\x00\n\r"), "Persona name contains invalid characters"),
    )
    for failed, reason in checks:
        if failed:
            return False, reason
    return True, ""
def discover_local_browser_profiles() -> list[PersonaImportSource]:
    """Enumerate importable Chromium profiles: installed browsers + persona templates."""
    sources: list[PersonaImportSource] = []
    for browser, find_user_data_dir in BROWSER_PROFILE_FINDERS.items():
        root = find_user_data_dir()
        if root is None:
            continue
        binary = get_browser_binary(browser)
        for profile_name in _list_profile_names(root):
            try:
                source = resolve_browser_profile_source(
                    browser=browser,
                    user_data_dir=root,
                    profile_dir=profile_name,
                    browser_binary=binary,
                )
            except ValueError:
                # Directory no longer resolves to a valid profile; skip it.
                continue
            sources.append(source)
    sources.extend(discover_persona_template_profiles())
    return sources
def discover_persona_template_profiles(personas_dir: Path | None = None) -> list[PersonaImportSource]:
    """Find importable chrome profiles saved inside persona template directories.

    Searches *personas_dir* when given, otherwise the configured PERSONAS_DIR
    plus ~/.config/abx/personas, de-duplicating resolved roots.
    """
    from archivebox.config.constants import CONSTANTS

    if personas_dir is not None:
        roots = [personas_dir.expanduser()]
    else:
        roots = [
            CONSTANTS.PERSONAS_DIR.expanduser(),
            Path.home() / ".config" / "abx" / "personas",
        ]

    sources: list[PersonaImportSource] = []
    visited: set[Path] = set()
    for root in roots:
        resolved = root.resolve()
        if resolved in visited or not resolved.is_dir():
            continue
        visited.add(resolved)
        persona_dirs = sorted(
            (entry for entry in resolved.iterdir() if entry.is_dir()),
            key=lambda entry: entry.name.lower(),
        )
        for persona_dir in persona_dirs:
            for candidate_name in PERSONA_PROFILE_DIR_CANDIDATES:
                data_dir = persona_dir / candidate_name
                if not data_dir.is_dir():
                    continue
                for profile_name in _list_profile_names(data_dir):
                    try:
                        sources.append(
                            resolve_browser_profile_source(
                                browser="persona",
                                source_name=persona_dir.name,
                                user_data_dir=data_dir,
                                profile_dir=profile_name,
                                browser_binary=get_browser_binary("chrome"),
                            )
                        )
                    except ValueError:
                        continue
    return sources
def resolve_browser_import_source(browser: str, profile_dir: str | None = None) -> PersonaImportSource:
    """Resolve a browser key (and optional profile dir name) into an import source.

    Raises ValueError for unsupported browsers, missing user data dirs, or
    when no profile can be found.
    """
    browser = browser.lower().strip()
    finder = BROWSER_PROFILE_FINDERS.get(browser)
    if finder is None:
        supported = ", ".join(BROWSER_PROFILE_FINDERS)
        raise ValueError(f"Unknown browser: {browser}. Supported browsers: {supported}")
    user_data_dir = finder()
    if not user_data_dir:
        raise ValueError(f"Could not find {browser} profile directory")
    profile_name = profile_dir or pick_default_profile_dir(user_data_dir)
    if not profile_name:
        raise ValueError(f"Could not find a profile in {user_data_dir}")
    return resolve_browser_profile_source(
        browser=browser,
        user_data_dir=user_data_dir,
        profile_dir=profile_name,
        browser_binary=get_browser_binary(browser),
    )
def resolve_browser_profile_source(
    browser: str,
    user_data_dir: Path,
    profile_dir: str,
    source_name: str | None = None,
    browser_binary: str | None = None,
) -> PersonaImportSource:
    """Validate a user-data-dir + profile-dir pair and wrap it as a source.

    Raises ValueError when the root is missing, the profile name is blank,
    or the profile directory does not look like a Chromium profile.
    """
    root = user_data_dir.expanduser()
    if not root.is_absolute():
        root = root.resolve()
    if not root.exists():
        raise ValueError(f"Profile root does not exist: {root}")
    if not profile_dir.strip():
        raise ValueError("Profile directory name cannot be empty.")
    candidate = root / profile_dir
    if not _looks_like_profile_dir(candidate):
        raise ValueError(f"Profile directory does not look valid: {candidate}")
    return PersonaImportSource(
        kind="browser-profile",
        browser=browser,
        source_name=source_name,
        user_data_dir=root,
        profile_dir=profile_dir,
        browser_binary=browser_binary,
    )
def resolve_custom_import_source(raw_value: str, profile_dir: str | None = None) -> PersonaImportSource:
    """Resolve a user-supplied path or CDP URL into a PersonaImportSource.

    Accepts a ws(s)/http(s) CDP URL, an exact profile directory path, or a
    user data dir root (optionally disambiguated by *profile_dir*).
    Raises ValueError for anything that cannot be resolved.
    """
    value = raw_value.strip()
    if not value:
        raise ValueError("Provide an absolute browser profile path or a CDP URL.")
    if _looks_like_cdp_url(value):
        return PersonaImportSource(kind="cdp", cdp_url=value)

    candidate = Path(value).expanduser()
    if not candidate.is_absolute():
        raise ValueError("Custom browser path must be an absolute path.")
    if not candidate.exists():
        raise ValueError(f"Custom browser path does not exist: {candidate}")

    requested_profile = (profile_dir or "").strip()
    if _looks_like_profile_dir(candidate):
        # The path points directly at a profile directory.
        if requested_profile and requested_profile != candidate.name:
            raise ValueError("Profile name does not match the provided profile directory path.")
        return resolve_browser_profile_source(
            browser="custom",
            user_data_dir=candidate.parent.resolve(),
            profile_dir=candidate.name,
        )

    # Otherwise treat the path as a user data dir root and pick a profile.
    chosen_profile = requested_profile or pick_default_profile_dir(candidate)
    if not chosen_profile:
        raise ValueError(
            "Could not find a Chromium profile in that directory. "
            "Provide an exact profile directory path or fill in the profile name field."
        )
    return resolve_browser_profile_source(
        browser="custom",
        user_data_dir=candidate.resolve(),
        profile_dir=chosen_profile,
    )
def pick_default_profile_dir(user_data_dir: Path) -> str | None:
    """Prefer the "Default" profile, else the first discovered profile name, else None."""
    names = _list_profile_names(user_data_dir)
    if "Default" in names:
        return "Default"
    return names[0] if names else None
def import_persona_from_source(
    persona: "Persona",
    source: PersonaImportSource,
    *,
    copy_profile: bool = True,
    import_cookies: bool = True,
    capture_storage: bool = False,
) -> PersonaImportResult:
    """Import browser state from *source* into *persona*.

    Depending on the flags and source kind this may copy the whole Chromium
    user data dir, extract cookies into cookies.txt, capture local/session
    storage into auth.json, and adopt the browser's user agent into the
    persona config. Non-fatal problems are collected in result.warnings
    instead of being raised.
    """
    persona.ensure_dirs()
    result = PersonaImportResult(source=source)
    persona_chrome_dir = Path(persona.CHROME_USER_DATA_DIR)
    cookies_file = persona.path / "cookies.txt"
    auth_file = persona.path / "auth.json"
    # User data dir that the local export step should launch against.
    launch_user_data_dir: Path | None = None
    if source.kind == "browser-profile":
        if copy_profile and source.user_data_dir:
            resolved_source_root = source.user_data_dir.resolve()
            resolved_persona_root = persona_chrome_dir.resolve()
            if resolved_source_root == resolved_persona_root:
                # Copying the dir onto itself would destroy it (the copy helper
                # rmtree's the destination first), so skip with a warning.
                result.warnings.append("Skipped profile copy because the selected source is already this persona's chrome_user_data directory.")
            else:
                copy_browser_user_data_dir(resolved_source_root, resolved_persona_root)
                persona.cleanup_chrome_profile(resolved_persona_root)
                result.profile_copied = True
            # Either way, launch against the persona's own copy from here on.
            launch_user_data_dir = resolved_persona_root
        else:
            launch_user_data_dir = source.user_data_dir
    elif copy_profile:
        result.warnings.append("Profile copying is only available for local Chromium profile paths. CDP imports can only pull cookies and open-tab storage.")
    if source.kind == "cdp":
        export_success, auth_payload, export_message = export_browser_state(
            cdp_url=source.cdp_url,
            cookies_output_file=cookies_file if import_cookies else None,
            auth_output_file=auth_file if capture_storage else None,
        )
    else:
        export_success, auth_payload, export_message = export_browser_state(
            user_data_dir=launch_user_data_dir,
            profile_dir=source.profile_dir,
            chrome_binary=source.browser_binary,
            cookies_output_file=cookies_file if import_cookies else None,
            auth_output_file=auth_file if capture_storage else None,
        )
    if not export_success:
        result.warnings.append(export_message or "Browser import failed.")
        return result
    # Mark what actually landed on disk rather than trusting flags alone.
    if import_cookies and cookies_file.exists():
        result.cookies_imported = True
    if capture_storage and auth_file.exists():
        result.storage_captured = True
    if _apply_imported_user_agent(persona, auth_payload):
        result.user_agent_imported = True
    return result
def copy_browser_user_data_dir(source_dir: Path, destination_dir: Path) -> None:
    """Replace *destination_dir* with a copy of *source_dir*.

    Files matching VOLATILE_PROFILE_COPY_PATTERNS (caches, logs, crash dumps,
    Singleton* lock files, ...) are not copied. Symlinks are preserved as-is.
    """
    destination_dir.parent.mkdir(parents=True, exist_ok=True)
    # Remove any previous copy first so stale files don't linger.
    shutil.rmtree(destination_dir, ignore_errors=True)
    skip_volatile = shutil.ignore_patterns(*VOLATILE_PROFILE_COPY_PATTERNS)
    shutil.copytree(source_dir, destination_dir, symlinks=True, ignore=skip_volatile)
def export_browser_state(
    *,
    user_data_dir: Path | None = None,
    cdp_url: str | None = None,
    profile_dir: str | None = None,
    chrome_binary: str | None = None,
    cookies_output_file: Path | None = None,
    auth_output_file: Path | None = None,
) -> tuple[bool, dict | None, str]:
    """Drive the export_browser_state.js Node helper to dump browser state.

    One of *user_data_dir* (local profile) or *cdp_url* (remote browser) must
    be provided. Cookies are written (or merged, when the file already exists)
    into *cookies_output_file* in Netscape format; localStorage/sessionStorage/
    user agent into *auth_output_file* as JSON.

    Returns (success, auth_payload, message) where auth_payload is the parsed
    auth.json dict when one was produced, and message is the helper's output
    (or an error description on failure).
    """
    if not user_data_dir and not cdp_url:
        return False, None, "Missing browser source."

    from abx_plugins import get_plugins_dir
    from archivebox.config.common import STORAGE_CONFIG

    state_script = Path(__file__).with_name("export_browser_state.js")
    if not state_script.exists():
        return False, None, f"Browser state export script not found at {state_script}"

    node_modules_dir = STORAGE_CONFIG.LIB_DIR / "npm" / "node_modules"
    chrome_plugin_dir = Path(get_plugins_dir()).resolve()

    # The Node helper is configured entirely through environment variables.
    env = os.environ.copy()
    env["NODE_MODULES_DIR"] = str(node_modules_dir)
    env["ARCHIVEBOX_ABX_PLUGINS_DIR"] = str(chrome_plugin_dir)
    if user_data_dir:
        env["CHROME_USER_DATA_DIR"] = str(user_data_dir)
    if cdp_url:
        env["CHROME_CDP_URL"] = cdp_url
        env["CHROME_IS_LOCAL"] = "false"
    if chrome_binary:
        env["CHROME_BINARY"] = str(chrome_binary)
    if profile_dir:
        # Append --profile-directory to CHROME_ARGS_EXTRA, preserving any
        # pre-existing args (accepted as a JSON list or comma-separated text).
        extra_arg = f"--profile-directory={profile_dir}"
        existing_extra = env.get("CHROME_ARGS_EXTRA", "").strip()
        args_list: list[str] = []
        if existing_extra:
            if existing_extra.startswith("["):
                try:
                    parsed = json.loads(existing_extra)
                    if isinstance(parsed, list):
                        args_list.extend(str(x) for x in parsed)
                except Exception:
                    args_list.extend([s.strip() for s in existing_extra.split(",") if s.strip()])
            else:
                args_list.extend([s.strip() for s in existing_extra.split(",") if s.strip()])
        args_list.append(extra_arg)
        env["CHROME_ARGS_EXTRA"] = json.dumps(args_list)

    # When an output file already exists we export into a temp file first and
    # merge afterwards so existing entries are preserved.
    temp_dir: Path | None = None
    tmp_cookies_file: Path | None = None
    tmp_auth_file: Path | None = None
    try:
        if cookies_output_file and cookies_output_file.exists():
            temp_dir = Path(tempfile.mkdtemp(prefix="ab_browser_state_"))
            tmp_cookies_file = temp_dir / "cookies.txt"
            env["COOKIES_OUTPUT_FILE"] = str(tmp_cookies_file)
        elif cookies_output_file:
            env["COOKIES_OUTPUT_FILE"] = str(cookies_output_file)
        if auth_output_file and auth_output_file.exists():
            temp_dir = temp_dir or Path(tempfile.mkdtemp(prefix="ab_browser_state_"))
            tmp_auth_file = temp_dir / "auth.json"
            env["AUTH_STORAGE_OUTPUT_FILE"] = str(tmp_auth_file)
        elif auth_output_file:
            env["AUTH_STORAGE_OUTPUT_FILE"] = str(auth_output_file)
        else:
            # Always capture auth storage (into a temp file) so the user agent
            # can be read even when the caller didn't ask to keep auth.json.
            temp_dir = temp_dir or Path(tempfile.mkdtemp(prefix="ab_browser_state_"))
            tmp_auth_file = temp_dir / "auth.json"
            env["AUTH_STORAGE_OUTPUT_FILE"] = str(tmp_auth_file)

        try:
            result = subprocess.run(
                ["node", str(state_script)],
                env=env,
                capture_output=True,
                text=True,
                timeout=120,
            )
        except subprocess.TimeoutExpired:
            return False, None, "Browser state export timed out."
        except FileNotFoundError:
            return False, None, "Node.js was not found, so ArchiveBox could not extract browser state."
        except Exception as err:
            return False, None, f"Browser state export failed: {err}"

        if result.returncode != 0:
            message = (result.stderr or result.stdout or "").strip() or "Browser state export failed."
            return False, None, message

        auth_payload: dict | None = None
        if cookies_output_file and tmp_cookies_file and tmp_cookies_file.exists():
            _merge_netscape_cookies(cookies_output_file, tmp_cookies_file)
        if auth_output_file and tmp_auth_file and tmp_auth_file.exists():
            _merge_auth_storage(auth_output_file, tmp_auth_file)
            auth_payload = _load_auth_storage(tmp_auth_file)
        elif auth_output_file and auth_output_file.exists():
            auth_payload = _load_auth_storage(auth_output_file)
        elif tmp_auth_file and tmp_auth_file.exists():
            auth_payload = _load_auth_storage(tmp_auth_file)

        return True, auth_payload, (result.stderr or result.stdout or "").strip()
    finally:
        # BUGFIX: previously the temp dir leaked on every early-return failure
        # path (timeout, missing node, nonzero exit); always clean it up.
        if temp_dir and temp_dir.exists():
            shutil.rmtree(temp_dir, ignore_errors=True)
def _list_profile_names(user_data_dir: Path) -> list[str]:
    """Return case-insensitively sorted names of subdirs that look like profiles.

    Chromium's internal "System Profile" directory is always excluded.
    Returns [] when *user_data_dir* is missing or not a directory.
    """
    if not user_data_dir.is_dir():
        return []
    names: list[str] = []
    for child in sorted(user_data_dir.iterdir(), key=lambda path: path.name.lower()):
        if not child.is_dir() or child.name == "System Profile":
            continue
        # _looks_like_profile_dir already accepts the conventional names
        # ("Default", "Profile N", "Guest Profile") as well as dirs containing
        # profile marker files, so a single check covers both cases — the old
        # separate branch for named profiles was redundant.
        if _looks_like_profile_dir(child):
            names.append(child.name)
    return names
def _looks_like_profile_dir(path: Path) -> bool:
    """Heuristic: is *path* a Chromium profile directory?

    True when it contains any known profile marker file/dir, or when its name
    matches one of the conventional profile name prefixes.
    """
    if not path.is_dir():
        return False
    marker_names = (
        "Preferences",
        "History",
        "Cookies",
        "Network/Cookies",
        "Local Storage",
        "Session Storage",
    )
    if any((path / marker).exists() for marker in marker_names):
        return True
    # Fall back to the conventional profile directory names (prefix match).
    return any(path.name.startswith(prefix) for prefix in BROWSER_PROFILE_DIR_NAMES)
def _looks_like_cdp_url(value: str) -> bool:
parsed = urlparse(value)
return parsed.scheme in {"ws", "wss", "http", "https"} and bool(parsed.netloc)
def _parse_netscape_cookies(path: Path) -> dict[tuple[str, str, str], tuple[str, str, str, str, str, str, str]]:
cookies: dict[tuple[str, str, str], tuple[str, str, str, str, str, str, str]] = {}
if not path.exists():
return cookies
for line in path.read_text().splitlines():
if not line or line.startswith("#"):
continue
parts = line.split("\t")
if len(parts) < 7:
continue
domain, include_subdomains, cookie_path, secure, expiry, name, value = parts[:7]
cookies[(domain, cookie_path, name)] = (domain, include_subdomains, cookie_path, secure, expiry, name, value)
return cookies
def _write_netscape_cookies(
    path: Path,
    cookies: dict[tuple[str, str, str], tuple[str, str, str, str, str, str, str]],
) -> None:
    """Serialize cookies (as produced by _parse_netscape_cookies) to Netscape format."""
    body = ["\t".join(fields) for fields in cookies.values()]
    path.write_text("\n".join(NETSCAPE_COOKIE_HEADER + body) + "\n")
def _merge_netscape_cookies(existing_file: Path, new_file: Path) -> None:
    """Merge cookies from *new_file* into *existing_file*; new entries win on conflict."""
    merged = _parse_netscape_cookies(existing_file)
    merged.update(_parse_netscape_cookies(new_file))
    _write_netscape_cookies(existing_file, merged)
def _merge_auth_storage(existing_file: Path, new_file: Path) -> None:
    """Merge the auth.json payload in *new_file* into *existing_file*.

    localStorage/sessionStorage merge per-origin (new origins win), cookies
    dedupe by (domain, path, name) with new entries winning, and the newest
    non-empty user_agent is kept. The merged payload is written back to
    *existing_file* as pretty-printed, key-sorted JSON.
    """
    old_payload = _load_auth_storage(existing_file)
    new_payload = _load_auth_storage(new_file)
    local_storage = old_payload.setdefault("localStorage", {})
    session_storage = old_payload.setdefault("sessionStorage", {})
    local_storage.update(new_payload.get("localStorage") or {})
    session_storage.update(new_payload.get("sessionStorage") or {})
    merged = {
        **old_payload,
        **new_payload,
        "cookies": _merge_cookie_dicts(old_payload.get("cookies") or [], new_payload.get("cookies") or []),
        "localStorage": local_storage,
        "sessionStorage": session_storage,
        "user_agent": new_payload.get("user_agent") or old_payload.get("user_agent") or "",
    }
    existing_file.write_text(json.dumps(merged, indent=2, sort_keys=True) + "\n")
def _load_auth_storage(path: Path) -> dict:
if not path.exists():
return {
"TYPE": "auth",
"cookies": [],
"localStorage": {},
"sessionStorage": {},
}
try:
payload = json.loads(path.read_text())
except json.JSONDecodeError:
return {
"TYPE": "auth",
"cookies": [],
"localStorage": {},
"sessionStorage": {},
}
if not isinstance(payload, dict):
return {
"TYPE": "auth",
"cookies": [],
"localStorage": {},
"sessionStorage": {},
}
return payload
def _merge_cookie_dicts(existing: list[dict], new: list[dict]) -> list[dict]:
merged: dict[tuple[str, str, str], dict] = {}
for cookie in existing:
key = (str(cookie.get("domain") or ""), str(cookie.get("path") or "/"), str(cookie.get("name") or ""))
merged[key] = cookie
for cookie in new:
key = (str(cookie.get("domain") or ""), str(cookie.get("path") or "/"), str(cookie.get("name") or ""))
merged[key] = cookie
return list(merged.values())
def _apply_imported_user_agent(persona: "Persona", auth_payload: dict | None) -> bool:
if not auth_payload:
return False
user_agent = str(auth_payload.get("user_agent") or "").strip()
if not user_agent:
return False
config = dict(persona.config or {})
if config.get("USER_AGENT") == user_agent:
return False
config["USER_AGENT"] = user_agent
persona.config = config
persona.save(update_fields=["config"])
return True

View File

@@ -117,6 +117,12 @@ class Persona(ModelWithConfig):
cookies_path = self.path / 'cookies.txt'
return str(cookies_path) if cookies_path.exists() else ''
@property
def AUTH_STORAGE_FILE(self) -> str:
"""Derived path to auth.json for this persona (if it exists)."""
auth_path = self.path / 'auth.json'
return str(auth_path) if auth_path.exists() else ''
def get_derived_config(self) -> dict:
"""
Get config dict with derived paths filled in.
@@ -127,6 +133,7 @@ class Persona(ModelWithConfig):
- CHROME_EXTENSIONS_DIR (derived from persona path)
- CHROME_DOWNLOADS_DIR (derived from persona path)
- COOKIES_FILE (derived from persona path, if file exists)
- AUTH_STORAGE_FILE (derived from persona path, if file exists)
- ACTIVE_PERSONA (set to this persona's name)
"""
derived = dict(self.config or {})
@@ -140,6 +147,8 @@ class Persona(ModelWithConfig):
derived['CHROME_DOWNLOADS_DIR'] = self.CHROME_DOWNLOADS_DIR
if 'COOKIES_FILE' not in derived and self.COOKIES_FILE:
derived['COOKIES_FILE'] = self.COOKIES_FILE
if 'AUTH_STORAGE_FILE' not in derived and self.AUTH_STORAGE_FILE:
derived['AUTH_STORAGE_FILE'] = self.AUTH_STORAGE_FILE
# Always set ACTIVE_PERSONA to this persona's name
derived['ACTIVE_PERSONA'] = self.name

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import json
import mimetypes
from collections import defaultdict
from pathlib import Path
@@ -7,9 +8,10 @@ from pathlib import Path
from asgiref.sync import sync_to_async
from django.utils import timezone
from abx_dl.events import ArchiveResultEvent
from abx_dl.events import ArchiveResultEvent, ProcessCompletedEvent
from abx_dl.services.base import BaseService
from .db import run_db_op
from .process_service import ProcessService, parse_event_datetime
@@ -48,22 +50,93 @@ def _collect_output_metadata(plugin_dir: Path) -> tuple[dict[str, dict], int, st
def _normalize_status(status: str) -> str:
if status == "noresult":
return "skipped"
return "noresults"
return status or "failed"
def _has_content_files(output_files: list[str]) -> bool:
return any(Path(path).suffix not in {".log", ".pid", ".sh"} for path in output_files)
def _iter_archiveresult_records(stdout: str) -> list[dict]:
records: list[dict] = []
for raw_line in stdout.splitlines():
line = raw_line.strip()
if not line.startswith("{"):
continue
try:
record = json.loads(line)
except json.JSONDecodeError:
continue
if record.get("type") == "ArchiveResult":
records.append(record)
return records
class ArchiveResultService(BaseService):
LISTENS_TO = [ArchiveResultEvent]
LISTENS_TO = [ArchiveResultEvent, ProcessCompletedEvent]
EMITS = []
def __init__(self, bus, *, process_service: ProcessService):
self.process_service = process_service
super().__init__(bus)
async def on_ArchiveResultEvent(self, event: ArchiveResultEvent) -> None:
await sync_to_async(self._project, thread_sensitive=True)(event)
async def on_ArchiveResultEvent__Outer(self, event: ArchiveResultEvent) -> None:
snapshot_output_dir = await run_db_op(self._get_snapshot_output_dir, event.snapshot_id)
if snapshot_output_dir is None:
return
plugin_dir = Path(snapshot_output_dir) / event.plugin
output_files, output_size, output_mimetypes = await sync_to_async(_collect_output_metadata)(plugin_dir)
await run_db_op(self._project, event, output_files, output_size, output_mimetypes)
def _project(self, event: ArchiveResultEvent) -> None:
async def on_ProcessCompletedEvent__Outer(self, event: ProcessCompletedEvent) -> None:
if not event.snapshot_id or not event.hook_name.startswith("on_Snapshot"):
return
plugin_dir = Path(event.output_dir)
output_files, output_size, output_mimetypes = await sync_to_async(_collect_output_metadata)(plugin_dir)
records = _iter_archiveresult_records(event.stdout)
if records:
for record in records:
await run_db_op(
self._project_from_process_completed,
event,
record,
output_files,
output_size,
output_mimetypes,
)
return
synthetic_record = {
"plugin": event.plugin_name,
"hook_name": event.hook_name,
"status": "failed" if event.exit_code != 0 else ("succeeded" if _has_content_files(event.output_files) else "skipped"),
"output_str": event.stderr if event.exit_code != 0 else "",
"error": event.stderr if event.exit_code != 0 else "",
}
await run_db_op(
self._project_from_process_completed,
event,
synthetic_record,
output_files,
output_size,
output_mimetypes,
)
def _get_snapshot_output_dir(self, snapshot_id: str) -> str | None:
from archivebox.core.models import Snapshot
snapshot = Snapshot.objects.filter(id=snapshot_id).only("output_dir").first()
return str(snapshot.output_dir) if snapshot is not None else None
def _project(
self,
event: ArchiveResultEvent,
output_files: dict[str, dict],
output_size: int,
output_mimetypes: str,
) -> None:
from archivebox.core.models import ArchiveResult, Snapshot
from archivebox.machine.models import Process
@@ -86,8 +159,6 @@ class ArchiveResultService(BaseService):
},
)
plugin_dir = Path(snapshot.output_dir) / event.plugin
output_files, output_size, output_mimetypes = _collect_output_metadata(plugin_dir)
result.process = process or result.process
result.status = _normalize_status(event.status)
result.output_str = event.output_str
@@ -97,7 +168,28 @@ class ArchiveResultService(BaseService):
result.output_mimetypes = output_mimetypes
result.start_ts = parse_event_datetime(event.start_ts) or result.start_ts or timezone.now()
result.end_ts = parse_event_datetime(event.end_ts) or timezone.now()
result.retry_at = None
if event.error:
result.notes = event.error
result.save()
def _project_from_process_completed(
self,
event: ProcessCompletedEvent,
record: dict,
output_files: dict[str, dict],
output_size: int,
output_mimetypes: str,
) -> None:
archive_result_event = ArchiveResultEvent(
snapshot_id=record.get("snapshot_id") or event.snapshot_id,
plugin=record.get("plugin") or event.plugin_name,
hook_name=record.get("hook_name") or event.hook_name,
status=record.get("status") or "",
process_id=event.process_id,
output_str=record.get("output_str") or "",
output_json=record.get("output_json") if isinstance(record.get("output_json"), dict) else None,
start_ts=event.start_ts,
end_ts=event.end_ts,
error=record.get("error") or (event.stderr if event.exit_code != 0 else ""),
)
self._project(archive_result_event, output_files, output_size, output_mimetypes)

View File

@@ -1,19 +1,23 @@
from __future__ import annotations
from asgiref.sync import sync_to_async
import asyncio
from abx_dl.events import BinaryEvent, BinaryInstalledEvent
from abx_dl.services.base import BaseService
from .db import run_db_op
class BinaryService(BaseService):
LISTENS_TO = [BinaryEvent, BinaryInstalledEvent]
EMITS = []
async def on_BinaryEvent(self, event: BinaryEvent) -> None:
await sync_to_async(self._project_binary, thread_sensitive=True)(event)
async def on_BinaryEvent__Outer(self, event: BinaryEvent) -> None:
await run_db_op(self._project_binary, event)
async def on_BinaryInstalledEvent(self, event: BinaryInstalledEvent) -> None:
await sync_to_async(self._project_installed_binary, thread_sensitive=True)(event)
async def on_BinaryInstalledEvent__Outer(self, event: BinaryInstalledEvent) -> None:
resolved = await asyncio.to_thread(self._resolve_installed_binary_metadata, event)
await run_db_op(self._project_installed_binary, event, resolved)
def _project_binary(self, event: BinaryEvent) -> None:
from archivebox.machine.models import Binary, Machine
@@ -44,7 +48,39 @@ class BinaryService(BaseService):
},
)
def _project_installed_binary(self, event: BinaryInstalledEvent) -> None:
def _resolve_installed_binary_metadata(self, event: BinaryInstalledEvent) -> dict[str, str]:
resolved = {
"abspath": event.abspath or "",
"version": event.version or "",
"sha256": event.sha256 or "",
"binproviders": event.binproviders or "",
"binprovider": event.binprovider or "",
}
if resolved["abspath"] and resolved["version"] and resolved["binprovider"]:
return resolved
try:
from abx_dl.dependencies import load_binary
allowed_providers = resolved["binproviders"] or resolved["binprovider"] or "env,pip,npm,brew,apt"
spec = {
"name": event.name,
"binproviders": allowed_providers,
"overrides": event.overrides or {},
}
binary = load_binary(spec)
resolved["abspath"] = str(getattr(binary, "abspath", None) or resolved["abspath"] or "")
resolved["version"] = str(getattr(binary, "version", None) or resolved["version"] or "")
resolved["sha256"] = str(getattr(binary, "sha256", None) or resolved["sha256"] or "")
provider_name = getattr(getattr(binary, "loaded_binprovider", None), "name", None)
if provider_name:
resolved["binprovider"] = str(provider_name)
except Exception:
pass
return resolved
def _project_installed_binary(self, event: BinaryInstalledEvent, resolved: dict[str, str]) -> None:
from archivebox.machine.models import Binary, Machine
machine = Machine.current()
@@ -55,10 +91,14 @@ class BinaryService(BaseService):
"status": Binary.StatusChoices.QUEUED,
},
)
binary.abspath = event.abspath or binary.abspath
binary.version = event.version or binary.version
binary.sha256 = event.sha256 or binary.sha256
binary.binprovider = event.binprovider or binary.binprovider
binary.abspath = resolved["abspath"] or binary.abspath
binary.version = resolved["version"] or binary.version
binary.sha256 = resolved["sha256"] or binary.sha256
if resolved["binproviders"]:
binary.binproviders = resolved["binproviders"]
binary.binprovider = resolved["binprovider"] or binary.binprovider
if event.overrides and binary.overrides != event.overrides:
binary.overrides = event.overrides
binary.status = Binary.StatusChoices.INSTALLED
binary.retry_at = None
binary.save(update_fields=["abspath", "version", "sha256", "binprovider", "status", "retry_at", "modified_at"])
binary.save(update_fields=["abspath", "version", "sha256", "binproviders", "binprovider", "overrides", "status", "retry_at", "modified_at"])

View File

@@ -1,11 +1,10 @@
from __future__ import annotations
from asgiref.sync import sync_to_async
from django.utils import timezone
from abx_dl.events import CrawlCleanupEvent, CrawlCompletedEvent, CrawlSetupEvent, CrawlStartEvent
from abx_dl.services.base import BaseService
from .db import run_db_op
class CrawlService(BaseService):
LISTENS_TO = [CrawlSetupEvent, CrawlStartEvent, CrawlCleanupEvent, CrawlCompletedEvent]
@@ -15,17 +14,17 @@ class CrawlService(BaseService):
self.crawl_id = crawl_id
super().__init__(bus)
async def on_CrawlSetupEvent(self, event: CrawlSetupEvent) -> None:
await sync_to_async(self._mark_started, thread_sensitive=True)()
async def on_CrawlSetupEvent__Outer(self, event: CrawlSetupEvent) -> None:
await run_db_op(self._mark_started)
async def on_CrawlStartEvent(self, event: CrawlStartEvent) -> None:
await sync_to_async(self._mark_started, thread_sensitive=True)()
async def on_CrawlStartEvent__Outer(self, event: CrawlStartEvent) -> None:
await run_db_op(self._mark_started)
async def on_CrawlCleanupEvent(self, event: CrawlCleanupEvent) -> None:
await sync_to_async(self._mark_started, thread_sensitive=True)()
async def on_CrawlCleanupEvent__Outer(self, event: CrawlCleanupEvent) -> None:
await run_db_op(self._mark_started)
async def on_CrawlCompletedEvent(self, event: CrawlCompletedEvent) -> None:
await sync_to_async(self._mark_completed, thread_sensitive=True)()
async def on_CrawlCompletedEvent__Outer(self, event: CrawlCompletedEvent) -> None:
await run_db_op(self._mark_completed)
def _mark_started(self) -> None:
from archivebox.crawls.models import Crawl

16
archivebox/services/db.py Normal file
View File

@@ -0,0 +1,16 @@
from __future__ import annotations
from asgiref.sync import sync_to_async
from django.db import close_old_connections
def _run_db_op(func, *args, **kwargs):
close_old_connections()
try:
return func(*args, **kwargs)
finally:
close_old_connections()
async def run_db_op(func, *args, **kwargs):
return await sync_to_async(_run_db_op, thread_sensitive=True)(func, *args, **kwargs)

View File

@@ -0,0 +1 @@
from abx_dl.cli import LiveBusUI

View File

@@ -1,16 +1,17 @@
from __future__ import annotations
from asgiref.sync import sync_to_async
from abx_dl.events import MachineEvent
from abx_dl.services.base import BaseService
from .db import run_db_op
class MachineService(BaseService):
LISTENS_TO = [MachineEvent]
EMITS = []
async def on_MachineEvent(self, event: MachineEvent) -> None:
await sync_to_async(self._project, thread_sensitive=True)(event)
async def on_MachineEvent__Outer(self, event: MachineEvent) -> None:
await run_db_op(self._project, event)
def _project(self, event: MachineEvent) -> None:
from archivebox.machine.models import Machine

View File

@@ -3,12 +3,13 @@ from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING
from asgiref.sync import sync_to_async
from django.utils import timezone
from abx_dl.events import ProcessCompletedEvent, ProcessStartedEvent
from abx_dl.services.base import BaseService
from .db import run_db_op
if TYPE_CHECKING:
from archivebox.machine.models import Process
@@ -33,27 +34,33 @@ class ProcessService(BaseService):
self.process_ids: dict[str, str] = {}
super().__init__(bus)
async def on_ProcessStartedEvent(self, event: ProcessStartedEvent) -> None:
await sync_to_async(self._project_started, thread_sensitive=True)(event)
async def on_ProcessStartedEvent__Outer(self, event: ProcessStartedEvent) -> None:
await run_db_op(self._project_started, event)
async def on_ProcessCompletedEvent(self, event: ProcessCompletedEvent) -> None:
await sync_to_async(self._project_completed, thread_sensitive=True)(event)
async def on_ProcessCompletedEvent__Outer(self, event: ProcessCompletedEvent) -> None:
await run_db_op(self._project_completed, event)
def get_db_process_id(self, process_id: str) -> str | None:
return self.process_ids.get(process_id)
def _get_or_create_process(self, event: ProcessStartedEvent | ProcessCompletedEvent) -> "Process":
from archivebox.machine.models import Machine, Process
from archivebox.machine.models import NetworkInterface, Process
db_process_id = self.process_ids.get(event.process_id)
iface = NetworkInterface.current(refresh=True)
if db_process_id:
process = Process.objects.filter(id=db_process_id).first()
if process is not None:
if process.iface_id != iface.id or process.machine_id != iface.machine_id:
process.iface = iface
process.machine = iface.machine
process.save(update_fields=["iface", "machine", "modified_at"])
return process
process_type = Process.TypeChoices.BINARY if event.hook_name.startswith("on_Binary") else Process.TypeChoices.HOOK
process = Process.objects.create(
machine=Machine.current(),
machine=iface.machine,
iface=iface,
process_type=process_type,
pwd=event.output_dir,
cmd=[event.hook_path, *event.hook_args],
@@ -77,12 +84,14 @@ class ProcessService(BaseService):
process.started_at = parse_event_datetime(event.start_ts) or process.started_at or timezone.now()
process.status = process.StatusChoices.RUNNING
process.retry_at = None
process.hydrate_binary_from_context(plugin_name=event.plugin_name, hook_path=event.hook_path)
process.save()
def _project_completed(self, event: ProcessCompletedEvent) -> None:
process = self._get_or_create_process(event)
process.pwd = event.output_dir
process.cmd = [event.hook_path, *event.hook_args]
if not process.cmd:
process.cmd = [event.hook_path, *event.hook_args]
process.env = event.env
process.pid = event.pid or process.pid
process.started_at = parse_event_datetime(event.start_ts) or process.started_at
@@ -92,4 +101,5 @@ class ProcessService(BaseService):
process.exit_code = event.exit_code
process.status = process.StatusChoices.EXITED
process.retry_at = None
process.hydrate_binary_from_context(plugin_name=event.plugin_name, hook_path=event.hook_path)
process.save()

View File

@@ -3,16 +3,21 @@ from __future__ import annotations
import asyncio
import json
import os
import shutil
import subprocess
import sys
import time
from contextlib import nullcontext
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any
from django.utils import timezone
from rich.console import Console
from abx_dl.events import BinaryEvent
from abx_dl.models import INSTALL_URL, Snapshot as AbxSnapshot, discover_plugins
from abx_dl.orchestrator import create_bus, download, install_plugins as abx_install_plugins, setup_services as setup_abx_services
from abx_dl.models import INSTALL_URL, Plugin, Snapshot as AbxSnapshot, discover_plugins, filter_plugins
from abx_dl.orchestrator import create_bus, download, install_plugins as abx_install_plugins, prepare_install_plugins, setup_services as setup_abx_services
from .archive_result_service import ArchiveResultService
from .binary_service import BinaryService
@@ -21,6 +26,7 @@ from .machine_service import MachineService
from .process_service import ProcessService
from .snapshot_service import SnapshotService
from .tag_service import TagService
from .live_ui import LiveBusUI
def _bus_name(prefix: str, identifier: str) -> str:
@@ -35,6 +41,19 @@ def _selected_plugins_from_config(config: dict[str, Any]) -> list[str] | None:
return [name.strip() for name in raw.split(",") if name.strip()]
def _count_selected_hooks(plugins: dict[str, Plugin], selected_plugins: list[str] | None) -> int:
selected = filter_plugins(plugins, selected_plugins) if selected_plugins else plugins
total = 0
for plugin in selected.values():
total += len(list(plugin.get_crawl_hooks()))
total += len(list(plugin.get_snapshot_hooks()))
return total
def _runner_debug(message: str) -> None:
print(f"[runner] {message}", file=sys.stderr, flush=True)
def _attach_bus_trace(bus) -> None:
trace_target = (os.environ.get("ARCHIVEBOX_BUS_TRACE") or "").strip()
if not trace_target:
@@ -78,10 +97,51 @@ async def _stop_bus_trace(bus) -> None:
bus._archivebox_trace_task = None
def ensure_background_runner(*, allow_under_pytest: bool = False) -> bool:
if os.environ.get("PYTEST_CURRENT_TEST") and not allow_under_pytest:
return False
from archivebox.config import CONSTANTS
from archivebox.machine.models import Machine, Process
Process.cleanup_stale_running()
machine = Machine.current()
if Process.objects.filter(
machine=machine,
status=Process.StatusChoices.RUNNING,
process_type=Process.TypeChoices.ORCHESTRATOR,
).exists():
return False
log_path = CONSTANTS.LOGS_DIR / "errors.log"
log_path.parent.mkdir(parents=True, exist_ok=True)
env = os.environ.copy()
env.setdefault("DATA_DIR", str(CONSTANTS.DATA_DIR))
with log_path.open("a", encoding="utf-8") as log_handle:
subprocess.Popen(
[sys.executable, "-m", "archivebox", "run", "--daemon"],
cwd=str(CONSTANTS.DATA_DIR),
env=env,
stdin=subprocess.DEVNULL,
stdout=log_handle,
stderr=log_handle,
start_new_session=True,
)
return True
class CrawlRunner:
MAX_CONCURRENT_SNAPSHOTS = 8
def __init__(self, crawl, *, snapshot_ids: list[str] | None = None, selected_plugins: list[str] | None = None):
def __init__(
self,
crawl,
*,
snapshot_ids: list[str] | None = None,
selected_plugins: list[str] | None = None,
process_discovered_snapshots_inline: bool = True,
):
self.crawl = crawl
self.bus = create_bus(name=_bus_name("ArchiveBox", str(crawl.id)), total_timeout=3600.0)
self.plugins = discover_plugins()
@@ -90,7 +150,12 @@ class CrawlRunner:
self.binary_service = BinaryService(self.bus)
self.tag_service = TagService(self.bus)
self.crawl_service = CrawlService(self.bus, crawl_id=str(crawl.id))
self.snapshot_service = SnapshotService(self.bus, crawl_id=str(crawl.id), schedule_snapshot=self.enqueue_snapshot)
self.process_discovered_snapshots_inline = process_discovered_snapshots_inline
self.snapshot_service = SnapshotService(
self.bus,
crawl_id=str(crawl.id),
schedule_snapshot=self.enqueue_snapshot if process_discovered_snapshots_inline else self.leave_snapshot_queued,
)
self.archive_result_service = ArchiveResultService(self.bus, process_service=self.process_service)
self.selected_plugins = selected_plugins
self.initial_snapshot_ids = snapshot_ids
@@ -100,6 +165,29 @@ class CrawlRunner:
self.persona = None
self.base_config: dict[str, Any] = {}
self.primary_url = ""
self._live_stream = None
def _create_projector_bus(self, *, identifier: str, config_overrides: dict[str, Any]):
bus = create_bus(name=_bus_name("ArchiveBox", identifier), total_timeout=3600.0)
process_service = ProcessService(bus)
MachineService(bus)
BinaryService(bus)
TagService(bus)
CrawlService(bus, crawl_id=str(self.crawl.id))
SnapshotService(
bus,
crawl_id=str(self.crawl.id),
schedule_snapshot=self.enqueue_snapshot if self.process_discovered_snapshots_inline else self.leave_snapshot_queued,
)
ArchiveResultService(bus, process_service=process_service)
abx_services = setup_abx_services(
bus,
plugins=self.plugins,
config_overrides=config_overrides,
auto_install=True,
emit_jsonl=False,
)
return bus, abx_services
async def run(self) -> None:
from asgiref.sync import sync_to_async
@@ -107,35 +195,63 @@ class CrawlRunner:
try:
await sync_to_async(self._prepare, thread_sensitive=True)()
_attach_bus_trace(self.bus)
self.abx_services = setup_abx_services(
self.bus,
plugins=self.plugins,
config_overrides=self.base_config,
auto_install=True,
emit_jsonl=False,
)
if self.crawl.get_system_task() == INSTALL_URL:
await self._run_install_crawl()
else:
snapshot_ids = await sync_to_async(self._initial_snapshot_ids, thread_sensitive=True)()
if snapshot_ids:
root_snapshot_id = snapshot_ids[0]
await self._run_crawl_setup(root_snapshot_id)
for snapshot_id in snapshot_ids:
await self.enqueue_snapshot(snapshot_id)
await self._wait_for_snapshot_tasks()
await self._run_crawl_cleanup(root_snapshot_id)
if self.abx_services is not None:
await self.abx_services.process.wait_for_background_monitors()
live_ui = self._create_live_ui()
with live_ui if live_ui is not None else nullcontext():
_attach_bus_trace(self.bus)
self.abx_services = setup_abx_services(
self.bus,
plugins=self.plugins,
config_overrides=self.base_config,
auto_install=True,
emit_jsonl=False,
)
if self.crawl.get_system_task() == INSTALL_URL:
await self._run_install_crawl()
else:
snapshot_ids = await sync_to_async(self._initial_snapshot_ids, thread_sensitive=True)()
if snapshot_ids:
root_snapshot_id = snapshot_ids[0]
_runner_debug(f"crawl {self.crawl.id} starting crawl setup root_snapshot={root_snapshot_id}")
await self._run_crawl_setup(root_snapshot_id)
_runner_debug(f"crawl {self.crawl.id} finished crawl setup root_snapshot={root_snapshot_id}")
for snapshot_id in snapshot_ids:
await self.enqueue_snapshot(snapshot_id)
_runner_debug(f"crawl {self.crawl.id} waiting for snapshot tasks count={len(self.snapshot_tasks)}")
await self._wait_for_snapshot_tasks()
_runner_debug(f"crawl {self.crawl.id} finished waiting for snapshot tasks")
_runner_debug(f"crawl {self.crawl.id} starting django crawl.cleanup()")
await sync_to_async(self.crawl.cleanup, thread_sensitive=True)()
_runner_debug(f"crawl {self.crawl.id} finished django crawl.cleanup()")
_runner_debug(f"crawl {self.crawl.id} starting abx crawl cleanup root_snapshot={root_snapshot_id}")
await self._run_crawl_cleanup(root_snapshot_id)
_runner_debug(f"crawl {self.crawl.id} finished abx crawl cleanup root_snapshot={root_snapshot_id}")
if self.abx_services is not None:
_runner_debug(f"crawl {self.crawl.id} waiting for main bus background monitors")
await self.abx_services.process.wait_for_background_monitors()
_runner_debug(f"crawl {self.crawl.id} finished waiting for main bus background monitors")
finally:
await _stop_bus_trace(self.bus)
await self.bus.stop()
if self._live_stream is not None:
try:
self._live_stream.close()
except Exception:
pass
self._live_stream = None
await sync_to_async(self._cleanup_persona, thread_sensitive=True)()
crawl = await sync_to_async(Crawl.objects.get, thread_sensitive=True)(id=self.crawl.id)
if crawl.status != Crawl.StatusChoices.SEALED:
crawl.status = Crawl.StatusChoices.SEALED
crawl.retry_at = None
crawl_is_finished = await sync_to_async(crawl.is_finished, thread_sensitive=True)()
if crawl_is_finished:
if crawl.status != Crawl.StatusChoices.SEALED:
crawl.status = Crawl.StatusChoices.SEALED
crawl.retry_at = None
await sync_to_async(crawl.save, thread_sensitive=True)(update_fields=["status", "retry_at", "modified_at"])
else:
if crawl.status == Crawl.StatusChoices.SEALED:
crawl.status = Crawl.StatusChoices.QUEUED
elif crawl.status != Crawl.StatusChoices.STARTED:
crawl.status = Crawl.StatusChoices.STARTED
crawl.retry_at = crawl.retry_at or timezone.now()
await sync_to_async(crawl.save, thread_sensitive=True)(update_fields=["status", "retry_at", "modified_at"])
async def enqueue_snapshot(self, snapshot_id: str) -> None:
@@ -145,17 +261,36 @@ class CrawlRunner:
task = asyncio.create_task(self._run_snapshot(snapshot_id))
self.snapshot_tasks[snapshot_id] = task
async def leave_snapshot_queued(self, snapshot_id: str) -> None:
return None
async def _wait_for_snapshot_tasks(self) -> None:
while True:
active = [task for task in self.snapshot_tasks.values() if not task.done()]
if not active:
pending_tasks: list[asyncio.Task[None]] = []
for snapshot_id, task in list(self.snapshot_tasks.items()):
if task.done():
if self.snapshot_tasks.get(snapshot_id) is task:
self.snapshot_tasks.pop(snapshot_id, None)
task.result()
continue
pending_tasks.append(task)
if not pending_tasks:
return
await asyncio.gather(*active)
done, _pending = await asyncio.wait(pending_tasks, return_when=asyncio.FIRST_COMPLETED)
for task in done:
task.result()
def _prepare(self) -> None:
from archivebox.config.configset import get_config
from archivebox.machine.models import NetworkInterface, Process
self.primary_url = self.crawl.get_urls_list()[0] if self.crawl.get_urls_list() else ""
current_iface = NetworkInterface.current(refresh=True)
current_process = Process.current()
if current_process.iface_id != current_iface.id or current_process.machine_id != current_iface.machine_id:
current_process.iface = current_iface
current_process.machine = current_iface.machine
current_process.save(update_fields=["iface", "machine", "modified_at"])
self.persona = self.crawl.resolve_persona()
self.base_config = get_config(crawl=self.crawl)
if self.selected_plugins is None:
@@ -168,6 +303,52 @@ class CrawlRunner:
if self.persona:
self.persona.cleanup_runtime_for_crawl(self.crawl)
def _create_live_ui(self) -> LiveBusUI | None:
stdout_is_tty = sys.stdout.isatty()
stderr_is_tty = sys.stderr.isatty()
interactive_tty = stdout_is_tty or stderr_is_tty
if not interactive_tty:
return None
stream = sys.stderr if stderr_is_tty else sys.stdout
if os.path.exists("/dev/tty"):
try:
self._live_stream = open("/dev/tty", "w", buffering=1, encoding=getattr(stream, "encoding", None) or "utf-8")
stream = self._live_stream
except OSError:
self._live_stream = None
try:
terminal_size = os.get_terminal_size(stream.fileno())
terminal_width = terminal_size.columns
terminal_height = terminal_size.lines
except (AttributeError, OSError, ValueError):
terminal_size = shutil.get_terminal_size(fallback=(160, 40))
terminal_width = terminal_size.columns
terminal_height = terminal_size.lines
ui_console = Console(
file=stream,
force_terminal=True,
width=terminal_width,
height=terminal_height,
_environ={
"COLUMNS": str(terminal_width),
"LINES": str(terminal_height),
},
)
plugins_label = ", ".join(self.selected_plugins) if self.selected_plugins else f"all ({len(self.plugins)} available)"
live_ui = LiveBusUI(
self.bus,
total_hooks=_count_selected_hooks(self.plugins, self.selected_plugins),
timeout_seconds=int(self.base_config.get("TIMEOUT") or 60),
ui_console=ui_console,
interactive_tty=True,
)
live_ui.print_intro(
url=self.primary_url or INSTALL_URL,
output_dir=Path(self.crawl.output_dir),
plugins_label=plugins_label,
)
return live_ui
def _create_root_snapshots(self) -> list[str]:
created = self.crawl.create_snapshots_from_urls()
snapshots = created or list(self.crawl.snapshot_set.filter(depth=0).order_by("created_at"))
@@ -290,18 +471,34 @@ class CrawlRunner:
parent_snapshot_id=snapshot["parent_snapshot_id"],
crawl_id=str(self.crawl.id),
)
await download(
url=snapshot["url"],
plugins=self.plugins,
output_dir=Path(snapshot["output_dir"]),
selected_plugins=self.selected_plugins,
snapshot_bus, snapshot_services = self._create_projector_bus(
identifier=f"{self.crawl.id}_{snapshot['id']}",
config_overrides=snapshot["config"],
bus=self.bus,
emit_jsonl=False,
snapshot=abx_snapshot,
skip_crawl_setup=True,
skip_crawl_cleanup=True,
)
try:
_attach_bus_trace(snapshot_bus)
_runner_debug(f"snapshot {snapshot_id} starting download()")
await download(
url=snapshot["url"],
plugins=self.plugins,
output_dir=Path(snapshot["output_dir"]),
selected_plugins=self.selected_plugins,
config_overrides=snapshot["config"],
bus=snapshot_bus,
emit_jsonl=False,
snapshot=abx_snapshot,
skip_crawl_setup=True,
skip_crawl_cleanup=True,
)
_runner_debug(f"snapshot {snapshot_id} finished download(), waiting for background monitors")
await snapshot_services.process.wait_for_background_monitors()
_runner_debug(f"snapshot {snapshot_id} finished waiting for background monitors")
finally:
current_task = asyncio.current_task()
if current_task is not None and self.snapshot_tasks.get(snapshot_id) is current_task:
self.snapshot_tasks.pop(snapshot_id, None)
await _stop_bus_trace(snapshot_bus)
await snapshot_bus.stop()
def _load_snapshot_run_data(self, snapshot_id: str):
from archivebox.core.models import Snapshot
@@ -322,11 +519,24 @@ class CrawlRunner:
}
def run_crawl(crawl_id: str, *, snapshot_ids: list[str] | None = None, selected_plugins: list[str] | None = None) -> None:
def run_crawl(
crawl_id: str,
*,
snapshot_ids: list[str] | None = None,
selected_plugins: list[str] | None = None,
process_discovered_snapshots_inline: bool = True,
) -> None:
from archivebox.crawls.models import Crawl
crawl = Crawl.objects.get(id=crawl_id)
asyncio.run(CrawlRunner(crawl, snapshot_ids=snapshot_ids, selected_plugins=selected_plugins).run())
asyncio.run(
CrawlRunner(
crawl,
snapshot_ids=snapshot_ids,
selected_plugins=selected_plugins,
process_discovered_snapshots_inline=process_discovered_snapshots_inline,
).run()
)
async def _run_binary(binary_id: str) -> None:
@@ -397,28 +607,203 @@ async def _run_install(plugin_names: list[str] | None = None) -> None:
BinaryService(bus)
TagService(bus)
ArchiveResultService(bus, process_service=process_service)
live_stream = None
try:
_attach_bus_trace(bus)
await abx_install_plugins(
plugin_names=plugin_names,
plugins=plugins,
config_overrides=config,
emit_jsonl=False,
bus=bus,
)
await abx_services.process.wait_for_background_monitors()
selected_plugins = prepare_install_plugins(plugins, plugin_names=plugin_names)
plugins_label = ", ".join(plugin_names) if plugin_names else f"all ({len(plugins)} available)"
timeout_seconds = int(config.get("TIMEOUT") or 60)
stdout_is_tty = sys.stdout.isatty()
stderr_is_tty = sys.stderr.isatty()
interactive_tty = stdout_is_tty or stderr_is_tty
ui_console = None
live_ui = None
if interactive_tty:
stream = sys.stderr if stderr_is_tty else sys.stdout
if os.path.exists("/dev/tty"):
try:
live_stream = open("/dev/tty", "w", buffering=1, encoding=getattr(stream, "encoding", None) or "utf-8")
stream = live_stream
except OSError:
live_stream = None
try:
terminal_size = os.get_terminal_size(stream.fileno())
terminal_width = terminal_size.columns
terminal_height = terminal_size.lines
except (AttributeError, OSError, ValueError):
terminal_size = shutil.get_terminal_size(fallback=(160, 40))
terminal_width = terminal_size.columns
terminal_height = terminal_size.lines
ui_console = Console(
file=stream,
force_terminal=True,
width=terminal_width,
height=terminal_height,
_environ={
"COLUMNS": str(terminal_width),
"LINES": str(terminal_height),
},
)
with TemporaryDirectory(prefix="archivebox-install-") as temp_dir:
output_dir = Path(temp_dir)
if ui_console is not None:
live_ui = LiveBusUI(
bus,
total_hooks=_count_selected_hooks(selected_plugins, None),
timeout_seconds=timeout_seconds,
ui_console=ui_console,
interactive_tty=interactive_tty,
)
live_ui.print_intro(
url=INSTALL_URL,
output_dir=output_dir,
plugins_label=plugins_label,
)
with live_ui if live_ui is not None else nullcontext():
_attach_bus_trace(bus)
results = await abx_install_plugins(
plugin_names=plugin_names,
plugins=plugins,
output_dir=output_dir,
config_overrides=config,
emit_jsonl=False,
bus=bus,
)
await abx_services.process.wait_for_background_monitors()
if live_ui is not None:
live_ui.print_summary(results, output_dir=output_dir)
finally:
await _stop_bus_trace(bus)
await bus.stop()
try:
if live_stream is not None:
live_stream.close()
except Exception:
pass
def run_install(*, plugin_names: list[str] | None = None) -> None:
asyncio.run(_run_install(plugin_names=plugin_names))
def recover_orphaned_crawls() -> int:
from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot
from archivebox.machine.models import Process
active_crawl_ids: set[str] = set()
running_processes = Process.objects.filter(
status=Process.StatusChoices.RUNNING,
process_type__in=[
Process.TypeChoices.WORKER,
Process.TypeChoices.HOOK,
Process.TypeChoices.BINARY,
],
).only("env")
for proc in running_processes:
env = proc.env or {}
if not isinstance(env, dict):
continue
crawl_id = env.get("CRAWL_ID")
if crawl_id:
active_crawl_ids.add(str(crawl_id))
recovered = 0
now = timezone.now()
orphaned_crawls = Crawl.objects.filter(
status=Crawl.StatusChoices.STARTED,
retry_at__isnull=True,
).prefetch_related("snapshot_set")
for crawl in orphaned_crawls:
if str(crawl.id) in active_crawl_ids:
continue
snapshots = list(crawl.snapshot_set.all())
if not snapshots or all(snapshot.status == Snapshot.StatusChoices.SEALED for snapshot in snapshots):
crawl.status = Crawl.StatusChoices.SEALED
crawl.retry_at = None
crawl.save(update_fields=["status", "retry_at", "modified_at"])
recovered += 1
continue
crawl.retry_at = now
crawl.save(update_fields=["retry_at", "modified_at"])
recovered += 1
return recovered
def recover_orphaned_snapshots() -> int:
from archivebox.crawls.models import Crawl
from archivebox.core.models import ArchiveResult, Snapshot
from archivebox.machine.models import Process
active_snapshot_ids: set[str] = set()
running_processes = Process.objects.filter(
status=Process.StatusChoices.RUNNING,
process_type__in=[
Process.TypeChoices.WORKER,
Process.TypeChoices.HOOK,
Process.TypeChoices.BINARY,
],
).only("env")
for proc in running_processes:
env = proc.env or {}
if not isinstance(env, dict):
continue
snapshot_id = env.get("SNAPSHOT_ID")
if snapshot_id:
active_snapshot_ids.add(str(snapshot_id))
recovered = 0
now = timezone.now()
orphaned_snapshots = (
Snapshot.objects
.filter(status=Snapshot.StatusChoices.STARTED, retry_at__isnull=True)
.select_related("crawl")
.prefetch_related("archiveresult_set")
)
for snapshot in orphaned_snapshots:
if str(snapshot.id) in active_snapshot_ids:
continue
results = list(snapshot.archiveresult_set.all())
if results and all(result.status in ArchiveResult.FINAL_STATES for result in results):
snapshot.status = Snapshot.StatusChoices.SEALED
snapshot.retry_at = None
snapshot.downloaded_at = snapshot.downloaded_at or now
snapshot.save(update_fields=["status", "retry_at", "downloaded_at", "modified_at"])
crawl = snapshot.crawl
if crawl.is_finished() and crawl.status != Crawl.StatusChoices.SEALED:
crawl.status = Crawl.StatusChoices.SEALED
crawl.retry_at = None
crawl.save(update_fields=["status", "retry_at", "modified_at"])
recovered += 1
continue
snapshot.status = Snapshot.StatusChoices.QUEUED
snapshot.retry_at = now
snapshot.save(update_fields=["status", "retry_at", "modified_at"])
crawl = snapshot.crawl
crawl.status = Crawl.StatusChoices.QUEUED
crawl.retry_at = now
crawl.save(update_fields=["status", "retry_at", "modified_at"])
recovered += 1
return recovered
def run_pending_crawls(*, daemon: bool = False, crawl_id: str | None = None) -> int:
from archivebox.crawls.models import Crawl, CrawlSchedule
from archivebox.core.models import Snapshot
from archivebox.machine.models import Binary
while True:
@@ -436,10 +821,48 @@ def run_pending_crawls(*, daemon: bool = False, crawl_id: str | None = None) ->
.first()
)
if binary is not None:
if not binary.claim_processing_lock(lock_seconds=60):
continue
run_binary(str(binary.id))
continue
pending = Crawl.objects.filter(retry_at__lte=timezone.now()).exclude(status=Crawl.StatusChoices.SEALED)
queued_crawls = Crawl.objects.filter(
retry_at__lte=timezone.now(),
status=Crawl.StatusChoices.QUEUED,
)
if crawl_id:
queued_crawls = queued_crawls.filter(id=crawl_id)
queued_crawls = queued_crawls.order_by("retry_at", "created_at")
queued_crawl = queued_crawls.first()
if queued_crawl is not None:
if not queued_crawl.claim_processing_lock(lock_seconds=60):
continue
run_crawl(str(queued_crawl.id), process_discovered_snapshots_inline=False)
continue
if crawl_id is None:
snapshot = (
Snapshot.objects.filter(retry_at__lte=timezone.now())
.exclude(status=Snapshot.StatusChoices.SEALED)
.select_related("crawl")
.order_by("retry_at", "created_at")
.first()
)
if snapshot is not None:
if not snapshot.claim_processing_lock(lock_seconds=60):
continue
run_crawl(
str(snapshot.crawl_id),
snapshot_ids=[str(snapshot.id)],
process_discovered_snapshots_inline=False,
)
continue
pending = Crawl.objects.filter(
retry_at__lte=timezone.now(),
status=Crawl.StatusChoices.STARTED,
)
if crawl_id:
pending = pending.filter(id=crawl_id)
pending = pending.order_by("retry_at", "created_at")
@@ -451,4 +874,7 @@ def run_pending_crawls(*, daemon: bool = False, crawl_id: str | None = None) ->
continue
return 0
run_crawl(str(crawl.id))
if not crawl.claim_processing_lock(lock_seconds=60):
continue
run_crawl(str(crawl.id), process_discovered_snapshots_inline=False)

View File

@@ -1,13 +1,13 @@
from __future__ import annotations
import re
from asgiref.sync import sync_to_async
from django.utils import timezone
from abx_dl.events import SnapshotCompletedEvent, SnapshotEvent
from abx_dl.services.base import BaseService
from .db import run_db_op
class SnapshotService(BaseService):
LISTENS_TO = [SnapshotEvent, SnapshotCompletedEvent]
@@ -18,13 +18,17 @@ class SnapshotService(BaseService):
self.schedule_snapshot = schedule_snapshot
super().__init__(bus)
async def on_SnapshotEvent(self, event: SnapshotEvent) -> None:
snapshot_id = await sync_to_async(self._project_snapshot, thread_sensitive=True)(event)
async def on_SnapshotEvent__Outer(self, event: SnapshotEvent) -> None:
snapshot_id = await run_db_op(self._project_snapshot, event)
if snapshot_id:
await sync_to_async(self._ensure_crawl_symlink)(snapshot_id)
if snapshot_id and event.depth > 0:
await self.schedule_snapshot(snapshot_id)
async def on_SnapshotCompletedEvent(self, event: SnapshotCompletedEvent) -> None:
await sync_to_async(self._seal_snapshot, thread_sensitive=True)(event.snapshot_id)
async def on_SnapshotCompletedEvent__Outer(self, event: SnapshotCompletedEvent) -> None:
snapshot_id = await run_db_op(self._seal_snapshot, event.snapshot_id)
if snapshot_id:
await sync_to_async(self._write_snapshot_details)(snapshot_id)
def _project_snapshot(self, event: SnapshotEvent) -> str | None:
from archivebox.core.models import Snapshot
@@ -39,7 +43,6 @@ class SnapshotService(BaseService):
snapshot.status = Snapshot.StatusChoices.STARTED
snapshot.retry_at = None
snapshot.save(update_fields=["status", "retry_at", "modified_at"])
snapshot.ensure_crawl_symlink()
return str(snapshot.id)
if event.depth > crawl.max_depth:
@@ -73,56 +76,36 @@ class SnapshotService(BaseService):
if snapshot.status != Snapshot.StatusChoices.SEALED:
snapshot.status = Snapshot.StatusChoices.QUEUED
snapshot.save(update_fields=["status", "retry_at", "modified_at"])
snapshot.ensure_crawl_symlink()
return str(snapshot.id)
def _url_passes_filters(self, crawl, parent_snapshot, url: str) -> bool:
from archivebox.config.configset import get_config
return crawl.url_passes_filters(url, snapshot=parent_snapshot)
config = get_config(
user=getattr(crawl, "created_by", None),
crawl=crawl,
snapshot=parent_snapshot,
)
def to_pattern_list(value):
if isinstance(value, list):
return value
if isinstance(value, str):
return [pattern.strip() for pattern in value.split(",") if pattern.strip()]
return []
allowlist = to_pattern_list(config.get("URL_ALLOWLIST", ""))
denylist = to_pattern_list(config.get("URL_DENYLIST", ""))
for pattern in denylist:
try:
if re.search(pattern, url):
return False
except re.error:
continue
if allowlist:
for pattern in allowlist:
try:
if re.search(pattern, url):
return True
except re.error:
continue
return False
return True
def _seal_snapshot(self, snapshot_id: str) -> None:
def _seal_snapshot(self, snapshot_id: str) -> str | None:
from archivebox.core.models import Snapshot
snapshot = Snapshot.objects.filter(id=snapshot_id).first()
if snapshot is None:
return
return None
snapshot.status = Snapshot.StatusChoices.SEALED
snapshot.retry_at = None
snapshot.downloaded_at = snapshot.downloaded_at or timezone.now()
snapshot.save(update_fields=["status", "retry_at", "downloaded_at", "modified_at"])
return str(snapshot.id)
def _ensure_crawl_symlink(self, snapshot_id: str) -> None:
from archivebox.core.models import Snapshot
snapshot = Snapshot.objects.filter(id=snapshot_id).select_related("crawl", "crawl__created_by").first()
if snapshot is not None:
snapshot.ensure_crawl_symlink()
def _write_snapshot_details(self, snapshot_id: str) -> None:
from archivebox.core.models import Snapshot
snapshot = Snapshot.objects.filter(id=snapshot_id).select_related("crawl", "crawl__created_by").first()
if snapshot is None:
return
snapshot.write_index_jsonl()
snapshot.write_json_details()
snapshot.write_html_details()

View File

@@ -1,16 +1,17 @@
from __future__ import annotations
from asgiref.sync import sync_to_async
from abx_dl.events import TagEvent
from abx_dl.services.base import BaseService
from .db import run_db_op
class TagService(BaseService):
LISTENS_TO = [TagEvent]
EMITS = []
async def on_TagEvent(self, event: TagEvent) -> None:
await sync_to_async(self._project, thread_sensitive=True)(event)
async def on_TagEvent__Outer(self, event: TagEvent) -> None:
await run_db_op(self._project, event)
def _project(self, event: TagEvent) -> None:
from archivebox.core.models import Snapshot, Tag

View File

@@ -1083,8 +1083,11 @@
width: 100% !important;
}
body.filters-collapsed.change-list #changelist .changelist-form-container > div {
body.filters-collapsed.change-list #changelist .changelist-form-container > div,
body.filters-collapsed.change-list #changelist .changelist-form-container > form {
max-width: 100% !important;
width: 100% !important;
flex: 1 1 100% !important;
}
/* Actions bar */
@@ -1372,7 +1375,8 @@
order: 2;
align-self: flex-start;
}
body.change-list #changelist .changelist-form-container > div {
body.change-list #changelist .changelist-form-container > div,
body.change-list #changelist .changelist-form-container > form {
flex: 1 1 auto;
min-width: 0;
order: 1;

View File

@@ -0,0 +1,268 @@
{% extends "admin/change_form.html" %}
{% block bodyclass %}{{ block.super }} app-core model-tag tag-form-page{% endblock %}
{% block extrastyle %}
{{ block.super }}
<style>
.tag-form-hero {
margin: 0 0 20px;
padding: 22px 24px;
border-radius: 20px;
border: 1px solid #dbe4ee;
background:
radial-gradient(circle at top right, rgba(245, 158, 11, 0.12), transparent 30%),
linear-gradient(135deg, #fff7ed 0%, #ffffff 48%, #eff6ff 100%);
box-shadow: 0 12px 30px rgba(15, 23, 42, 0.06);
display: grid;
gap: 16px;
grid-template-columns: minmax(0, 1.7fr) minmax(260px, 1fr);
}
.tag-form-hero h2 {
margin: 0 0 8px;
font-size: 28px;
line-height: 1.05;
color: #111827;
}
.tag-form-hero p {
margin: 0;
color: #475569;
font-size: 14px;
max-width: 70ch;
}
.tag-form-hero__meta {
display: grid;
gap: 10px;
}
.tag-form-hero__meta div {
padding: 14px 16px;
border-radius: 14px;
border: 1px solid rgba(203, 213, 225, 0.85);
background: rgba(255, 255, 255, 0.88);
}
.tag-form-hero__meta span {
display: block;
margin-bottom: 8px;
font-size: 11px;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.08em;
color: #64748b;
}
.tag-similar-panel {
margin-top: 18px;
padding: 18px;
border-radius: 18px;
border: 1px solid #dbe4ee;
background: #fff;
box-shadow: 0 10px 24px rgba(15, 23, 42, 0.05);
}
.tag-similar-panel h3 {
margin: 0 0 6px;
font-size: 16px;
color: #111827;
}
.tag-similar-panel p {
margin: 0 0 14px;
font-size: 13px;
color: #64748b;
}
.tag-similar-list {
display: grid;
gap: 10px;
grid-template-columns: repeat(auto-fit, minmax(240px, 1fr));
}
.tag-similar-card {
display: grid;
gap: 8px;
padding: 14px 16px;
border-radius: 16px;
border: 1px solid #dbe4ee;
background: #f8fafc;
text-decoration: none;
color: #0f172a;
}
.tag-similar-card strong {
font-size: 15px;
line-height: 1.1;
}
.tag-similar-card span {
font-size: 12px;
color: #64748b;
}
.tag-similar-card__snapshots {
display: flex;
flex-wrap: wrap;
gap: 8px;
}
.tag-similar-snapshot {
display: inline-flex;
align-items: center;
gap: 6px;
min-width: 0;
max-width: 100%;
padding: 6px 8px;
border-radius: 999px;
background: #fff;
border: 1px solid #dbe4ee;
font-size: 11px;
color: #334155;
}
.tag-similar-snapshot img {
width: 14px;
height: 14px;
border-radius: 4px;
flex: 0 0 auto;
}
.tag-similar-empty {
padding: 16px;
border-radius: 16px;
border: 1px dashed #cbd5e1;
background: #f8fafc;
color: #64748b;
font-size: 13px;
}
@media (max-width: 920px) {
.tag-form-hero {
grid-template-columns: 1fr;
}
}
</style>
{% endblock %}
{% block form_top %}
<section class="tag-form-hero">
<div>
<h2>{% if add %}New Tag{% else %}Edit Tag{% endif %}</h2>
<p>Similar tags are shown below while typing.</p>
</div>
<div class="tag-form-hero__meta">
<div>
<span>Matches</span>
<strong>Current tags</strong>
</div>
<div>
<span>Links</span>
<strong>Open filtered snapshots</strong>
</div>
</div>
</section>
{{ block.super }}
{% endblock %}
{% block after_field_sets %}
{{ block.super }}
<section
id="tag-similar-panel"
class="tag-similar-panel"
data-search-url="{{ tag_search_api_url }}"
>
<h3>Similar Tags</h3>
<p>Updates while typing.</p>
<div id="tag-similar-list" class="tag-similar-list"></div>
</section>
{{ tag_similar_cards|json_script:"abx-tag-similar-data" }}
<script>
document.addEventListener('DOMContentLoaded', function () {
const panel = document.getElementById('tag-similar-panel');
const list = document.getElementById('tag-similar-list');
const nameInput = document.querySelector('input[data-tag-name-input="1"]');
if (!panel || !list || !nameInput) return;
const searchUrl = panel.dataset.searchUrl;
let similarCards = JSON.parse(document.getElementById('abx-tag-similar-data').textContent || '[]');
let timeoutId = null;
function escapeHtml(value) {
const div = document.createElement('div');
div.textContent = value == null ? '' : String(value);
return div.innerHTML;
}
function getApiKey() {
return (window.ARCHIVEBOX_API_KEY || '').trim();
}
function withApiKey(url) {
const apiKey = getApiKey();
if (!apiKey) return url;
const separator = url.includes('?') ? '&' : '?';
return url + separator + 'api_key=' + encodeURIComponent(apiKey);
}
function buildHeaders() {
const headers = {};
const apiKey = getApiKey();
if (apiKey) headers['X-ArchiveBox-API-Key'] = apiKey;
return headers;
}
function render(cards) {
  // Render the "similar tags" cards, hiding the tag whose name matches
  // what is currently typed into the form (case-insensitive).
  const currentName = (nameInput.value || '').trim().toLowerCase();
  const filtered = (cards || []).filter(function (card) {
    return (card.name || '').toLowerCase() !== currentName;
  });
  if (!filtered.length) {
    list.innerHTML = '<div class="tag-similar-empty">No similar tags.</div>';
    return;
  }
  list.innerHTML = filtered.map(function (card) {
    const snapshots = (card.snapshots || []).slice(0, 3).map(function (snapshot) {
      // FIX: single quotes inside this single-quoted string must be
      // escaped as \' — the original \\' closed the string literal
      // early and broke the script.
      return '' +
        '<span class="tag-similar-snapshot">' +
        '<img src="' + escapeHtml(snapshot.favicon_url) + '" alt="" onerror="this.style.display=\'none\'">' +
        '<span>' + escapeHtml(snapshot.title) + '</span>' +
        '</span>';
    }).join('');
    return '' +
      '<a class="tag-similar-card" href="' + escapeHtml(card.filter_url) + '">' +
      '<strong>' + escapeHtml(card.name) + '</strong>' +
      '<span>' + escapeHtml(card.num_snapshots) + ' snapshots · slug: ' + escapeHtml(card.slug) + '</span>' +
      '<div class="tag-similar-card__snapshots">' + (snapshots || '<span class="tag-similar-snapshot">No snapshots</span>') + '</div>' +
      '</a>';
  }).join('');
}
async function fetchSimilar(query) {
const response = await fetch(withApiKey(searchUrl + '?q=' + encodeURIComponent(query || '')), {
headers: buildHeaders(),
credentials: 'same-origin',
});
if (!response.ok) return [];
const payload = await response.json();
return payload.tags || [];
}
nameInput.addEventListener('input', function () {
window.clearTimeout(timeoutId);
timeoutId = window.setTimeout(async function () {
similarCards = await fetchSimilar((nameInput.value || '').trim());
render(similarCards);
}, 140);
});
render(similarCards);
});
</script>
{% endblock %}

View File

@@ -0,0 +1,997 @@
{% extends "admin/change_list.html" %}
{% block bodyclass %}{{ block.super }} app-core model-tag change-list tag-admin-page{% endblock %}
{% block object-tools %}{% endblock %}
{% block extrastyle %}
{{ block.super }}
<style>
.tag-admin-shell {
display: grid;
gap: 12px;
}
.tag-admin-toolbar {
display: flex;
flex-wrap: wrap;
gap: 12px;
align-items: start;
}
.tag-admin-panel {
flex: 1 1 320px;
padding: 12px;
border-radius: 16px;
border: 1px solid #dbe4ee;
background: #fff;
box-shadow: 0 8px 18px rgba(15, 23, 42, 0.05);
}
.tag-admin-panel--search {
flex: 3 1 360px;
}
.tag-admin-panel--filters {
flex: 3 1 440px;
}
.tag-admin-panel--create {
flex: 1 1 280px;
}
.tag-admin-panel h2 {
margin: 0 0 12px;
font-size: 16px;
color: #0f172a;
}
.tag-create-form,
.tag-search-form {
display: grid;
gap: 10px;
}
.tag-input-row {
display: flex;
gap: 10px;
align-items: center;
}
.tag-create-form .tag-input-row {
display: grid;
grid-template-columns: minmax(0, 1fr) auto;
align-items: center;
}
.tag-input-row input {
flex: 1 1 auto;
min-width: 0;
height: 40px;
box-sizing: border-box;
padding: 0 12px;
line-height: 1.2;
border-radius: 10px;
border: 1px solid #cbd5e1;
background: #f8fafc;
font-size: 13px;
color: #0f172a;
}
.tag-input-row input:focus {
outline: none;
border-color: #0ea5e9;
box-shadow: 0 0 0 4px rgba(14, 165, 233, 0.14);
background: #fff;
}
.tag-button,
.tag-chip-button {
border: 0;
border-radius: 10px;
cursor: pointer;
font-weight: 700;
transition: transform 0.12s ease, box-shadow 0.12s ease, opacity 0.12s ease;
}
.tag-button:hover,
.tag-chip-button:hover {
transform: translateY(-1px);
box-shadow: 0 8px 20px rgba(15, 23, 42, 0.08);
}
.tag-button:disabled,
.tag-chip-button:disabled {
cursor: wait;
opacity: 0.6;
transform: none;
box-shadow: none;
}
.tag-button {
flex: 0 0 auto;
height: 40px;
padding: 0 12px;
background: linear-gradient(135deg, #0f766e 0%, #0ea5e9 100%);
color: #fff;
white-space: nowrap;
font-size: 12px;
}
.tag-toolbar-meta {
display: flex;
align-items: center;
justify-content: space-between;
gap: 10px;
flex-wrap: wrap;
font-size: 12px;
color: #64748b;
}
.tag-toolbar-meta strong {
color: #0f172a;
}
.tag-help {
margin: 0;
font-size: 12px;
color: #64748b;
}
.tag-filter-grid {
display: grid;
gap: 10px;
grid-template-columns: repeat(3, minmax(0, 1fr));
}
.tag-select-field {
display: grid;
gap: 4px;
min-width: 0;
font-size: 11px;
font-weight: 700;
color: #475569;
}
.tag-select-field select {
width: 100%;
min-width: 0;
height: 40px;
box-sizing: border-box;
padding: 0 10px;
line-height: 1.2;
border-radius: 10px;
border: 1px solid #cbd5e1;
background: #f8fafc;
color: #0f172a;
font-size: 12px;
vertical-align: middle;
}
.tag-select-field select:focus {
outline: none;
border-color: #0ea5e9;
box-shadow: 0 0 0 4px rgba(14, 165, 233, 0.14);
background: #fff;
}
.tag-grid {
display: grid;
gap: 12px;
grid-template-columns: repeat(auto-fit, minmax(240px, 1fr));
}
.tag-card {
position: relative;
display: grid;
gap: 10px;
padding: 10px;
border-radius: 16px;
border: 1px solid #dbe4ee;
background:
linear-gradient(180deg, rgba(255, 255, 255, 0.96) 0%, rgba(248, 250, 252, 0.94) 100%);
box-shadow: 0 8px 18px rgba(15, 23, 42, 0.05);
transition: transform 0.14s ease, border-color 0.14s ease, box-shadow 0.14s ease;
cursor: pointer;
}
.tag-card:hover {
transform: translateY(-2px);
border-color: #93c5fd;
box-shadow: 0 14px 26px rgba(15, 23, 42, 0.08);
}
.tag-card__header {
display: flex;
justify-content: space-between;
gap: 10px;
align-items: flex-start;
}
.tag-card__title {
flex: 1 1 auto;
min-width: 0;
display: grid;
gap: 4px;
}
.tag-card__title strong,
.tag-card__rename strong {
display: block;
font-size: 17px;
line-height: 1.1;
color: #111827;
word-break: break-word;
}
.tag-card__count {
display: inline-flex;
align-items: center;
white-space: nowrap;
padding: 3px 8px;
border-radius: 999px;
background: #e0f2fe;
color: #075985;
font-size: 11px;
font-weight: 700;
}
.tag-card__actions {
flex: 0 0 auto;
display: flex;
flex-wrap: wrap;
justify-content: flex-end;
align-items: center;
gap: 6px;
}
.tag-chip-button {
height: 30px;
padding: 0 8px;
background: #fff;
border: 1px solid #dbe4ee;
color: #334155;
font-size: 11px;
}
.tag-chip-button.is-danger {
background: #fff1f2;
border-color: #fecdd3;
color: #be123c;
}
.tag-card__rename {
display: none;
gap: 6px;
align-items: center;
flex-wrap: wrap;
margin-top: 2px;
}
.tag-card.is-editing .tag-card__display {
display: none;
}
.tag-card.is-editing .tag-card__rename {
display: flex;
}
.tag-card.is-editing .tag-card__header {
display: grid;
grid-template-columns: minmax(0, 1fr);
}
.tag-card.is-editing .tag-card__actions {
justify-content: flex-start;
}
.tag-card__rename input {
flex: 1 1 220px;
min-width: 0;
height: 34px;
padding: 0 10px;
border-radius: 10px;
border: 1px solid #cbd5e1;
background: #fff;
font-size: 12px;
}
.tag-card__snapshots {
display: grid;
gap: 8px;
grid-template-columns: repeat(auto-fit, minmax(130px, 1fr));
}
.tag-snapshot-badge {
display: flex;
align-items: center;
gap: 8px;
min-width: 0;
padding: 6px 8px;
border-radius: 12px;
border: 1px solid #dbe4ee;
background: rgba(255, 255, 255, 0.86);
text-decoration: none;
color: #0f172a;
}
.tag-snapshot-badge img {
width: 16px;
height: 16px;
border-radius: 4px;
flex: 0 0 auto;
background: #f8fafc;
}
.tag-snapshot-badge span {
min-width: 0;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
font-size: 11px;
font-weight: 600;
}
.tag-card__empty {
padding: 14px;
border-radius: 14px;
border: 1px dashed #cbd5e1;
background: #f8fafc;
color: #64748b;
font-size: 13px;
}
.tag-toast {
position: sticky;
top: 12px;
z-index: 30;
display: none;
width: fit-content;
max-width: min(100%, 420px);
padding: 12px 14px;
border-radius: 14px;
font-size: 13px;
font-weight: 700;
box-shadow: 0 14px 30px rgba(15, 23, 42, 0.12);
}
.tag-toast.is-visible {
display: block;
}
.tag-toast.is-success {
background: #dcfce7;
color: #166534;
}
.tag-toast.is-error {
background: #fee2e2;
color: #991b1b;
}
.tag-empty-state {
padding: 24px 18px;
border-radius: 16px;
border: 1px dashed #cbd5e1;
background: #fff;
text-align: center;
color: #64748b;
font-size: 13px;
}
</style>
{% endblock %}
{% block content %}
<div id="content-main">
<div
id="abx-tag-admin"
class="tag-admin-shell"
data-search-url="{{ tag_search_api_url }}"
data-create-url="{{ tag_create_api_url }}"
>
<section class="tag-admin-toolbar">
<div class="tag-admin-panel tag-admin-panel--search">
<div class="tag-search-form">
<div class="tag-input-row">
<input
id="tag-live-search"
type="search"
placeholder="Search by tag name"
value="{{ initial_query }}"
autocomplete="off"
>
</div>
<div class="tag-toolbar-meta">
<span id="tag-query-label">{% if initial_query %}“{{ initial_query }}”{% else %}All tags{% endif %}</span>
</div>
</div>
</div>
<div class="tag-admin-panel tag-admin-panel--filters">
<div class="tag-filter-grid">
<label class="tag-select-field" for="tag-sort-select">
<span>Sort</span>
<select id="tag-sort-select">
{% for value, label in tag_sort_choices %}
<option value="{{ value }}"{% if value == initial_sort %} selected{% endif %}>{{ label }}</option>
{% endfor %}
</select>
</label>
<label class="tag-select-field" for="tag-created-by-select">
<span>Created By</span>
<select id="tag-created-by-select">
<option value="">All users</option>
{% for value, label in tag_created_by_choices %}
<option value="{{ value }}"{% if value == initial_created_by %} selected{% endif %}>{{ label }}</option>
{% endfor %}
</select>
</label>
<label class="tag-select-field" for="tag-year-select">
<span>Year</span>
<select id="tag-year-select">
<option value="">All years</option>
{% for value in tag_year_choices %}
<option value="{{ value }}"{% if value == initial_year %} selected{% endif %}>{{ value }}</option>
{% endfor %}
</select>
</label>
</div>
</div>
<div class="tag-admin-panel tag-admin-panel--create">
<form id="tag-create-form" class="tag-create-form">
{% csrf_token %}
<div class="tag-input-row">
<input
id="tag-create-name"
type="text"
name="name"
placeholder="New tag name"
autocomplete="off"
value=""
>
<button class="tag-button" type="submit">Create</button>
</div>
</form>
</div>
</section>
<div id="tag-toast" class="tag-toast" aria-live="polite"></div>
<div id="tag-card-grid" class="tag-grid">
{% if initial_tag_cards %}
{% for card in initial_tag_cards %}
<article
class="tag-card"
data-id="{{ card.id }}"
data-filter-url="{{ card.filter_url }}"
data-rename-url="{{ card.rename_url }}"
data-delete-url="{{ card.delete_url }}"
data-export-urls-url="{{ card.export_urls_url }}"
data-export-jsonl-url="{{ card.export_jsonl_url }}"
>
<div class="tag-card__header">
<div class="tag-card__title">
<div class="tag-card__display">
<strong><a href="{{ card.filter_url }}" style="color:inherit;text-decoration:none;">{{ card.name }}</a></strong>
</div>
<div class="tag-card__rename">
<input type="text" value="{{ card.name }}" aria-label="Rename tag {{ card.name }}">
<button type="button" class="tag-chip-button" data-action="save-edit">Save</button>
<button type="button" class="tag-chip-button" data-action="cancel-edit">Cancel</button>
</div>
</div>
<div class="tag-card__actions">
<button type="button" class="tag-chip-button" data-action="edit" aria-label="Rename tag" title="Rename tag"></button>
<button type="button" class="tag-chip-button" data-action="copy-urls">Copy URLs</button>
<button type="button" class="tag-chip-button" data-action="download-jsonl">JSONL</button>
<button type="button" class="tag-chip-button is-danger" data-action="delete">Delete</button>
<span class="tag-card__count">{{ card.num_snapshots }}</span>
</div>
</div>
<div class="tag-card__snapshots">
{% if card.snapshots %}
{% for snapshot in card.snapshots %}
<a class="tag-snapshot-badge" href="{{ snapshot.admin_url }}" title="{{ snapshot.url }}">
<img src="{{ snapshot.favicon_url }}" alt="" onerror="this.style.display='none'">
<span>{{ snapshot.title }}</span>
</a>
{% endfor %}
{% else %}
<div class="tag-card__empty">No snapshots attached yet.</div>
{% endif %}
</div>
</article>
{% endfor %}
{% else %}
<div class="tag-empty-state">No tags.</div>
{% endif %}
</div>
</div>
</div>
{{ initial_tag_cards|json_script:"abx-tag-cards-data" }}
<script>
document.addEventListener('DOMContentLoaded', function () {
const shell = document.getElementById('abx-tag-admin');
if (!shell) return;
const initialCards = JSON.parse(document.getElementById('abx-tag-cards-data').textContent || '[]');
const searchUrl = shell.dataset.searchUrl;
const createUrl = shell.dataset.createUrl;
const searchInput = document.getElementById('tag-live-search');
const sortSelect = document.getElementById('tag-sort-select');
const createdBySelect = document.getElementById('tag-created-by-select');
const yearSelect = document.getElementById('tag-year-select');
const createForm = document.getElementById('tag-create-form');
const createInput = document.getElementById('tag-create-name');
const grid = document.getElementById('tag-card-grid');
const queryLabel = document.getElementById('tag-query-label');
const toast = document.getElementById('tag-toast');
let cards = initialCards;
let searchTimeout = null;
let activeQuery = (searchInput?.value || '').trim();
function escapeHtml(value) {
  // Route untrusted text through a detached element so the browser
  // entity-encodes it, making it safe to interpolate into innerHTML.
  const scratch = document.createElement('div');
  scratch.textContent = value == null ? '' : String(value);
  return scratch.innerHTML;
}
function slugify(value) {
  // Lowercase, collapse every run of non-alphanumerics into a single
  // dash, strip leading/trailing dashes; fall back to 'tag' when the
  // result is empty.
  const text = String(value || '').toLowerCase().trim();
  const slug = text.replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '');
  return slug || 'tag';
}
function getCSRFToken() {
const input = document.querySelector('input[name="csrfmiddlewaretoken"]');
if (input) return input.value;
const cookies = document.cookie.split(';');
for (const cookieRaw of cookies) {
const cookie = cookieRaw.trim();
if (cookie.startsWith('csrftoken=')) return cookie.slice('csrftoken='.length);
}
return '';
}
function getApiKey() {
return (window.ARCHIVEBOX_API_KEY || '').trim();
}
function withApiKey(url) {
  // Append ?api_key=... (or &api_key=... when a query string already
  // exists); returns the url unchanged when no key is configured.
  const apiKey = getApiKey();
  if (!apiKey) {
    return url;
  }
  const joiner = url.includes('?') ? '&' : '?';
  return url + joiner + 'api_key=' + encodeURIComponent(apiKey);
}
function buildHeaders(isJsonBody) {
const headers = {};
if (isJsonBody) headers['Content-Type'] = 'application/json';
const csrfToken = getCSRFToken();
if (csrfToken) headers['X-CSRFToken'] = csrfToken;
const apiKey = getApiKey();
if (apiKey) headers['X-ArchiveBox-API-Key'] = apiKey;
return headers;
}
function setToast(message, tone) {
toast.textContent = message;
toast.className = 'tag-toast is-visible ' + (tone === 'error' ? 'is-error' : 'is-success');
window.clearTimeout(setToast._timer);
setToast._timer = window.setTimeout(function () {
toast.className = 'tag-toast';
toast.textContent = '';
}, 2600);
}
function getCurrentState(overrides) {
const next = overrides || {};
return {
query: typeof next.query === 'string' ? next.query.trim() : (searchInput?.value || '').trim(),
sort: typeof next.sort === 'string' ? next.sort : (sortSelect?.value || 'created_desc'),
created_by: typeof next.created_by === 'string' ? next.created_by : (createdBySelect?.value || ''),
year: typeof next.year === 'string' ? next.year : (yearSelect?.value || ''),
};
}
function syncSearchState(state) {
if (searchInput) searchInput.value = state.query;
if (sortSelect) sortSelect.value = state.sort;
if (createdBySelect) createdBySelect.value = state.created_by;
if (yearSelect) yearSelect.value = state.year;
}
function syncLocation(state) {
const url = new URL(window.location.href);
if (state.query) {
url.searchParams.set('q', state.query);
} else {
url.searchParams.delete('q');
}
if (state.sort && state.sort !== 'created_desc') {
url.searchParams.set('sort', state.sort);
} else {
url.searchParams.delete('sort');
}
if (state.created_by) {
url.searchParams.set('created_by', state.created_by);
} else {
url.searchParams.delete('created_by');
}
if (state.year) {
url.searchParams.set('year', state.year);
} else {
url.searchParams.delete('year');
}
window.history.replaceState({}, '', url.toString());
}
function setMeta(state, count) {
const baseLabel = state.query ? '"' + state.query + '"' : 'All tags';
queryLabel.textContent = baseLabel + ' · ' + count + ' shown';
activeQuery = state.query;
}
function renderCards(nextCards, state) {
cards = Array.isArray(nextCards) ? nextCards : [];
setMeta(state || getCurrentState(), cards.length);
if (!cards.length) {
grid.innerHTML = '<div class="tag-empty-state">No tags.</div>';
return;
}
grid.innerHTML = cards.map(function (card) {
const snapshotHtml = (card.snapshots || []).length
? card.snapshots.map(function (snapshot) {
return '' +
'<a class="tag-snapshot-badge" href="' + escapeHtml(snapshot.admin_url) + '" title="' + escapeHtml(snapshot.url) + '">' +
'<img src="' + escapeHtml(snapshot.favicon_url) + '" alt="" onerror="this.hidden=true">' +
'<span>' + escapeHtml(snapshot.title) + '</span>' +
'</a>';
}).join('')
: '<div class="tag-card__empty">No snapshots attached yet.</div>';
return '' +
'<article class="tag-card" data-id="' + escapeHtml(card.id) + '" data-filter-url="' + escapeHtml(card.filter_url) + '" data-rename-url="' + escapeHtml(card.rename_url) + '" data-delete-url="' + escapeHtml(card.delete_url) + '" data-export-urls-url="' + escapeHtml(card.export_urls_url) + '" data-export-jsonl-url="' + escapeHtml(card.export_jsonl_url) + '">' +
'<div class="tag-card__header">' +
'<div class="tag-card__title">' +
'<div class="tag-card__display">' +
'<strong>' + escapeHtml(card.name) + '</strong>' +
'</div>' +
'<div class="tag-card__rename">' +
'<input type="text" value="' + escapeHtml(card.name) + '" aria-label="Rename tag ' + escapeHtml(card.name) + '">' +
'<button type="button" class="tag-chip-button" data-action="save-edit">Save</button>' +
'<button type="button" class="tag-chip-button" data-action="cancel-edit">Cancel</button>' +
'</div>' +
'</div>' +
'<div class="tag-card__actions">' +
'<button type="button" class="tag-chip-button" data-action="edit" aria-label="Rename tag" title="Rename tag">✎</button>' +
'<button type="button" class="tag-chip-button" data-action="copy-urls">Copy URLs</button>' +
'<button type="button" class="tag-chip-button" data-action="download-jsonl">JSONL</button>' +
'<button type="button" class="tag-chip-button is-danger" data-action="delete">Delete</button>' +
'<span class="tag-card__count">' + escapeHtml(card.num_snapshots) + '</span>' +
'</div>' +
'</div>' +
'<div class="tag-card__snapshots">' + snapshotHtml + '</div>' +
'</article>';
}).join('');
}
async function fetchCards(state) {
const params = new URLSearchParams();
if (state.query) params.set('q', state.query);
if (state.sort) params.set('sort', state.sort);
if (state.created_by) params.set('created_by', state.created_by);
if (state.year) params.set('year', state.year);
const url = withApiKey(searchUrl + '?' + params.toString());
const response = await fetch(url, {
headers: buildHeaders(false),
credentials: 'same-origin',
});
if (!response.ok) {
const message = await response.text();
throw new Error(message || 'Failed to load matching tags');
}
const payload = await response.json();
return {
tags: payload.tags || [],
state: {
query: state.query,
sort: payload.sort || state.sort,
created_by: payload.created_by || '',
year: payload.year || '',
},
};
}
async function refreshCards(overrides) {
const requestedState = getCurrentState(overrides);
const result = await fetchCards(requestedState);
syncSearchState(result.state);
renderCards(result.tags, result.state);
syncLocation(result.state);
return result.tags;
}
async function submitJson(url, method, payload) {
const response = await fetch(withApiKey(url), {
method: method,
headers: buildHeaders(true),
credentials: 'same-origin',
body: JSON.stringify(payload || {}),
});
if (!response.ok) {
let message = 'Request failed';
try {
const data = await response.json();
message = data.detail || data.message || message;
} catch (_err) {
message = await response.text() || message;
}
throw new Error(message);
}
if (response.status === 204) return {};
return response.json();
}
async function copyTextFromUrl(url) {
const response = await fetch(withApiKey(url), {
headers: buildHeaders(false),
credentials: 'same-origin',
});
if (!response.ok) throw new Error('Failed to export URLs');
const text = await response.text();
await copyTextToClipboard(text);
return text;
}
async function copyTextToClipboard(text) {
if (navigator.clipboard && window.isSecureContext) {
try {
await navigator.clipboard.writeText(text);
return;
} catch (_error) {
}
}
const textarea = document.createElement('textarea');
textarea.value = text;
textarea.setAttribute('readonly', '');
textarea.style.position = 'fixed';
textarea.style.top = '-9999px';
textarea.style.left = '-9999px';
document.body.appendChild(textarea);
textarea.focus();
textarea.select();
const copied = document.execCommand('copy');
document.body.removeChild(textarea);
if (!copied) {
throw new Error('Clipboard write failed');
}
}
function getDownloadFilename(response, fallbackFilename) {
  // Extract a filename from the Content-Disposition header, preferring
  // the RFC 5987 "filename*=UTF-8''..." form, then the plain
  // filename="..." form, then the caller's fallback.
  const disposition = response.headers.get('Content-Disposition') || '';
  // FIX: /filename\*=/ matches the literal '*'. The original /filename\\*=/
  // matched "filename" followed by zero-or-more literal backslashes, so the
  // UTF-8 branch never matched a standard header.
  const utf8Match = disposition.match(/filename\*=UTF-8''([^;]+)/i);
  if (utf8Match && utf8Match[1]) {
    return decodeURIComponent(utf8Match[1]);
  }
  const filenameMatch = disposition.match(/filename="?([^";]+)"?/i);
  if (filenameMatch && filenameMatch[1]) {
    return filenameMatch[1];
  }
  return fallbackFilename;
}
async function downloadFileFromUrl(url, fallbackFilename) {
const response = await fetch(withApiKey(url), {
headers: buildHeaders(false),
credentials: 'same-origin',
});
if (!response.ok) {
let message = 'Download failed';
try {
const data = await response.json();
message = data.detail || data.message || message;
} catch (_err) {
message = await response.text() || message;
}
throw new Error(message);
}
const blob = await response.blob();
const downloadUrl = URL.createObjectURL(blob);
const link = document.createElement('a');
link.href = downloadUrl;
link.download = getDownloadFilename(response, fallbackFilename);
document.body.appendChild(link);
link.click();
link.remove();
window.setTimeout(function () {
URL.revokeObjectURL(downloadUrl);
}, 1000);
}
createForm?.addEventListener('submit', async function (event) {
event.preventDefault();
const name = (createInput.value || '').trim();
if (!name) {
setToast('Enter a tag name first.', 'error');
createInput.focus();
return;
}
const button = createForm.querySelector('button[type="submit"]');
button.disabled = true;
try {
const result = await submitJson(createUrl, 'POST', { name: name });
createInput.value = '';
await refreshCards({ query: result.tag_name || name });
setToast(result.created ? 'Tag created.' : 'Existing tag loaded.', 'success');
} catch (error) {
setToast(error.message || 'Failed to create tag.', 'error');
} finally {
button.disabled = false;
}
});
searchInput?.addEventListener('input', function () {
window.clearTimeout(searchTimeout);
searchTimeout = window.setTimeout(async function () {
try {
await refreshCards();
} catch (error) {
setToast(error.message || 'Failed to search tags.', 'error');
}
}, 150);
});
[sortSelect, createdBySelect, yearSelect].forEach(function (field) {
field?.addEventListener('change', async function () {
try {
await refreshCards();
} catch (error) {
setToast(error.message || 'Failed to update tag filters.', 'error');
}
});
});
grid.addEventListener('click', async function (event) {
const actionButton = event.target.closest('[data-action]');
const snapshotLink = event.target.closest('.tag-snapshot-badge');
if (snapshotLink) return;
const cardEl = event.target.closest('.tag-card');
if (!cardEl) return;
if (!actionButton) {
window.location.href = cardEl.dataset.filterUrl;
return;
}
event.preventDefault();
event.stopPropagation();
const action = actionButton.dataset.action;
if (action === 'edit') {
cardEl.classList.add('is-editing');
const input = cardEl.querySelector('.tag-card__rename input');
if (input) {
input.focus();
input.select();
}
return;
}
if (action === 'cancel-edit') {
cardEl.classList.remove('is-editing');
return;
}
if (action === 'save-edit') {
const input = cardEl.querySelector('.tag-card__rename input');
const nextName = (input?.value || '').trim();
if (!nextName) {
setToast('Tag name is required.', 'error');
input?.focus();
return;
}
actionButton.disabled = true;
try {
await submitJson(cardEl.dataset.renameUrl, 'POST', { name: nextName });
await refreshCards();
setToast('Tag renamed.', 'success');
} catch (error) {
setToast(error.message || 'Rename failed.', 'error');
} finally {
actionButton.disabled = false;
}
return;
}
if (action === 'delete') {
const tagName = cardEl.querySelector('.tag-card__display strong')?.textContent || 'this tag';
if (!window.confirm('Delete "' + tagName + '"? This only removes the tag and its tag links.')) return;
actionButton.disabled = true;
try {
await fetch(withApiKey(cardEl.dataset.deleteUrl), {
method: 'DELETE',
headers: buildHeaders(false),
credentials: 'same-origin',
}).then(async function (response) {
if (!response.ok) {
let message = 'Delete failed';
try {
const payload = await response.json();
message = payload.detail || message;
} catch (_err) {
message = await response.text() || message;
}
throw new Error(message);
}
});
await refreshCards();
setToast('Tag deleted.', 'success');
} catch (error) {
setToast(error.message || 'Delete failed.', 'error');
} finally {
actionButton.disabled = false;
}
return;
}
if (action === 'copy-urls') {
actionButton.disabled = true;
try {
await copyTextFromUrl(cardEl.dataset.exportUrlsUrl);
} catch (error) {
setToast(error.message || 'Failed to copy URLs.', 'error');
} finally {
actionButton.disabled = false;
}
return;
}
if (action === 'download-jsonl') {
actionButton.disabled = true;
try {
const tagName = cardEl.querySelector('.tag-card__display strong')?.textContent || 'tag';
await downloadFileFromUrl(cardEl.dataset.exportJsonlUrl, 'tag-' + slugify(tagName) + '-snapshots.jsonl');
} catch (error) {
setToast(error.message || 'Failed to download JSONL.', 'error');
} finally {
actionButton.disabled = false;
}
}
});
grid.addEventListener('keydown', function (event) {
if (event.key !== 'Enter') return;
const input = event.target.closest('.tag-card__rename input');
if (!input) return;
event.preventDefault();
const saveButton = input.closest('.tag-card__rename')?.querySelector('[data-action="save-edit"]');
saveButton?.click();
});
const initialState = getCurrentState();
renderCards(cards, initialState);
syncLocation(initialState);
});
</script>
{% endblock %}

View File

@@ -0,0 +1,249 @@
{% extends "admin/change_form.html" %}
{% block bodyclass %}{{ block.super }} app-personas model-persona{% endblock %}
{% block extrastyle %}
{{ block.super }}
<style>
.persona-import-hero {
margin: 0 0 22px;
padding: 22px 24px;
border-radius: 18px;
border: 1px solid #d8dee9;
background:
radial-gradient(circle at top right, rgba(67, 97, 238, 0.10), transparent 32%),
linear-gradient(135deg, #fff7ed 0%, #ffffff 45%, #ecfeff 100%);
box-shadow: 0 10px 30px rgba(15, 23, 42, 0.06);
display: grid;
gap: 18px;
grid-template-columns: minmax(0, 1.8fr) minmax(280px, 1fr);
align-items: start;
}
.persona-import-hero h2 {
margin: 0 0 8px;
font-size: 28px;
line-height: 1.1;
color: #111827;
}
.persona-import-hero p {
margin: 0;
color: #475569;
max-width: 70ch;
font-size: 14px;
}
.persona-import-hero__meta {
display: grid;
gap: 10px;
grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
}
.persona-import-hero__stat {
padding: 14px 16px;
border-radius: 14px;
background: rgba(255, 255, 255, 0.86);
border: 1px solid rgba(203, 213, 225, 0.85);
}
.persona-import-hero__stat span {
display: block;
font-size: 11px;
text-transform: uppercase;
letter-spacing: 0.08em;
font-weight: 700;
color: #64748b;
margin-bottom: 8px;
}
.persona-import-hero__stat strong,
.persona-import-hero__stat code {
font-size: 18px;
color: #0f172a;
}
.field-import_mode ul,
.field-import_discovered_profile ul {
margin: 0;
padding: 0;
list-style: none;
display: grid;
gap: 12px;
}
.field-import_mode ul {
grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
}
.field-import_discovered_profile ul {
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
max-height: 460px;
overflow: auto;
padding-right: 4px;
}
.field-import_mode li,
.field-import_discovered_profile li {
margin: 0;
}
.field-import_mode label,
.field-import_discovered_profile label {
display: flex;
gap: 12px;
align-items: flex-start;
min-height: 100%;
padding: 14px 16px;
border-radius: 14px;
border: 1px solid #dbe4ee;
background: #fff;
box-shadow: 0 1px 2px rgba(15, 23, 42, 0.04);
cursor: pointer;
transition: transform 0.15s ease, border-color 0.15s ease, box-shadow 0.15s ease;
}
.field-import_mode label:hover,
.field-import_discovered_profile label:hover {
transform: translateY(-1px);
border-color: #7c3aed;
box-shadow: 0 8px 20px rgba(124, 58, 237, 0.10);
}
.field-import_mode input[type="radio"],
.field-import_discovered_profile input[type="radio"] {
margin-top: 3px;
flex: 0 0 auto;
}
.abx-import-mode-option,
.abx-profile-option {
display: grid;
gap: 6px;
}
.abx-import-mode-option strong,
.abx-profile-option strong {
color: #0f172a;
font-size: 15px;
}
.abx-import-mode-option span:last-child,
.abx-profile-option__meta {
color: #64748b;
font-size: 12px;
line-height: 1.5;
}
.abx-profile-option code {
font-size: 11px;
line-height: 1.5;
white-space: normal;
overflow-wrap: anywhere;
color: #334155;
background: #f8fafc;
border: 1px solid #e2e8f0;
border-radius: 10px;
padding: 8px 10px;
}
.abx-persona-path-list,
.abx-persona-artifacts {
display: grid;
gap: 10px;
}
.abx-persona-path-list div,
.abx-persona-artifact {
display: grid;
gap: 6px;
padding: 12px 14px;
border-radius: 12px;
border: 1px solid #e2e8f0;
background: #f8fafc;
}
.abx-persona-path-list code,
.abx-persona-artifact code {
white-space: normal;
overflow-wrap: anywhere;
font-size: 12px;
}
.abx-artifact-state {
display: inline-flex;
width: fit-content;
align-items: center;
border-radius: 999px;
padding: 2px 10px;
font-size: 11px;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.04em;
}
.abx-artifact-state--yes {
background: #dcfce7;
color: #166534;
}
.abx-artifact-state--no {
background: #fee2e2;
color: #991b1b;
}
@media (max-width: 960px) {
.persona-import-hero {
grid-template-columns: 1fr;
}
}
</style>
{% endblock %}
{% block extrahead %}
{{ block.super }}
<script>
// Toggle the persona-import form rows so only the fields relevant to the
// currently selected import mode ("discovered" vs "custom") are visible.
document.addEventListener('DOMContentLoaded', function () {
    const radios = Array.from(document.querySelectorAll('input[name="import_mode"]'));
    // Map each import mode to the form rows it should reveal; rows may be
    // null when the admin form omits a field, so guard before styling.
    const rowsByMode = {
        discovered: [document.querySelector('.form-row.field-import_discovered_profile')],
        custom: [
            document.querySelector('.form-row.field-import_source'),
            document.querySelector('.form-row.field-import_profile_name'),
        ],
    };
    function syncRows() {
        const checkedRadio = radios.find(function (radio) { return radio.checked; });
        const mode = (checkedRadio && checkedRadio.value) || 'none';
        Object.keys(rowsByMode).forEach(function (rowMode) {
            rowsByMode[rowMode].forEach(function (row) {
                if (row) row.style.display = rowMode === mode ? '' : 'none';
            });
        });
    }
    radios.forEach(function (radio) { radio.addEventListener('change', syncRows); });
    // Apply visibility once on load so the initial state matches the selection.
    syncRows();
});
</script>
{% endblock %}
{% block form_top %}
<section class="persona-import-hero">
<div>
<h2>Bootstrap a persona from a real browser session</h2>
<p>
Pick a local Chromium profile, paste an absolute profile path, or attach to a live CDP endpoint.
The form saves the Persona normally, then imports profile files, cookies, and optional tab storage into
the Persona's own directories.
</p>
</div>
<div class="persona-import-hero__meta">
<div class="persona-import-hero__stat">
<span>Detected profiles</span>
<strong>{{ detected_profile_count }}</strong>
</div>
<div class="persona-import-hero__stat">
<span>Persona artifacts</span>
<code>chrome_user_data</code>
<code>cookies.txt</code>
<code>auth.json</code>
</div>
</div>
</section>
{{ block.super }}
{% endblock %}

View File

@@ -706,14 +706,14 @@
? Math.max(0, Math.min(100, extractor.progress))
: null;
const progressStyle = progress !== null ? ` style="width: ${progress}%;"` : '';
const pidHtml = extractor.pid ? `<span class="pid-label compact">pid ${extractor.pid}</span>` : '';
const pidHtml = extractor.status === 'started' && extractor.pid ? `<span class="pid-label compact">pid ${extractor.pid}</span>` : '';
return `
<span class="extractor-badge ${extractor.status || 'queued'}">
<span class="progress-fill"${progressStyle}></span>
<span class="badge-content">
<span class="badge-icon">${icon}</span>
<span>${extractor.plugin || 'unknown'}</span>
<span>${extractor.label || extractor.plugin || 'unknown'}</span>
${pidHtml}
</span>
</span>
@@ -742,6 +742,23 @@
`;
}
const hasProcessEntries = (snapshot.all_plugins || []).some(extractor => extractor.source === 'process');
const hasArchiveResults = (snapshot.all_plugins || []).some(extractor => extractor.source === 'archiveresult');
const processOnly = hasProcessEntries && !hasArchiveResults;
const runningProcessCount = (snapshot.all_plugins || []).filter(extractor => extractor.source === 'process' && extractor.status === 'started').length;
const failedProcessCount = (snapshot.all_plugins || []).filter(extractor => extractor.source === 'process' && extractor.status === 'failed').length;
const snapshotMeta = (snapshot.total_plugins || 0) > 0
? processOnly
? runningProcessCount > 0
? `Running ${runningProcessCount}/${snapshot.total_plugins || 0} setup hooks`
: failedProcessCount > 0
? `${failedProcessCount} setup hook${failedProcessCount === 1 ? '' : 's'} failed`
: `${snapshot.completed_plugins || 0}/${snapshot.total_plugins || 0} setup hooks`
: hasProcessEntries
? `${snapshot.completed_plugins || 0}/${snapshot.total_plugins || 0} tasks${(snapshot.failed_plugins || 0) > 0 ? ` <span style="color:#f85149">(${snapshot.failed_plugins} failed)</span>` : ''}${runningProcessCount > 0 ? ` <span style="color:#d29922">(${runningProcessCount} hooks running)</span>` : ''}`
: `${snapshot.completed_plugins || 0}/${snapshot.total_plugins || 0} extractors${(snapshot.failed_plugins || 0) > 0 ? ` <span style="color:#f85149">(${snapshot.failed_plugins} failed)</span>` : ''}`
: 'Waiting for extractors...';
return `
<div class="snapshot-item">
<div class="snapshot-header">
@@ -750,9 +767,7 @@
<div class="snapshot-info">
<div class="snapshot-url">${formatUrl(snapshot.url)}</div>
<div class="snapshot-meta">
${(snapshot.total_plugins || 0) > 0
? `${snapshot.completed_plugins || 0}/${snapshot.total_plugins || 0} extractors${(snapshot.failed_plugins || 0) > 0 ? ` <span style="color:#f85149">(${snapshot.failed_plugins} failed)</span>` : ''}`
: 'Waiting for extractors...'}
${snapshotMeta}
</div>
</div>
${snapshotPidHtml}
@@ -762,7 +777,7 @@
</div>
<div class="snapshot-progress">
<div class="progress-bar-container">
<div class="progress-bar snapshot ${snapshot.status === 'started' && (snapshot.progress || 0) === 0 ? 'indeterminate' : ''}"
<div class="progress-bar snapshot ${((processOnly && runningProcessCount > 0) || (snapshot.status === 'started' && (snapshot.progress || 0) === 0)) ? 'indeterminate' : ''}"
style="width: ${snapshot.progress || 0}%"></div>
</div>
</div>
@@ -784,6 +799,29 @@
if (crawl.active_snapshots && crawl.active_snapshots.length > 0) {
snapshotsHtml = crawl.active_snapshots.map(s => renderSnapshot(s, crawl.id)).join('');
}
let setupHtml = '';
if (crawl.setup_plugins && crawl.setup_plugins.length > 0) {
const setupSummary = `${crawl.setup_completed_plugins || 0}/${crawl.setup_total_plugins || 0} setup tasks${(crawl.setup_failed_plugins || 0) > 0 ? ` <span style="color:#f85149">(${crawl.setup_failed_plugins} failed)</span>` : ''}`;
const sortedSetup = [...crawl.setup_plugins].sort((a, b) =>
(a.plugin || '').localeCompare(b.plugin || '')
);
setupHtml = `
<div class="snapshot-item">
<div class="snapshot-header">
<div class="snapshot-header-link">
<span class="snapshot-icon">&#9881;</span>
<div class="snapshot-info">
<div class="snapshot-url">Crawl Setup</div>
<div class="snapshot-meta">${setupSummary}</div>
</div>
</div>
</div>
<div class="extractor-list">
${sortedSetup.map(e => renderExtractor(e)).join('')}
</div>
</div>
`;
}
// Show warning if crawl is stuck (queued but can't start)
let warningHtml = '';
@@ -847,6 +885,7 @@
${warningHtml}
<div class="crawl-body">
<div class="snapshot-list">
${setupHtml}
${snapshotsHtml}
</div>
</div>

File diff suppressed because it is too large. [Load Diff]

View File

@@ -1,4 +1,4 @@
{% load static tz admin_urls %}
{% load static tz admin_urls core_tags %}
<!DOCTYPE html>
<html lang="en">
@@ -9,6 +9,10 @@
<link rel="stylesheet" href="{% static 'admin/css/base.css' %}">
<link rel="stylesheet" href="{% static 'admin.css' %}">
<link rel="stylesheet" href="{% static 'bootstrap.min.css' %}">
{% api_token as api_token %}
<script>
window.ARCHIVEBOX_API_KEY = "{{ api_token|escapejs }}";
</script>
<script src="{% static 'jquery.min.js' %}"></script>
{% block extra_head %}

View File

@@ -6,7 +6,7 @@
<a href="/admin/core/tag/">Tags</a> |
<a href="/admin/core/archiveresult/?o=-1">Log</a> &nbsp; &nbsp;
<a href="{% url 'Docs' %}" target="_blank" rel="noopener noreferrer">Docs</a> |
<a href="/api">API</a> |
<a href="/api/v1/docs">API</a> |
<a href="{% url 'public-index' %}">Public</a> |
<a href="/admin/">Admin</a>
&nbsp; &nbsp;

View File

@@ -456,6 +456,9 @@
text-overflow: ellipsis;
white-space: nowrap;
}
.thumb-card:has([data-compact]) .card-text {
display: none;
}
.thumb-card:has([data-compact]) .thumbnail-text-header,
.thumb-card:has([data-compact]) .thumbnail-compact-icon,
.thumb-card:has([data-compact]) .thumbnail-compact-label {
@@ -620,8 +623,9 @@
<div class="header-top container-fluid">
<div class="row nav">
<div class="col-lg-2" style="line-height: 50px; vertical-align: middle">
<a href="../../index.html" class="header-archivebox" title="Go to Main Index...">
<img src="/static/archive.png" alt="Archive Icon">
{% public_base_url as public_base %}
<a href="{% if public_base %}{{ public_base }}/public/{% else %}/{% endif %}" class="header-archivebox" title="Go to Public Index...">
<img src="{% if public_base %}{{ public_base }}/static/archive.png{% else %}/static/archive.png{% endif %}" alt="Archive Icon">
ArchiveBox
</a>
</div>
@@ -683,12 +687,10 @@
<div class="info-chunk">
<h5>🗃&nbsp; Snapshot: <a href="{% admin_base_url %}/admin/core/snapshot/{{snapshot_id|default:id}}/change/"><code style="color: rgba(255,255,255,0.6); font-weight: 200; font-size: 12px; background-color: #1a1a1a"><b>[{{timestamp}}]</b> <small>{{snapshot_id|default:id|truncatechars:24}}</small></code></a></h5>
<a href="{% snapshot_url snapshot 'index.json' %}" title="JSON summary of archived link.">JSON</a> |
<a href="{% snapshot_url snapshot 'warc/' %}" title="Any WARC archives for the page">WARC</a> |
<a href="{% snapshot_url snapshot 'media/' %}" title="Audio, Video, and Subtitle files.">Media</a> |
<a href="{% snapshot_url snapshot 'git/' %}" title="Any git repos at the url">Git</a> |
<a href="{% snapshot_base_url snapshot %}/?files=1" title="Browse the full SNAP_DIR for this snapshot">See all files...</a> |
<a href="{% admin_base_url %}/admin/core/snapshot/?q={{snapshot_id|default:id}}" title="Go to the Snapshot admin to update, overwrite, or delete this Snapshot">Actions</a> |
<a href="{% admin_base_url %}/admin/core/snapshot/{{snapshot_id|default:id}}/change/" title="Edit this snapshot in the Admin UI">Admin</a> |
<a href="{% snapshot_base_url snapshot %}/?files=1" title="Webserver-provided index of files directory.">See all files...</a><br/>
<a href="https://web.archive.org/web/{{url}}" title="Search for a copy of the URL saved in Archive.org" target="_blank" rel="noreferrer">Archive.org</a><br/>
</div>
</div>
</div>
@@ -713,12 +715,12 @@
<a href="{{display_url}}" data-no-preview="1" title="Download output file" download>⬇️</a>
{% endif %}
</div>
<a href="{{ display_url }}" target="preview">
<h4 class="card-title">{% plugin_icon result_info.name %} {{ result_info.name|plugin_name|truncatechars:20 }}</h4>
</a>
<a href="{{ display_url }}" title="Open in new tab..." target="_blank" rel="noopener">
<p class="card-text"><code>{{ result_info.path }}</code></p>
</a>
<a href="{{ display_url }}" target="preview">
<h4 class="card-title">{{ result_info.name|title }}</h4>
</a>
{% if result_info.result %}
{% with plugin_base=result_info.name|plugin_name %}
{% if plugin_base == 'ytdlp' or plugin_base == 'yt-dlp' or plugin_base == 'youtube-dl' %}

View File

@@ -902,9 +902,9 @@
<div class="header-top">
<div class="header-nav">
<div class="header-col header-left" style="line-height: 58px; vertical-align: middle">
<a href="/" class="header-archivebox" title="Go to Main Index...">
{% web_base_url as web_base %}
<img src="{% if web_base %}//{{ web_base|cut:'http://'|cut:'https://' }}/static/archive.png{% else %}{% static 'archive.png' %}{% endif %}" alt="Archive Icon">
{% public_base_url as public_base %}
<a href="{% if public_base %}{{ public_base }}/public/{% else %}/{% endif %}" class="header-archivebox" title="Go to Public Index...">
<img src="{% if public_base %}{{ public_base }}/static/archive.png{% else %}{% static 'archive.png' %}{% endif %}" alt="Archive Icon">
ArchiveBox
</a>
</div>
@@ -996,8 +996,7 @@
<br/>
<div class="external-links">
📁 &nbsp;
<a href="{% snapshot_base_url snapshot %}/?files=1" title="Browse files for this snapshot" target="_blank">FILES</a> &nbsp;|&nbsp; 🗃️
<a href="{% snapshot_url snapshot warc_path %}" title="Download the ArchiveBox-generated WARC file" target="_blank">WARC</a> &nbsp;|&nbsp;
<a href="{% snapshot_base_url snapshot %}/?files=1" title="Browse the full SNAP_DIR for this snapshot" target="_blank">See all files...</a> &nbsp;|&nbsp;
<a href="https://web.archive.org/web/{{url}}" title="Search for a copy of the URL saved in Archive.org" target="_blank" rel="noreferrer">🏛️ Archive.org</a>
<!--<a href="https://archive.md/{{url}}" title="Search for a copy of the URL saved in Archive.today" target="_blank" rel="noreferrer">Archive.today</a> &nbsp;|&nbsp; -->
<!--<a href="https://ghostarchive.org/search?term={{url}}" title="Search for a copy of the URL saved in GhostArchive.org" target="_blank" rel="noreferrer">More...</a>-->
@@ -1010,7 +1009,7 @@
{% for result in archiveresults %}
{% with display_path=result.path|default:result.result.embed_path display_url='' %}
{% with display_path=result.path display_url='' %}
{% if display_path %}{% snapshot_url snapshot display_path as display_url %}{% endif %}
<div class="thumb-card{% if forloop.first %} selected-card{% endif %}"{% if display_url %} data-preview-url="{{display_url}}"{% endif %}>
<div class="thumb-body">

View File

@@ -78,6 +78,7 @@ textarea, select, input[type="text"] {
box-shadow: 4px 4px 4px rgba(0,0,0,0.02);
width: 100%;
padding: 8px 12px;
font-family: inherit;
font-size: 14px;
}
@@ -85,6 +86,10 @@ textarea {
min-height: 300px;
}
input[type="text"] {
min-height: 42px;
}
textarea[rows="3"] {
min-height: 80px;
}
@@ -153,6 +158,13 @@ select {
margin-bottom: 20px;
}
.settings-row {
display: grid;
grid-template-columns: minmax(260px, 340px) minmax(420px, 1fr);
gap: 18px;
align-items: start;
}
.form-field label {
display: block;
font-size: 16px;
@@ -160,6 +172,234 @@ select {
margin-bottom: 8px;
}
.field-header {
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 8px;
}
.field-header label {
margin-bottom: 0;
}
.url-workbench {
display: grid;
grid-template-columns: minmax(0, 1fr) minmax(280px, 360px);
gap: 18px;
align-items: start;
}
.url-editor-column {
min-width: 0;
}
.url-editor-shell {
position: relative;
}
.url-editor-shell textarea[name="url"] {
position: relative;
z-index: 2;
background: transparent;
color: #1f2937;
-webkit-text-fill-color: #1f2937;
caret-color: #1f2937;
min-height: 240px;
height: 240px;
line-height: 1.5;
resize: vertical;
}
.url-editor-shell textarea[name="url"]::selection {
background: rgba(0, 72, 130, 0.18);
}
.url-highlight-layer {
position: absolute;
inset: 2px;
z-index: 1;
margin: 0;
padding: 8px 12px;
overflow: auto;
pointer-events: none;
white-space: pre-wrap;
overflow-wrap: anywhere;
word-break: break-word;
font-family: inherit;
font-size: 14px;
line-height: 1.5;
color: transparent;
background: transparent;
border-radius: 2px;
scrollbar-width: none;
}
.url-highlight-layer::-webkit-scrollbar {
display: none;
}
.url-highlight-segment {
border-radius: 3px;
}
.detected-urls-panel {
display: flex;
flex-direction: column;
min-height: 240px;
padding: 12px 14px;
background: linear-gradient(180deg, #fff 0%, #f6f8fb 100%);
border: 1px solid #d7e2eb;
border-radius: 8px;
overflow: hidden;
}
.detected-urls-header {
display: flex;
align-items: baseline;
justify-content: space-between;
gap: 12px;
margin-bottom: 10px;
}
.detected-urls-summary {
font-size: 12px;
color: #5f6c78;
}
.detected-urls-list {
flex: 1;
min-height: 0;
display: grid;
align-content: start;
gap: 8px;
overflow: auto;
padding-right: 4px;
}
.detected-urls-empty {
padding: 8px 0;
color: #6b7280;
font-size: 13px;
line-height: 1.5;
}
.detected-url-item {
display: grid;
gap: 8px;
padding: 10px 12px;
border-left: 4px solid var(--detected-url-border, #d0d7de);
border-radius: 6px;
background: linear-gradient(90deg, var(--detected-url-bg, rgba(0, 0, 0, 0.03)), rgba(255, 255, 255, 0.96) 28%);
}
.detected-url-topline {
display: flex;
align-items: center;
justify-content: space-between;
gap: 8px;
}
.detected-url-controls {
display: flex;
flex-wrap: nowrap;
gap: 6px;
min-width: 0;
}
.detected-url-number {
width: 20px;
height: 20px;
display: inline-flex;
align-items: center;
justify-content: center;
border-radius: 999px;
background: rgba(15, 23, 42, 0.08);
color: #24303b;
font-size: 10px;
font-weight: 700;
}
.detected-url-body {
min-width: 0;
}
.detected-url-value {
display: block;
font-size: 12px;
line-height: 1.45;
color: #1f2937;
overflow-wrap: anywhere;
}
.detected-url-toggle-btn {
flex: 0 0 auto;
display: inline-flex;
align-items: center;
justify-content: center;
padding: 4px 8px;
min-height: 24px;
border: 1px solid rgba(148, 163, 184, 0.4);
border-radius: 999px;
background: rgba(148, 163, 184, 0.12);
color: #64748b;
font-size: 11px;
font-weight: 700;
line-height: 1;
white-space: nowrap;
transition: background-color 120ms ease, border-color 120ms ease, color 120ms ease;
cursor: pointer;
}
.detected-url-toggle-btn:hover {
background: rgba(15, 23, 42, 0.08);
}
.detected-url-toggle-btn-inactive:hover {
border-color: rgba(180, 35, 24, 0.28);
background: rgba(180, 35, 24, 0.10);
color: #b42318;
}
.detected-url-toggle-btn-active:hover {
border-color: rgba(22, 101, 52, 0.28);
background: rgba(22, 101, 52, 0.10);
color: #166534;
}
.detected-url-toggle-btn-disabled,
.detected-url-toggle-btn-disabled:hover {
border-color: rgba(203, 213, 225, 0.55);
background: rgba(226, 232, 240, 0.45);
color: #94a3b8;
cursor: not-allowed;
}
.detected-url-message {
margin-top: 4px;
font-size: 11px;
color: #617080;
line-height: 1.45;
}
.detected-url-allowlisted .detected-url-value {
color: #166534;
}
.detected-url-denied .detected-url-value {
color: #b42318;
text-decoration: line-through;
text-decoration-thickness: 1.5px;
}
.detected-url-denied .detected-url-message {
color: #b42318;
}
.detected-url-filtered .detected-url-value {
color: #6b7280;
}
.form-field .help-text {
font-size: 12px;
color: #666;
@@ -173,7 +413,137 @@ select {
margin-top: 4px;
}
/* Checkbox fields (for overwrite, update, index_only) */
.tag-editor-container {
display: flex;
flex-wrap: wrap;
align-items: center;
gap: 6px;
padding: 8px 12px;
min-height: 44px;
background: #fff;
border: 2px solid #004882;
border-radius: 4px;
box-shadow: 4px 4px 4px rgba(0,0,0,0.02);
cursor: text;
}
.tag-editor-container:focus-within {
border-color: #2c7ec1;
}
.tag-pills {
display: flex;
flex-wrap: wrap;
gap: 6px;
align-items: center;
}
.tag-pill {
display: inline-flex;
align-items: center;
gap: 4px;
padding: 4px 8px 4px 10px;
background: var(--tag-bg, #e2e8f0);
color: var(--tag-fg, #1e293b);
border-radius: 16px;
border: 1px solid var(--tag-border, #cbd5e1);
font-size: 13px;
font-weight: 500;
}
.tag-remove-btn {
display: inline-flex;
align-items: center;
justify-content: center;
width: 16px;
height: 16px;
padding: 0;
margin: 0;
border: 1px solid rgba(15, 23, 42, 0.12);
border-radius: 50%;
background: rgba(15, 23, 42, 0.08);
color: inherit;
font-size: 14px;
line-height: 1;
cursor: pointer;
}
.tag-inline-input {
flex: 1;
min-width: 120px;
padding: 4px 0;
border: none !important;
box-shadow: none !important;
outline: none;
background: transparent;
}
.tag-inline-input::placeholder {
color: #7c8b98;
}
.url-filters-widget textarea {
min-height: 58px;
font-family: monospace;
font-size: 13px;
}
.url-filters-field > label {
display: none;
}
.url-filters-grid {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 12px;
}
.url-filter-label-row {
display: flex;
align-items: baseline;
flex-wrap: nowrap;
gap: 10px;
width: 100%;
margin-bottom: 6px;
}
.url-filters-column .url-filter-label {
display: block;
font-size: 14px;
margin-bottom: 0;
}
.url-filter-label-main {
font-weight: 600;
white-space: nowrap;
}
.url-filter-label-note {
display: inline-block;
flex: 0 0 auto;
margin-left: auto;
font-size: 12px;
color: #7a7a7a;
font-weight: 400;
font-style: italic;
text-align: right;
white-space: nowrap;
}
.url-filters-toggle {
display: inline-flex !important;
align-items: center;
gap: 8px;
margin-top: 10px;
font-size: 14px !important;
font-weight: 600;
}
.url-filters-toggle input[type="checkbox"] {
width: auto;
margin: 0;
}
.checkbox-field {
display: flex;
align-items: center;
@@ -193,7 +563,6 @@ select {
/* URL Counter */
.url-counter {
display: inline-block;
margin-top: 8px;
padding: 4px 10px;
font-size: 13px;
font-weight: 600;
@@ -209,13 +578,27 @@ select {
border-color: #c3e6cb;
}
@media (max-width: 1020px) {
.settings-row {
grid-template-columns: 1fr;
}
.url-workbench {
grid-template-columns: 1fr;
}
.url-filters-grid {
grid-template-columns: 1fr;
}
}
/* Plugin Presets */
.plugin-presets {
display: flex;
flex-wrap: wrap;
align-items: center;
gap: 8px;
margin-bottom: 20px;
margin-bottom: 18px;
padding: 15px;
background-color: #f8f9fa;
border: 1px solid #dee2e6;
@@ -254,11 +637,18 @@ select {
/* Plugin groups */
.plugin-group {
margin-bottom: 20px;
padding: 15px;
padding: 14px 16px;
background-color: white;
border: 1px solid #ddd;
border-radius: 6px;
min-width: 0;
}
.plugin-groups-grid {
display: grid;
grid-template-columns: repeat(2, minmax(280px, 1fr));
gap: 16px;
align-items: start;
}
.plugin-group-header {
@@ -268,6 +658,7 @@ select {
margin-bottom: 12px;
padding-bottom: 8px;
border-bottom: 2px solid #004882;
gap: 12px;
}
.plugin-group-header label {
@@ -277,6 +668,12 @@ select {
margin: 0;
}
.plugin-group-note {
font-size: 12px;
color: #7a7a7a;
white-space: nowrap;
}
.select-all-btn {
padding: 4px 12px;
font-size: 12px;
@@ -293,42 +690,105 @@ select {
.plugin-checkboxes {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 8px;
grid-template-columns: 1fr;
gap: 6px;
}
.plugin-checkboxes ul {
list-style-type: none;
padding: 0;
margin: 0;
display: contents;
.plugin-checkboxes > div {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 6px 10px;
}
.plugin-checkboxes li {
.plugin-checkboxes > div > div {
display: flex;
align-items: center;
gap: 8px;
padding: 6px;
padding: 6px 8px;
border: 1px solid #e3e8ef;
background-color: #fff;
border-radius: 4px;
transition: background-color 0.2s;
}
.plugin-checkboxes li:hover {
.plugin-checkboxes > div > div:hover {
background-color: #f5f5f5;
}
.plugin-checkboxes input[type="checkbox"] {
grid-column: 2;
grid-row: 1 / span 2;
margin: 0;
margin-top: 2px;
width: auto;
flex: 0 0 auto;
}
.plugin-checkboxes label {
#add-form .plugin-checkboxes label {
display: grid !important;
grid-template-columns: 18px 16px minmax(0, 1fr);
column-gap: 8px;
row-gap: 3px;
align-items: start;
width: 100%;
margin: 0;
font-size: 14px;
font-weight: normal;
cursor: pointer;
}
.plugin-choice-name {
grid-column: 3;
grid-row: 1;
font-weight: 500;
color: #1f2937;
}
#add-form .plugin-choice-icon {
grid-column: 1;
grid-row: 1 / span 2;
display: inline-flex;
align-items: center;
justify-content: center;
color: #7a7a7a;
flex: 0 0 auto;
}
#add-form .plugin-choice-icon .abx-output-icon {
display: inline-flex;
align-items: center;
justify-content: center;
}
#add-form .plugin-choice-icon svg {
width: 18px;
height: 18px;
}
#add-form .plugin-choice-description {
grid-column: 3;
grid-row: 2;
margin-left: 0;
display: inline-block;
font-size: 12px;
color: #7a7a7a !important;
text-decoration: none !important;
text-align: left;
}
#add-form .plugin-checkboxes label a.plugin-choice-description:link,
#add-form .plugin-checkboxes label a.plugin-choice-description:visited,
#add-form .plugin-checkboxes label a.plugin-choice-description:active {
color: #7a7a7a !important;
text-decoration: none !important;
}
#add-form .plugin-checkboxes label a.plugin-choice-description:hover,
#add-form .plugin-checkboxes label a.plugin-choice-description:focus {
color: #4b5563 !important;
text-decoration: underline !important;
}
/* Advanced section (collapsible) */
.advanced-section {
background-color: white;
@@ -388,6 +848,14 @@ input:focus, select:focus, textarea:focus, button:focus {
grid-template-columns: 1fr;
}
.plugin-groups-grid {
grid-template-columns: 1fr;
}
.plugin-checkboxes > div {
grid-template-columns: 1fr;
}
.plugin-group-header {
flex-direction: column;
align-items: flex-start;

View File

@@ -477,6 +477,10 @@ body.model-snapshot.change-list #content .object-tools {
max-width: 220px;
}
#content td.field-tags_inline .tag-editor-inline.readonly {
padding-right: 0;
}
#content th.field-tags_inline,
#content td.field-tags_inline {
max-width: 220px;
@@ -610,6 +614,56 @@ body.model-snapshot.change-list #content .object-tools {
border-radius: 4px;
}
body.model-archiveresult.change-list #result_list td.field-cmd_str {
width: 300px !important;
max-width: 300px !important;
min-width: 300px !important;
}
body.model-archiveresult.change-list #result_list td.field-cmd_str > div,
body.model-archiveresult.change-list #result_list td.field-cmd_str code {
max-width: 300px !important;
}
body.model-archiveresult.change-list #result_list {
table-layout: fixed;
width: 100%;
}
body.model-archiveresult.change-list #result_list th.column-cmd_str,
body.model-archiveresult.change-list #result_list td.field-cmd_str {
width: 300px !important;
max-width: 300px !important;
min-width: 300px !important;
overflow: hidden !important;
box-sizing: border-box;
}
body.model-archiveresult.change-list #result_list th.column-process_link,
body.model-archiveresult.change-list #result_list td.field-process_link {
width: 72px;
white-space: nowrap;
}
body.model-archiveresult.change-list #result_list th.column-machine_link,
body.model-archiveresult.change-list #result_list td.field-machine_link {
width: 180px;
}
body.model-archiveresult.change-list #result_list td.field-snapshot_info a {
display: block;
overflow: hidden;
text-overflow: ellipsis;
}
body.model-archiveresult.change-list #result_list td.field-cmd_str > div,
body.model-archiveresult.change-list #result_list td.field-cmd_str code {
width: 300px !important;
min-width: 300px !important;
max-width: 300px !important;
box-sizing: border-box;
}
body.filters-collapsed #content #changelist-filter {
display: none !important;
}
@@ -637,10 +691,49 @@ body.filters-collapsed .filtered div.xfull {
font-variant: small-caps;
}
#result_list tbody td.field-status {
#result_list tbody td.field-status,
#result_list tbody td.field-status_badge {
font-variant: small-caps;
}
body.model-archiveresult.filters-collapsed.change-list #changelist .changelist-form-container {
gap: 0 !important;
}
body.model-archiveresult.filters-collapsed.change-list #changelist .changelist-form-container > div,
body.model-archiveresult.filters-collapsed.change-list #changelist .results,
body.model-archiveresult.filters-collapsed.change-list #changelist .paginator,
body.model-archiveresult.filters-collapsed.change-list #changelist #toolbar,
body.model-archiveresult.filters-collapsed.change-list #changelist #changelist-form,
body.model-archiveresult.filters-collapsed.change-list #changelist #result_list {
width: 100% !important;
max-width: 100% !important;
margin-right: 0 !important;
}
body.model-archiveresult.change-list #result_list tbody tr {
transition: background-color 0.15s ease, opacity 0.15s ease;
}
body.model-archiveresult.change-list #result_list tbody tr:has(td.field-status_badge .status-badge.started),
body.model-archiveresult.change-list #result_list tbody tr:has(td.field-status_badge .status-badge.backoff) {
background: rgba(251, 191, 36, 0.14);
}
body.model-archiveresult.change-list #result_list tbody tr:has(td.field-status_badge .status-badge.failed) {
background: rgba(239, 68, 68, 0.12);
}
body.model-archiveresult.change-list #result_list tbody tr:has(td.field-status_badge .status-badge.succeeded) {
background: rgba(34, 197, 94, 0.11);
}
body.model-archiveresult.change-list #result_list tbody tr:has(td.field-status_badge .status-badge.skipped),
body.model-archiveresult.change-list #result_list tbody tr:has(td.field-status_badge .status-badge.noresults) {
background: rgba(148, 163, 184, 0.10);
opacity: 0.82;
}
.inline-group .tabular td.original p {
margin-top: -28px;
}
@@ -697,6 +790,7 @@ tbody .output-link:hover {opacity: 1;}
.status-badge.failed { background: #fee2e2; color: #ef4444; }
.status-badge.backoff { background: #fef3c7; color: #f59e0b; }
.status-badge.skipped { background: #f3f4f6; color: #6b7280; }
.status-badge.noresults { background: #f1f5f9; color: #64748b; }
/* Progress Bar */
.snapshot-progress-bar {

View File

@@ -0,0 +1,195 @@
import re
import pytest
from django.contrib.auth import get_user_model
from django.urls import reverse
from archivebox.config.common import SERVER_CONFIG, SEARCH_BACKEND_CONFIG
from archivebox.core.models import Tag
from archivebox.crawls.models import Crawl
pytestmark = pytest.mark.django_db
User = get_user_model()
WEB_HOST = 'web.archivebox.localhost:8000'
ADMIN_HOST = 'admin.archivebox.localhost:8000'
@pytest.fixture
def admin_user(db):
    """Return a superuser account for authenticating against the /add/ view."""
    credentials = {
        'username': 'addviewadmin',
        'email': 'addviewadmin@test.com',
        'password': 'testpassword',
    }
    return User.objects.create_superuser(**credentials)
def test_add_view_renders_tag_editor_and_url_filter_fields(client, admin_user, monkeypatch):
    """Public /add/ form should render the tag editor, URL filter inputs, and
    URL-detection widgets, and omit the removed overwrite/update checkboxes."""
    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
    response = client.get(reverse('add'), HTTP_HOST=WEB_HOST)
    body = response.content.decode()
    assert response.status_code == 200
    # Tagging and crawl-scoping controls are present
    assert 'tag-editor-container' in body
    assert 'name="url_filters_allowlist"' in body
    assert 'name="url_filters_denylist"' in body
    assert 'Same domain only' in body
    assert 'name="persona"' in body
    # Legacy checkboxes were removed from the form
    assert 'Overwrite existing snapshots' not in body
    assert 'Update/retry previously failed URLs' not in body
    assert 'Index only dry run (add crawl but don&#x27;t archive yet)' in body
    # Notes field rendered as a plain text input
    assert 'name="notes"' in body
    assert '<input type="text" name="notes"' in body
    # Persona selector must appear before the plugin section
    assert body.index('name="persona"') < body.index('<h3>Crawl Plugins</h3>')
    # Client-side URL detection / highlighting widgets
    assert 'data-url-regex=' in body
    assert 'id="url-highlight-layer"' in body
    assert 'id="detected-urls-list"' in body
    assert 'detected-url-toggle-btn' in body
def test_add_view_checks_configured_search_backend_by_default(client, monkeypatch):
    """The configured search backend's plugin checkbox should be pre-checked."""
    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
    monkeypatch.setattr(SEARCH_BACKEND_CONFIG, 'SEARCH_BACKEND_ENGINE', 'sqlite')
    response = client.get(reverse('add'), HTTP_HOST=WEB_HOST)
    body = response.content.decode()
    assert response.status_code == 200
    # The checkbox for the active backend is rendered already checked
    assert re.search(
        r'<input type="checkbox" name="search_plugins" value="search_backend_sqlite"[^>]* checked\b',
        body,
    )
    # JS guard pins the required search plugin client-side
    assert "const requiredSearchPlugin = 'search_backend_sqlite';" in body
def test_add_view_creates_crawl_with_tag_and_url_filter_overrides(client, admin_user, monkeypatch):
    """POSTing the add form should persist tags, notes, persona, and URL
    allow/deny filter overrides onto the newly created Crawl."""
    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
    client.force_login(admin_user)
    response = client.post(
        reverse('add'),
        data={
            'url': 'https://example.com\nhttps://cdn.example.com/asset.js',
            'tag': 'alpha,beta',
            'depth': '1',
            'url_filters_allowlist': 'example.com\n*.example.com',
            'url_filters_denylist': 'cdn.example.com',
            'notes': 'Created from /add/',
            'schedule': '',
            'persona': 'Default',
            'index_only': '',
            'config': '{}',
        },
        HTTP_HOST=WEB_HOST,
    )
    # Successful submit redirects away from the form
    assert response.status_code == 302
    crawl = Crawl.objects.order_by('-created_at').first()
    assert crawl is not None
    assert crawl.tags_str == 'alpha,beta'
    assert crawl.notes == 'Created from /add/'
    assert crawl.config.get('DEFAULT_PERSONA') == 'Default'
    # Filter textarea values are stored newline-separated in crawl config
    assert crawl.config['URL_ALLOWLIST'] == 'example.com\n*.example.com'
    assert crawl.config['URL_DENYLIST'] == 'cdn.example.com'
    # Removed legacy options must not leak into config
    assert 'OVERWRITE' not in crawl.config
    assert 'ONLY_NEW' not in crawl.config
def test_add_view_extracts_urls_from_mixed_text_input(client, admin_user, monkeypatch):
    """URLs embedded in CSV, markdown, JSON, and prose should be extracted in
    input order into one-URL-per-line ``crawl.urls``."""
    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
    client.force_login(admin_user)
    response = client.post(
        reverse('add'),
        data={
            'url': '\n'.join([
                'https://sweeting.me,https://google.com',
                'Notes: [ArchiveBox](https://github.com/ArchiveBox/ArchiveBox), https://news.ycombinator.com',
                '[Wiki](https://en.wikipedia.org/wiki/Classification_(machine_learning))',
                '{"items":["https://example.com/three"]}',
                'csv,https://example.com/four',
            ]),
            'tag': '',
            'depth': '0',
            'url_filters_allowlist': '',
            'url_filters_denylist': '',
            'notes': '',
            'schedule': '',
            'persona': 'Default',
            'index_only': '',
            'config': '{}',
        },
        HTTP_HOST=WEB_HOST,
    )
    assert response.status_code == 302
    crawl = Crawl.objects.order_by('-created_at').first()
    assert crawl is not None
    # Extraction must handle commas, markdown links (incl. parens in the
    # wiki URL), JSON strings, and CSV cells
    assert crawl.urls == '\n'.join([
        'https://sweeting.me',
        'https://google.com',
        'https://github.com/ArchiveBox/ArchiveBox',
        'https://news.ycombinator.com',
        'https://en.wikipedia.org/wiki/Classification_(machine_learning)',
        'https://example.com/three',
        'https://example.com/four',
    ])
def test_add_view_exposes_api_token_for_tag_widget_autocomplete(client, admin_user, monkeypatch):
    """Logged-in add view should embed an API key for tag autocomplete."""
    monkeypatch.setattr(SERVER_CONFIG, 'PUBLIC_ADD_VIEW', True)
    client.force_login(admin_user)
    resp = client.get(reverse('add'), HTTP_HOST=WEB_HOST)
    assert resp.status_code == 200
    assert b'window.ARCHIVEBOX_API_KEY' in resp.content
def test_tags_autocomplete_requires_auth_when_public_snapshots_list_disabled(client, settings):
    """Anonymous tag autocomplete must be rejected when the index is private."""
    settings.PUBLIC_SNAPSHOTS_LIST = False
    settings.PUBLIC_INDEX = False
    Tag.objects.create(name='archive')
    response = client.get(
        reverse('api-1:tags_autocomplete'),
        {'q': 'a'},
        HTTP_HOST=ADMIN_HOST,
    )
    # Unauthenticated request is denied outright
    assert response.status_code == 401
def test_tags_autocomplete_allows_public_access_when_public_snapshots_list_enabled(client, settings):
    """Anonymous tag autocomplete works when the snapshot list is public."""
    settings.PUBLIC_SNAPSHOTS_LIST = True
    settings.PUBLIC_INDEX = False
    Tag.objects.create(name='archive')
    response = client.get(
        reverse('api-1:tags_autocomplete'),
        {'q': 'a'},
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    # Matching tag is returned in the JSON payload
    assert response.json()['tags'][0]['name'] == 'archive'
def test_tags_autocomplete_allows_authenticated_user_when_public_snapshots_list_disabled(client, admin_user, settings):
    """Logged-in users can use tag autocomplete even with a private index."""
    settings.PUBLIC_SNAPSHOTS_LIST = False
    settings.PUBLIC_INDEX = False
    Tag.objects.create(name='archive')
    client.force_login(admin_user)
    response = client.get(
        reverse('api-1:tags_autocomplete'),
        {'q': 'a'},
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    assert response.json()['tags'][0]['name'] == 'archive'

View File

@@ -0,0 +1,151 @@
from archivebox.base_models.admin import KeyValueWidget
def test_key_value_widget_renders_enum_autocomplete_metadata(monkeypatch):
    """Widget should embed enum values and per-widget JS helpers for
    enum-constrained config keys."""
    # Replace the real config-schema lookup with a single enum option
    monkeypatch.setattr(
        KeyValueWidget,
        '_get_config_options',
        lambda self: {
            'CHROME_WAIT_FOR': {
                'plugin': 'chrome',
                'type': 'string',
                'default': 'networkidle2',
                'description': 'Page load completion condition',
                'enum': ['domcontentloaded', 'load', 'networkidle0', 'networkidle2'],
            },
        },
    )
    html = str(
        KeyValueWidget().render(
            'config',
            {'CHROME_WAIT_FOR': 'load'},
            attrs={'id': 'id_config'},
        )
    )
    # Enum metadata serialized into the markup
    assert '"enum": ["domcontentloaded", "load", "networkidle0", "networkidle2"]' in html
    assert 'class="kv-value-options"' in html
    assert 'class="kv-help"' in html
    # JS helper functions are namespaced by the widget id
    assert 'configureValueInput_id_config' in html
    assert 'describeMeta_id_config' in html
    assert 'validateValueAgainstMeta_id_config' in html
def test_key_value_widget_renders_numeric_and_pattern_constraints(monkeypatch):
    """Widget should embed min/max and regex-pattern constraints plus the
    validation/coercion JS helpers."""
    monkeypatch.setattr(
        KeyValueWidget,
        '_get_config_options',
        lambda self: {
            'TIMEOUT': {
                'plugin': 'base',
                'type': 'integer',
                'default': 60,
                'description': 'Timeout in seconds',
                'minimum': 5,
                'maximum': 120,
            },
            'CHROME_RESOLUTION': {
                'plugin': 'chrome',
                'type': 'string',
                'default': '1440,2000',
                'description': 'Viewport resolution',
                'pattern': '^\\d+,\\d+$',
            },
        },
    )
    html = str(KeyValueWidget().render('config', {}, attrs={'id': 'id_config'}))
    # Numeric bounds serialized as-is
    assert '"minimum": 5' in html
    assert '"maximum": 120' in html
    # Pattern is double-escaped once more for the JSON-in-HTML embedding
    assert '"pattern": "^\\\\d+,\\\\d+$"' in html
    assert 'Expected: ' in html
    assert 'Example: ' in html
    assert 'setValueValidationState_id_config' in html
    assert 'coerceValueForStorage_id_config' in html
def test_key_value_widget_accepts_common_boolean_spellings(monkeypatch):
    """Boolean config keys should accept True/False plus '1'/'0' spellings."""
    monkeypatch.setattr(
        KeyValueWidget,
        '_get_config_options',
        lambda self: {
            'DEBUG': {
                'plugin': 'base',
                'type': 'boolean',
                'default': False,
                'description': 'Enable debug mode',
            },
        },
    )
    html = str(KeyValueWidget().render('config', {'DEBUG': 'True'}, attrs={'id': 'id_config'}))
    # Dropdown offers canonical spellings; JS normalizes case and 1/0 forms
    assert "enumValues = ['True', 'False']" in html
    assert "raw.toLowerCase()" in html
    assert "lowered === 'true' || raw === '1'" in html
    assert "lowered === 'false' || raw === '0'" in html
def test_key_value_widget_shows_array_and_object_examples_and_binary_rules(monkeypatch):
    """Widget help text should show type-appropriate examples, and *_BINARY
    keys should get binary-path validation."""
    monkeypatch.setattr(
        KeyValueWidget,
        '_get_config_options',
        lambda self: {
            'WGET_ARGS_EXTRA': {
                'plugin': 'wget',
                'type': 'array',
                'default': [],
                'description': 'Extra arguments to append to wget command',
            },
            'SAVE_ALLOWLIST': {
                'plugin': 'base',
                'type': 'object',
                'default': {},
                'description': 'Regex allowlist mapped to enabled methods',
            },
            'WGET_BINARY': {
                'plugin': 'wget',
                'type': 'string',
                'default': 'wget',
                'description': 'Path to wget binary',
            },
        },
    )
    html = str(KeyValueWidget().render('config', {}, attrs={'id': 'id_config'}))
    # Per-type example snippets in the rendered help
    assert 'Example: ["--extra-arg"]' in html
    assert 'Example: {"^https://example\\\\.com": ["wget"]}' in html
    assert 'Example: wget or /usr/bin/wget' in html
    # Binary-path validation is keyed off the _BINARY suffix
    assert 'validateBinaryValue_id_config' in html
    assert "meta.key.endsWith('_BINARY')" in html
    assert "Binary paths cannot contain quotes" in html
def test_key_value_widget_falls_back_to_binary_validation_for_unknown_binary_keys(monkeypatch):
    """Keys ending in _BINARY that are absent from the schema should still get
    binary-path validation via the JS fallback."""
    monkeypatch.setattr(
        KeyValueWidget,
        '_get_config_options',
        lambda self: {
            'CHROME_BINARY': {
                'plugin': 'base',
                'type': 'string',
                'default': '',
                'description': 'Resolved Chromium/Chrome binary path shared across plugins',
            },
        },
    )
    # NODE_BINARY is intentionally NOT in the schema above
    html = str(
        KeyValueWidget().render(
            'config',
            {'NODE_BINARY': '/opt/homebrew/bin/node'},
            attrs={'id': 'id_config'},
        )
    )
    assert 'function getMetaForKey_id_config' in html
    assert "if (key.endsWith('_BINARY'))" in html
    assert 'Path to binary executable' in html

View File

@@ -0,0 +1,127 @@
import pytest
from django.contrib.admin.sites import AdminSite
from uuid import uuid4
pytestmark = pytest.mark.django_db
def _create_snapshot():
    """Create a minimal Crawl plus a started Snapshot for admin-link tests."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    parent_crawl = Crawl.objects.create(
        urls="https://example.com",
        created_by_id=get_or_create_system_user_pk(),
    )
    snapshot = Snapshot.objects.create(
        url="https://example.com",
        crawl=parent_crawl,
        status=Snapshot.StatusChoices.STARTED,
    )
    return snapshot
def _create_machine():
    """Create a Machine row with unique guid/hw_uuid and dummy hardware info."""
    from archivebox.machine.models import Machine
    return Machine.objects.create(
        guid=f'test-guid-{uuid4()}',  # unique per call to avoid collisions
        hostname='test-host',
        hw_in_docker=False,
        hw_in_vm=False,
        hw_manufacturer='Test',
        hw_product='Test Product',
        hw_uuid=f'test-hw-{uuid4()}',
        os_arch='arm64',
        os_family='darwin',
        os_platform='macOS',
        os_release='14.0',
        os_kernel='Darwin',
        stats={},
        config={},
    )
def _create_iface(machine):
    """Create a NetworkInterface row attached to *machine* with dummy geo/ISP data."""
    from archivebox.machine.models import NetworkInterface
    return NetworkInterface.objects.create(
        machine=machine,
        mac_address='00:11:22:33:44:66',
        ip_public='203.0.113.11',  # TEST-NET-3 documentation address
        ip_local='10.0.0.11',
        dns_server='1.1.1.1',
        hostname='test-host',
        iface='en0',
        isp='Test ISP',
        city='Test City',
        region='Test Region',
        country='Test Country',
    )
def test_archiveresult_admin_links_plugin_and_process():
    """ArchiveResult admin columns should link to the plugin page and the
    owning Process change view."""
    from archivebox.core.admin_archiveresults import ArchiveResultAdmin
    from archivebox.core.models import ArchiveResult
    from archivebox.machine.models import Process
    snapshot = _create_snapshot()
    iface = _create_iface(_create_machine())
    process = Process.objects.create(
        machine=iface.machine,
        iface=iface,
        process_type=Process.TypeChoices.HOOK,
        pwd=str(snapshot.output_dir / 'wget'),
        cmd=['/tmp/on_Snapshot__06_wget.finite.bg.py', '--url=https://example.com'],
        status=Process.StatusChoices.EXITED,
    )
    result = ArchiveResult.objects.create(
        snapshot=snapshot,
        plugin='wget',
        hook_name='on_Snapshot__06_wget.finite.bg.py',
        process=process,
        status=ArchiveResult.StatusChoices.SUCCEEDED,
    )
    admin = ArchiveResultAdmin(ArchiveResult, AdminSite())
    # Render the two custom list_display callables directly
    plugin_html = str(admin.plugin_with_icon(result))
    process_html = str(admin.process_link(result))
    assert '/admin/environment/plugins/builtin.wget/' in plugin_html
    assert f'/admin/machine/process/{process.id}/change' in process_html
def test_process_admin_links_binary_and_iface():
    """Process admin columns should link to the Binary and NetworkInterface
    change views."""
    from archivebox.machine.admin import ProcessAdmin
    from archivebox.machine.models import Binary, Process
    machine = _create_machine()
    iface = _create_iface(machine)
    binary = Binary.objects.create(
        machine=machine,
        name='wget',
        abspath='/usr/local/bin/wget',
        version='1.21.2',
        binprovider='env',
        binproviders='env',
        status=Binary.StatusChoices.INSTALLED,
    )
    process = Process.objects.create(
        machine=machine,
        iface=iface,
        binary=binary,
        process_type=Process.TypeChoices.HOOK,
        pwd='/tmp/wget',
        cmd=['/tmp/on_Snapshot__06_wget.finite.bg.py', '--url=https://example.com'],
        status=Process.StatusChoices.EXITED,
    )
    admin = ProcessAdmin(Process, AdminSite())
    binary_html = str(admin.binary_link(process))
    iface_html = str(admin.iface_link(process))
    assert f'/admin/machine/binary/{binary.id}/change' in binary_html
    assert f'/admin/machine/networkinterface/{iface.id}/change' in iface_html

View File

@@ -9,11 +9,13 @@ Tests cover:
"""
import pytest
import uuid
from typing import cast
from django.test import override_settings
from django.urls import reverse
from django.contrib.auth import get_user_model
from django.contrib.auth.models import UserManager
from django.utils import timezone
pytestmark = pytest.mark.django_db
@@ -195,6 +197,232 @@ class TestAdminSnapshotListView:
assert b'snapshot-view-list' in response.content
assert b'snapshot-view-grid' in response.content
def test_binary_change_view_renders(self, client, admin_user, db):
    """Binary admin change form should load without FieldError."""
    from archivebox.machine.models import Machine, Binary
    machine = Machine.objects.create(
        guid=f'test-guid-{uuid.uuid4()}',
        hostname='test-host',
        hw_in_docker=False,
        hw_in_vm=False,
        hw_manufacturer='Test',
        hw_product='Test Product',
        hw_uuid=f'test-hw-{uuid.uuid4()}',
        os_arch='x86_64',
        os_family='darwin',
        os_platform='darwin',
        os_release='test',
        os_kernel='test-kernel',
        stats={},
    )
    binary = Binary.objects.create(
        machine=machine,
        name='gallery-dl',
        binproviders='env',
        binprovider='env',
        abspath='/opt/homebrew/bin/gallery-dl',
        version='1.26.9',
        sha256='abc123',
        status=Binary.StatusChoices.INSTALLED,
    )
    client.login(username='testadmin', password='testpassword')
    # Hand-built URL rather than reverse() — exercises the raw admin route
    url = f'/admin/machine/binary/{binary.pk}/change/'
    response = client.get(url, HTTP_HOST=ADMIN_HOST)
    assert response.status_code == 200
    assert b'gallery-dl' in response.content
def test_change_view_renders_real_redo_failed_action(self, client, admin_user, snapshot):
    """Snapshot change page should expose the per-object redo-failed action URL."""
    client.login(username='testadmin', password='testpassword')
    change_url = reverse('admin:core_snapshot_change', args=[snapshot.pk])
    resp = client.get(change_url, HTTP_HOST=ADMIN_HOST)
    assert resp.status_code == 200
    expected_action = f'/admin/core/snapshot/{snapshot.pk}/redo-failed/'.encode()
    assert expected_action in resp.content
def test_redo_failed_action_requeues_snapshot(self, client, admin_user, snapshot, monkeypatch):
    """Redo-failed admin action should queue a background re-archive and
    redirect back to the snapshot change page."""
    import archivebox.core.admin_snapshots as admin_snapshots
    queued = []
    # Stub out the real background task and record its call arguments
    def fake_bg_archive_snapshot(obj, overwrite=False, methods=None):
        queued.append((str(obj.pk), overwrite, methods))
        return 1
    monkeypatch.setattr(admin_snapshots, 'bg_archive_snapshot', fake_bg_archive_snapshot)
    client.login(username='testadmin', password='testpassword')
    url = reverse('admin:core_snapshot_redo_failed', args=[snapshot.pk])
    response = client.post(url, HTTP_HOST=ADMIN_HOST)
    assert response.status_code == 302
    # One call, defaults preserved (no overwrite, all methods)
    assert queued == [(str(snapshot.pk), False, None)]
    assert response['Location'].endswith(f'/admin/core/snapshot/{snapshot.pk}/change/')
class TestArchiveResultAdminListView:
    """Tests for the ArchiveResult admin changelist rendering."""

    def test_list_view_renders_readonly_tags_and_noresults_status(self, client, admin_user, snapshot):
        """Changelist should show snapshot tags as read-only and render the
        human-readable NORESULTS status label."""
        from archivebox.core.models import ArchiveResult, Tag
        tag = Tag.objects.create(name='Alpha Research')
        snapshot.tags.add(tag)
        ArchiveResult.objects.create(
            snapshot=snapshot,
            plugin='title',
            status=ArchiveResult.StatusChoices.NORESULTS,
            output_str='No title found',
        )
        client.login(username='testadmin', password='testpassword')
        response = client.get(reverse('admin:core_archiveresult_changelist'), HTTP_HOST=ADMIN_HOST)
        assert response.status_code == 200
        assert b'Alpha Research' in response.content
        assert b'tag-editor-inline readonly' in response.content
        assert b'No Results' in response.content

    def test_archiveresult_model_has_no_retry_at_field(self):
        """Guard against the removed retry_at field reappearing on the model."""
        from archivebox.core.models import ArchiveResult
        assert 'retry_at' not in {field.name for field in ArchiveResult._meta.fields}
class TestLiveProgressView:
    """Tests for the live_progress JSON endpoint's merging of Process rows
    and ArchiveResult rows into per-crawl/per-snapshot progress payloads."""

    def test_live_progress_routes_crawl_process_rows_to_crawl_setup(self, client, admin_user, snapshot, db):
        """on_Crawl__* hook processes should appear under the crawl's
        setup_plugins, not under any snapshot's plugin list."""
        import archivebox.machine.models as machine_models
        from archivebox.machine.models import Machine, Process
        machine_models._CURRENT_MACHINE = None  # reset cached current-machine singleton
        machine = Machine.current()
        Process.objects.create(
            machine=machine,
            process_type=Process.TypeChoices.HOOK,
            status=Process.StatusChoices.RUNNING,
            pid=43210,
            cmd=['/plugins/chrome/on_Crawl__91_chrome_wait.js', '--url=https://example.com'],
            env={
                'CRAWL_ID': str(snapshot.crawl_id),
                'SNAPSHOT_ID': str(snapshot.id),
            },
            started_at=timezone.now(),
        )
        client.login(username='testadmin', password='testpassword')
        response = client.get(reverse('live_progress'), HTTP_HOST=ADMIN_HOST)
        assert response.status_code == 200
        payload = response.json()
        active_crawl = next(crawl for crawl in payload['active_crawls'] if crawl['id'] == str(snapshot.crawl_id))
        setup_entry = next(item for item in active_crawl['setup_plugins'] if item['source'] == 'process')
        active_snapshot = next(item for item in active_crawl['active_snapshots'] if item['id'] == str(snapshot.id))
        assert setup_entry['label'] == 'chrome wait'
        assert setup_entry['status'] == 'started'
        assert active_crawl['worker_pid'] == 43210
        # Crawl-level hook must not leak into the snapshot's plugin list
        assert active_snapshot['all_plugins'] == []

    def test_live_progress_uses_snapshot_process_rows_before_archiveresults(self, client, admin_user, snapshot, db):
        """With no ArchiveResult rows yet, live Process rows alone should
        populate the snapshot's plugin list."""
        import archivebox.machine.models as machine_models
        from archivebox.machine.models import Machine, Process
        machine_models._CURRENT_MACHINE = None  # reset cached current-machine singleton
        machine = Machine.current()
        Process.objects.create(
            machine=machine,
            process_type=Process.TypeChoices.HOOK,
            status=Process.StatusChoices.RUNNING,
            pid=43211,
            cmd=['/plugins/title/on_Snapshot__10_title.py', '--url=https://example.com'],
            env={
                'CRAWL_ID': str(snapshot.crawl_id),
                'SNAPSHOT_ID': str(snapshot.id),
            },
            started_at=timezone.now(),
        )
        client.login(username='testadmin', password='testpassword')
        response = client.get(reverse('live_progress'), HTTP_HOST=ADMIN_HOST)
        assert response.status_code == 200
        payload = response.json()
        active_crawl = next(crawl for crawl in payload['active_crawls'] if crawl['id'] == str(snapshot.crawl_id))
        active_snapshot = next(item for item in active_crawl['active_snapshots'] if item['id'] == str(snapshot.id))
        assert active_snapshot['all_plugins'][0]['source'] == 'process'
        assert active_snapshot['all_plugins'][0]['label'] == 'title'
        assert active_snapshot['all_plugins'][0]['status'] == 'started'
        assert active_snapshot['worker_pid'] == 43211

    def test_live_progress_merges_process_rows_with_archiveresults_when_present(self, client, admin_user, snapshot, db):
        """Process rows and ArchiveResult rows for the same snapshot should be
        merged, not deduplicated away."""
        import archivebox.machine.models as machine_models
        from archivebox.core.models import ArchiveResult
        from archivebox.machine.models import Machine, Process
        machine_models._CURRENT_MACHINE = None  # reset cached current-machine singleton
        machine = Machine.current()
        Process.objects.create(
            machine=machine,
            process_type=Process.TypeChoices.HOOK,
            status=Process.StatusChoices.RUNNING,
            pid=54321,
            cmd=['/plugins/chrome/on_Snapshot__11_chrome_wait.js', '--url=https://example.com'],
            env={
                'CRAWL_ID': str(snapshot.crawl_id),
                'SNAPSHOT_ID': str(snapshot.id),
            },
            started_at=timezone.now(),
        )
        ArchiveResult.objects.create(
            snapshot=snapshot,
            plugin='title',
            status=ArchiveResult.StatusChoices.STARTED,
        )
        client.login(username='testadmin', password='testpassword')
        response = client.get(reverse('live_progress'), HTTP_HOST=ADMIN_HOST)
        assert response.status_code == 200
        payload = response.json()
        active_crawl = next(crawl for crawl in payload['active_crawls'] if crawl['id'] == str(snapshot.crawl_id))
        active_snapshot = next(item for item in active_crawl['active_snapshots'] if item['id'] == str(snapshot.id))
        sources = {item['source'] for item in active_snapshot['all_plugins']}
        plugins = {item['plugin'] for item in active_snapshot['all_plugins']}
        assert sources == {'archiveresult', 'process'}
        assert 'title' in plugins
        assert 'chrome' in plugins

    def test_live_progress_omits_pid_for_exited_process_rows(self, client, admin_user, snapshot, db):
        """Exited processes should report a final status and drop the pid key."""
        import archivebox.machine.models as machine_models
        from archivebox.machine.models import Machine, Process
        machine_models._CURRENT_MACHINE = None  # reset cached current-machine singleton
        machine = Machine.current()
        Process.objects.create(
            machine=machine,
            process_type=Process.TypeChoices.HOOK,
            status=Process.StatusChoices.EXITED,
            exit_code=0,
            pid=99999,
            cmd=['/plugins/title/on_Snapshot__10_title.py', '--url=https://example.com'],
            env={
                'CRAWL_ID': str(snapshot.crawl_id),
                'SNAPSHOT_ID': str(snapshot.id),
            },
            started_at=timezone.now(),
            ended_at=timezone.now(),
        )
        client.login(username='testadmin', password='testpassword')
        response = client.get(reverse('live_progress'), HTTP_HOST=ADMIN_HOST)
        assert response.status_code == 200
        payload = response.json()
        active_crawl = next(crawl for crawl in payload['active_crawls'] if crawl['id'] == str(snapshot.crawl_id))
        active_snapshot = next(item for item in active_crawl['active_snapshots'] if item['id'] == str(snapshot.id))
        process_entry = next(item for item in active_snapshot['all_plugins'] if item['source'] == 'process')
        # exit_code 0 maps to a succeeded status; live pid is no longer shown
        assert process_entry['status'] == 'succeeded'
        assert 'pid' not in process_entry
class TestAdminSnapshotSearch:
"""Tests for admin snapshot search functionality."""

View File

@@ -0,0 +1,305 @@
from pathlib import Path
from uuid import uuid4
import pytest
from django.db import connection
from abx_dl.events import ProcessCompletedEvent, ProcessStartedEvent
from abx_dl.orchestrator import create_bus
pytestmark = pytest.mark.django_db
def _cleanup_machine_process_rows() -> None:
    """Delete all machine_process rows via raw SQL so later tests start clean."""
    with connection.cursor() as cur:
        cur.execute("DELETE FROM machine_process")
def _create_snapshot():
    """Create a minimal Crawl plus a started Snapshot for projection tests."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    crawl = Crawl.objects.create(
        urls="https://example.com",
        created_by_id=get_or_create_system_user_pk(),
    )
    return Snapshot.objects.create(
        url="https://example.com",
        crawl=crawl,
        status=Snapshot.StatusChoices.STARTED,
    )
def _create_machine():
    """Create a Machine row with unique guid/hw_uuid and dummy hardware info."""
    from archivebox.machine.models import Machine
    return Machine.objects.create(
        guid=f'test-guid-{uuid4()}',  # unique per call to avoid collisions
        hostname='test-host',
        hw_in_docker=False,
        hw_in_vm=False,
        hw_manufacturer='Test',
        hw_product='Test Product',
        hw_uuid=f'test-hw-{uuid4()}',
        os_arch='arm64',
        os_family='darwin',
        os_platform='macOS',
        os_release='14.0',
        os_kernel='Darwin',
        stats={},
        config={},
    )
def _create_iface(machine):
    """Create a NetworkInterface row attached to *machine* with dummy geo/ISP data."""
    from archivebox.machine.models import NetworkInterface
    return NetworkInterface.objects.create(
        machine=machine,
        mac_address='00:11:22:33:44:55',
        ip_public='203.0.113.10',  # TEST-NET-3 documentation address
        ip_local='10.0.0.10',
        dns_server='1.1.1.1',
        hostname='test-host',
        iface='en0',
        isp='Test ISP',
        city='Test City',
        region='Test Region',
        country='Test Country',
    )
def test_process_completed_projects_inline_archiveresult():
    """A ProcessCompletedEvent whose stdout carries an inline ArchiveResult
    JSONL record should be projected into a SUCCEEDED ArchiveResult row."""
    from archivebox.core.models import ArchiveResult
    from archivebox.services.archive_result_service import ArchiveResultService, _collect_output_metadata
    from archivebox.services.process_service import ProcessService
    snapshot = _create_snapshot()
    # Real output file on disk so metadata collection has something to find
    plugin_dir = Path(snapshot.output_dir) / "wget"
    plugin_dir.mkdir(parents=True, exist_ok=True)
    (plugin_dir / "index.html").write_text("<html>ok</html>")
    bus = create_bus(name="test_inline_archiveresult")
    process_service = ProcessService(bus)
    service = ArchiveResultService(bus, process_service=process_service)
    event = ProcessCompletedEvent(
        plugin_name="wget",
        hook_name="on_Snapshot__06_wget.finite.bg",
        stdout='{"snapshot_id":"%s","type":"ArchiveResult","status":"succeeded","output_str":"wget/index.html"}\n' % snapshot.id,
        stderr="",
        exit_code=0,
        output_dir=str(plugin_dir),
        output_files=["index.html"],
        process_id="proc-inline",
        snapshot_id=str(snapshot.id),
        start_ts="2026-03-22T12:00:00+00:00",
        end_ts="2026-03-22T12:00:01+00:00",
    )
    output_files, output_size, output_mimetypes = _collect_output_metadata(plugin_dir)
    # Call the projector directly with the parsed inline record
    service._project_from_process_completed(
        event,
        {
            "snapshot_id": str(snapshot.id),
            "plugin": "wget",
            "hook_name": "on_Snapshot__06_wget.finite.bg",
            "status": "succeeded",
            "output_str": "wget/index.html",
        },
        output_files,
        output_size,
        output_mimetypes,
    )
    result = ArchiveResult.objects.get(snapshot=snapshot, plugin="wget", hook_name="on_Snapshot__06_wget.finite.bg")
    assert result.status == ArchiveResult.StatusChoices.SUCCEEDED
    assert result.output_str == "wget/index.html"
    assert "index.html" in result.output_files
    # Remove Process rows created as a side effect so later tests start clean
    _cleanup_machine_process_rows()
def test_process_completed_projects_synthetic_failed_archiveresult():
    """A failed hook (non-zero exit, no inline record) should be projected as
    a FAILED ArchiveResult carrying the stderr message."""
    from archivebox.core.models import ArchiveResult
    from archivebox.services.archive_result_service import ArchiveResultService, _collect_output_metadata
    from archivebox.services.process_service import ProcessService
    snapshot = _create_snapshot()
    plugin_dir = Path(snapshot.output_dir) / "chrome"
    plugin_dir.mkdir(parents=True, exist_ok=True)
    bus = create_bus(name="test_synthetic_archiveresult")
    process_service = ProcessService(bus)
    service = ArchiveResultService(bus, process_service=process_service)
    event = ProcessCompletedEvent(
        plugin_name="chrome",
        hook_name="on_Snapshot__11_chrome_wait",
        stdout="",
        stderr="Hook timed out after 60 seconds",
        exit_code=-1,
        output_dir=str(plugin_dir),
        output_files=[],
        process_id="proc-failed",
        snapshot_id=str(snapshot.id),
        start_ts="2026-03-22T12:00:00+00:00",
        end_ts="2026-03-22T12:01:00+00:00",
    )
    output_files, output_size, output_mimetypes = _collect_output_metadata(plugin_dir)
    # Synthetic record built by the caller (no inline JSONL from the hook)
    service._project_from_process_completed(
        event,
        {
            "plugin": "chrome",
            "hook_name": "on_Snapshot__11_chrome_wait",
            "status": "failed",
            "output_str": "Hook timed out after 60 seconds",
            "error": "Hook timed out after 60 seconds",
        },
        output_files,
        output_size,
        output_mimetypes,
    )
    result = ArchiveResult.objects.get(snapshot=snapshot, plugin="chrome", hook_name="on_Snapshot__11_chrome_wait")
    assert result.status == ArchiveResult.StatusChoices.FAILED
    assert result.output_str == "Hook timed out after 60 seconds"
    assert "Hook timed out" in result.notes
    _cleanup_machine_process_rows()
def test_process_completed_projects_noresults_archiveresult():
    """An inline record with status 'noresults' should be projected into a
    NORESULTS ArchiveResult row."""
    from archivebox.core.models import ArchiveResult
    from archivebox.services.archive_result_service import ArchiveResultService, _collect_output_metadata
    from archivebox.services.process_service import ProcessService
    snapshot = _create_snapshot()
    plugin_dir = Path(snapshot.output_dir) / "title"
    plugin_dir.mkdir(parents=True, exist_ok=True)
    bus = create_bus(name="test_noresults_archiveresult")
    process_service = ProcessService(bus)
    service = ArchiveResultService(bus, process_service=process_service)
    event = ProcessCompletedEvent(
        plugin_name="title",
        hook_name="on_Snapshot__54_title.js",
        stdout='{"snapshot_id":"%s","type":"ArchiveResult","status":"noresults","output_str":"No title found"}\n' % snapshot.id,
        stderr="",
        exit_code=0,
        output_dir=str(plugin_dir),
        output_files=[],
        process_id="proc-noresults",
        snapshot_id=str(snapshot.id),
        start_ts="2026-03-22T12:00:00+00:00",
        end_ts="2026-03-22T12:00:01+00:00",
    )
    output_files, output_size, output_mimetypes = _collect_output_metadata(plugin_dir)
    service._project_from_process_completed(
        event,
        {
            "snapshot_id": str(snapshot.id),
            "plugin": "title",
            "hook_name": "on_Snapshot__54_title.js",
            "status": "noresults",
            "output_str": "No title found",
        },
        output_files,
        output_size,
        output_mimetypes,
    )
    result = ArchiveResult.objects.get(snapshot=snapshot, plugin="title", hook_name="on_Snapshot__54_title.js")
    assert result.status == ArchiveResult.StatusChoices.NORESULTS
    assert result.output_str == "No title found"
    _cleanup_machine_process_rows()
def test_process_started_hydrates_binary_and_iface_from_existing_binary_records(monkeypatch):
    """A started hook process should be linked to the existing Binary record
    matching its *_BINARY env var, and to the current NetworkInterface."""
    from archivebox.machine.models import Binary, NetworkInterface
    from archivebox.services.process_service import ProcessService
    machine = _create_machine()
    iface = _create_iface(machine)
    # Pin the "current" interface to our fixture row
    monkeypatch.setattr(NetworkInterface, 'current', classmethod(lambda cls, refresh=False: iface))
    binary = Binary.objects.create(
        machine=machine,
        name='postlight-parser',
        abspath='/tmp/postlight-parser',
        version='2.2.3',
        binprovider='npm',
        binproviders='npm',
        status=Binary.StatusChoices.INSTALLED,
    )
    bus = create_bus(name="test_process_started_binary_hydration")
    service = ProcessService(bus)
    event = ProcessStartedEvent(
        plugin_name="mercury",
        hook_name="on_Snapshot__57_mercury.py",
        hook_path="/plugins/mercury/on_Snapshot__57_mercury.py",
        hook_args=["--url=https://example.com"],
        output_dir="/tmp/mercury",
        env={
            # MERCURY_BINARY matches the Binary row above; NODE_BINARY does not
            "MERCURY_BINARY": binary.abspath,
            "NODE_BINARY": "/tmp/node",
        },
        timeout=60,
        pid=4321,
        process_id="proc-mercury",
        snapshot_id="",
        start_ts="2026-03-22T12:00:00+00:00",
    )
    service._project_started(event)
    process = service._get_or_create_process(event)
    assert process.binary_id == binary.id
    assert process.iface_id == iface.id
def test_process_started_uses_node_binary_for_js_hooks_without_plugin_binary(monkeypatch):
    """A .js hook with no plugin-specific *_BINARY should fall back to linking
    the NODE_BINARY record."""
    from archivebox.machine.models import Binary, NetworkInterface
    from archivebox.services.process_service import ProcessService
    machine = _create_machine()
    iface = _create_iface(machine)
    monkeypatch.setattr(NetworkInterface, 'current', classmethod(lambda cls, refresh=False: iface))
    node = Binary.objects.create(
        machine=machine,
        name='node',
        abspath='/tmp/node',
        version='22.0.0',
        binprovider='env',
        binproviders='env',
        status=Binary.StatusChoices.INSTALLED,
    )
    bus = create_bus(name="test_process_started_node_fallback")
    service = ProcessService(bus)
    event = ProcessStartedEvent(
        plugin_name="parse_dom_outlinks",
        hook_name="on_Snapshot__75_parse_dom_outlinks.js",
        hook_path="/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js",
        hook_args=["--url=https://example.com"],
        output_dir="/tmp/parse-dom-outlinks",
        env={
            "NODE_BINARY": node.abspath,  # only the runtime binary is declared
        },
        timeout=60,
        pid=9876,
        process_id="proc-parse-dom-outlinks",
        snapshot_id="",
        start_ts="2026-03-22T12:00:00+00:00",
    )
    service._project_started(event)
    process = service._get_or_create_process(event)
    assert process.binary_id == node.id
    assert process.iface_id == iface.id

View File

@@ -44,6 +44,27 @@ def test_add_single_url_creates_snapshot_in_db(tmp_path, process, disable_extrac
assert snapshots[0][0] == 'https://example.com'
def test_add_bg_creates_root_snapshot_rows_immediately(tmp_path, process, disable_extractors_dict):
    """Background add should create root snapshots immediately so the queue is visible in the DB."""
    os.chdir(tmp_path)
    result = subprocess.run(
        ['archivebox', 'add', '--bg', '--depth=0', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )
    assert result.returncode == 0
    # Inspect the index DB directly — no workers have run yet
    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    snapshots = c.execute("SELECT url, status FROM core_snapshot").fetchall()
    conn.close()
    assert len(snapshots) == 1
    assert snapshots[0][0] == 'https://example.com'
    # Row exists but stays 'queued' until a worker picks it up
    assert snapshots[0][1] == 'queued'
def test_add_creates_crawl_record(tmp_path, process, disable_extractors_dict):
"""Test that add command creates a Crawl record in the database."""
os.chdir(tmp_path)
@@ -217,6 +238,32 @@ def test_add_records_selected_persona_on_crawl(tmp_path, process, disable_extrac
assert persona_id
assert default_persona == 'Default'
assert (tmp_path / "personas" / "Default" / "chrome_user_data").is_dir()
def test_add_records_url_filter_overrides_on_crawl(tmp_path, process, disable_extractors_dict):
    """CLI --domain-allowlist/--domain-denylist flags should be persisted as
    URL_ALLOWLIST/URL_DENYLIST in the Crawl's config JSON."""
    os.chdir(tmp_path)
    result = subprocess.run(
        [
            'archivebox', 'add', '--index-only', '--depth=0',
            '--domain-allowlist=example.com,*.example.com',
            '--domain-denylist=static.example.com',
            'https://example.com',
        ],
        capture_output=True,
        env=disable_extractors_dict,
    )
    assert result.returncode == 0
    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    # Pull the stored overrides straight out of the config JSON column
    allowlist, denylist = c.execute(
        "SELECT json_extract(config, '$.URL_ALLOWLIST'), json_extract(config, '$.URL_DENYLIST') FROM crawls_crawl LIMIT 1"
    ).fetchone()
    conn.close()
    assert allowlist == 'example.com,*.example.com'
    assert denylist == 'static.example.com'
    # NOTE(review): persona-dir assertion seems unrelated to URL filters —
    # presumably verifying add also bootstraps the Default persona; confirm intent
    assert (tmp_path / "personas" / "Default" / "chrome_extensions").is_dir()

View File

@@ -16,6 +16,13 @@ from archivebox.tests.conftest import (
create_test_url,
)
PROJECTOR_TEST_ENV = {
'PLUGINS': 'favicon',
'SAVE_FAVICON': 'True',
'USE_COLOR': 'False',
'SHOW_PROGRESS': 'False',
}
class TestArchiveResultCreate:
"""Tests for `archivebox archiveresult create`."""
@@ -38,13 +45,14 @@ class TestArchiveResultCreate:
assert code == 0, f"Command failed: {stderr}"
records = parse_jsonl_output(stdout2)
# Should have the Snapshot passed through and ArchiveResult created
# Should have the Snapshot passed through and an ArchiveResult request emitted
types = [r.get('type') for r in records]
assert 'Snapshot' in types
assert 'ArchiveResult' in types
ar = next(r for r in records if r['type'] == 'ArchiveResult')
assert ar['plugin'] == 'title'
assert 'id' not in ar
def test_create_with_specific_plugin(self, initialized_archive):
"""Create archive result for specific plugin."""
@@ -122,15 +130,33 @@ class TestArchiveResultList:
def test_list_filter_by_status(self, initialized_archive):
"""Filter archive results by status."""
# Create snapshot and archive result
# Create snapshot and materialize an archive result via the runner
url = create_test_url()
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
snapshot = parse_jsonl_output(stdout1)[0]
run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
created = parse_jsonl_output(
run_archivebox_cmd(
['archiveresult', 'list', '--plugin=favicon'],
data_dir=initialized_archive,
)[0]
)[0]
run_archivebox_cmd(
['archiveresult', 'update', '--status=queued'],
stdin=json.dumps(created),
data_dir=initialized_archive,
)
stdout, stderr, code = run_archivebox_cmd(
['archiveresult', 'list', '--status=queued'],
@@ -147,21 +173,28 @@ class TestArchiveResultList:
url = create_test_url()
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
snapshot = parse_jsonl_output(stdout1)[0]
run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
stdout, stderr, code = run_archivebox_cmd(
['archiveresult', 'list', '--plugin=title'],
['archiveresult', 'list', '--plugin=favicon'],
data_dir=initialized_archive,
)
assert code == 0
records = parse_jsonl_output(stdout)
for r in records:
assert r['plugin'] == 'title'
assert r['plugin'] == 'favicon'
def test_list_with_limit(self, initialized_archive):
"""Limit number of results."""
@@ -170,11 +203,18 @@ class TestArchiveResultList:
url = create_test_url()
stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
snapshot = parse_jsonl_output(stdout1)[0]
run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
stdout, stderr, code = run_archivebox_cmd(
['archiveresult', 'list', '--limit=2'],
@@ -196,11 +236,22 @@ class TestArchiveResultUpdate:
snapshot = parse_jsonl_output(stdout1)[0]
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
stdout_run, _, _ = run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
stdout_list, _, _ = run_archivebox_cmd(
['archiveresult', 'list', '--plugin=favicon'],
data_dir=initialized_archive,
)
ar = parse_jsonl_output(stdout_list)[0]
stdout3, stderr, code = run_archivebox_cmd(
['archiveresult', 'update', '--status=failed'],
@@ -225,11 +276,22 @@ class TestArchiveResultDelete:
snapshot = parse_jsonl_output(stdout1)[0]
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
stdout_run, _, _ = run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
stdout_list, _, _ = run_archivebox_cmd(
['archiveresult', 'list', '--plugin=favicon'],
data_dir=initialized_archive,
)
ar = parse_jsonl_output(stdout_list)[0]
stdout, stderr, code = run_archivebox_cmd(
['archiveresult', 'delete'],
@@ -247,11 +309,22 @@ class TestArchiveResultDelete:
snapshot = parse_jsonl_output(stdout1)[0]
stdout2, _, _ = run_archivebox_cmd(
['archiveresult', 'create', '--plugin=title'],
['archiveresult', 'create', '--plugin=favicon'],
stdin=json.dumps(snapshot),
data_dir=initialized_archive,
)
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
stdout_run, _, _ = run_archivebox_cmd(
['run'],
stdin=stdout2,
data_dir=initialized_archive,
timeout=120,
env=PROJECTOR_TEST_ENV,
)
stdout_list, _, _ = run_archivebox_cmd(
['archiveresult', 'list', '--plugin=favicon'],
data_dir=initialized_archive,
)
ar = parse_jsonl_output(stdout_list)[0]
stdout, stderr, code = run_archivebox_cmd(
['archiveresult', 'delete', '--yes'],

View File

@@ -83,7 +83,7 @@ class TestCrawlCreate:
assert code == 0
records = parse_jsonl_output(stdout)
assert 'test-tag' in records[0].get('tags_str', '')
assert 'test-tag' in records[0].get('tags', '')
def test_create_pass_through_other_types(self, initialized_archive):
"""Pass-through records of other types unchanged."""

View File

@@ -173,6 +173,20 @@ def test_collect_urls_from_plugins_reads_only_parser_outputs(tmp_path):
assert collect_urls_from_plugins(tmp_path / "nonexistent") == []
def test_collect_urls_from_plugins_trims_markdown_suffixes(tmp_path):
    """Markdown punctuation glued onto a scraped URL should be stripped on collection."""
    from archivebox.hooks import collect_urls_from_plugins

    parser_dir = tmp_path / "parse_html_urls"
    parser_dir.mkdir()
    (parser_dir / "urls.jsonl").write_text(
        '{"url":"https://docs.sweeting.me/s/youtube-favorites)**"}\n',
        encoding="utf-8",
    )

    results = collect_urls_from_plugins(tmp_path)

    assert len(results) == 1
    assert results[0]["url"] == "https://docs.sweeting.me/s/youtube-favorites"
def test_crawl_create_stdout_pipes_into_run(initialized_archive):
"""`archivebox crawl create | archivebox run` should queue and materialize snapshots."""
url = create_test_url()
@@ -269,8 +283,13 @@ def test_archiveresult_list_stdout_pipes_into_run(initialized_archive):
)
assert ar_create_code == 0, ar_create_stderr
created_records = parse_jsonl_output(ar_create_stdout)
archiveresult = next(record for record in created_records if record.get("type") == "ArchiveResult")
run_archivebox_cmd(
["run"],
stdin=ar_create_stdout,
data_dir=initialized_archive,
timeout=120,
env=PIPE_TEST_ENV,
)
list_stdout, list_stderr, list_code = run_archivebox_cmd(
["archiveresult", "list", "--plugin=favicon"],
@@ -278,6 +297,8 @@ def test_archiveresult_list_stdout_pipes_into_run(initialized_archive):
)
assert list_code == 0, list_stderr
_assert_stdout_is_jsonl_only(list_stdout)
listed_records = parse_jsonl_output(list_stdout)
archiveresult = next(record for record in listed_records if record.get("type") == "ArchiveResult")
run_stdout, run_stderr, run_code = run_archivebox_cmd(
["run"],

View File

@@ -8,6 +8,9 @@ Tests cover:
"""
import json
import sys
import pytest
from archivebox.tests.conftest import (
run_archivebox_cmd,
@@ -266,3 +269,182 @@ class TestRunEmpty:
assert code == 0
assert 'No records to process' in stderr
class TestRunDaemonMode:
    """`archivebox run --daemon` must drain piped-in stdin records before starting the runner."""

    def test_run_daemon_processes_stdin_before_runner(self, monkeypatch):
        """When stdin is a pipe, records are processed first, then the daemon runner starts."""
        from archivebox.cli import archivebox_run

        class FakeStdin:
            # Pretend stdin is a pipe (not a TTY) so the CLI tries to read records from it.
            def isatty(self):
                return False

        monkeypatch.setattr(sys, "stdin", FakeStdin())
        calls = []
        # Both stubs record their invocation order; `or 0` makes them return success.
        monkeypatch.setattr(
            archivebox_run,
            "process_stdin_records",
            lambda: calls.append("stdin") or 0,
        )
        monkeypatch.setattr(
            archivebox_run,
            "run_runner",
            lambda daemon=False: calls.append(f"runner:{daemon}") or 0,
        )
        with pytest.raises(SystemExit) as exit_info:
            archivebox_run.main.callback(daemon=True, crawl_id=None, snapshot_id=None, binary_id=None)
        assert exit_info.value.code == 0
        # stdin processing must happen before the runner, and daemon=True must be forwarded.
        assert calls == ["stdin", "runner:True"]

    def test_run_daemon_skips_runner_if_stdin_processing_fails(self, monkeypatch):
        """A non-zero exit from stdin processing aborts the command before the runner starts."""
        from archivebox.cli import archivebox_run

        class FakeStdin:
            def isatty(self):
                return False

        monkeypatch.setattr(sys, "stdin", FakeStdin())
        # Simulate stdin processing failing with exit code 1.
        monkeypatch.setattr(archivebox_run, "process_stdin_records", lambda: 1)
        # If the runner is invoked at all, this generator-throw raises AssertionError.
        monkeypatch.setattr(
            archivebox_run,
            "run_runner",
            lambda daemon=False: (_ for _ in ()).throw(AssertionError("runner should not start after stdin failure")),
        )
        with pytest.raises(SystemExit) as exit_info:
            archivebox_run.main.callback(daemon=True, crawl_id=None, snapshot_id=None, binary_id=None)
        assert exit_info.value.code == 1
@pytest.mark.django_db
class TestRecoverOrphanedCrawls:
    """Recovery of Crawls left in STARTED state with no live worker processes."""

    def test_recover_orphaned_crawl_requeues_started_crawl_without_active_processes(self):
        """A STARTED crawl with a queued snapshot and no processes gets a retry_at set."""
        from archivebox.base_models.models import get_or_create_system_user_pk
        from archivebox.crawls.models import Crawl
        from archivebox.core.models import Snapshot
        from archivebox.services.runner import recover_orphaned_crawls

        crawl = Crawl.objects.create(
            urls='https://example.com',
            created_by_id=get_or_create_system_user_pk(),
            status=Crawl.StatusChoices.STARTED,
            retry_at=None,  # orphaned: nothing scheduled to retry it
        )
        Snapshot.objects.create(
            url='https://example.com',
            crawl=crawl,
            status=Snapshot.StatusChoices.QUEUED,
            retry_at=None,
        )
        recovered = recover_orphaned_crawls()
        crawl.refresh_from_db()
        assert recovered == 1
        # Status stays STARTED but retry_at is repopulated so the runner picks it back up.
        assert crawl.status == Crawl.StatusChoices.STARTED
        assert crawl.retry_at is not None

    def test_recover_orphaned_crawl_skips_active_child_processes(self):
        """A crawl with a RUNNING hook process tied to it must not be touched."""
        import archivebox.machine.models as machine_models
        from django.utils import timezone
        from archivebox.base_models.models import get_or_create_system_user_pk
        from archivebox.crawls.models import Crawl
        from archivebox.core.models import Snapshot
        from archivebox.machine.models import Machine, Process
        from archivebox.services.runner import recover_orphaned_crawls

        crawl = Crawl.objects.create(
            urls='https://example.com',
            created_by_id=get_or_create_system_user_pk(),
            status=Crawl.StatusChoices.STARTED,
            retry_at=None,
        )
        snapshot = Snapshot.objects.create(
            url='https://example.com',
            crawl=crawl,
            status=Snapshot.StatusChoices.QUEUED,
            retry_at=None,
        )
        # Reset the module-level cache so Machine.current() resolves fresh for this test DB.
        machine_models._CURRENT_MACHINE = None
        machine = Machine.current()
        # A live hook process whose env links it to this crawl/snapshot.
        Process.objects.create(
            machine=machine,
            process_type=Process.TypeChoices.HOOK,
            status=Process.StatusChoices.RUNNING,
            cmd=['/plugins/chrome/on_Crawl__91_chrome_wait.js'],
            env={
                'CRAWL_ID': str(crawl.id),
                'SNAPSHOT_ID': str(snapshot.id),
            },
            started_at=timezone.now(),
        )
        recovered = recover_orphaned_crawls()
        crawl.refresh_from_db()
        assert recovered == 0
        assert crawl.retry_at is None

    def test_recover_orphaned_crawl_seals_when_all_snapshots_are_already_sealed(self):
        """If every child snapshot is SEALED, recovery seals the crawl instead of requeueing."""
        from archivebox.base_models.models import get_or_create_system_user_pk
        from archivebox.crawls.models import Crawl
        from archivebox.core.models import Snapshot
        from archivebox.services.runner import recover_orphaned_crawls

        crawl = Crawl.objects.create(
            urls='https://example.com',
            created_by_id=get_or_create_system_user_pk(),
            status=Crawl.StatusChoices.STARTED,
            retry_at=None,
        )
        Snapshot.objects.create(
            url='https://example.com',
            crawl=crawl,
            status=Snapshot.StatusChoices.SEALED,
            retry_at=None,
        )
        recovered = recover_orphaned_crawls()
        crawl.refresh_from_db()
        assert recovered == 1
        assert crawl.status == Crawl.StatusChoices.SEALED
        assert crawl.retry_at is None
@pytest.mark.django_db
class TestRecoverOrphanedSnapshots:
    """Recovery of Snapshots left in STARTED state with no live worker processes."""

    def test_recover_orphaned_snapshot_requeues_started_snapshot_without_active_processes(self):
        """An orphaned STARTED snapshot is requeued, and its sealed parent crawl reopens."""
        from archivebox.base_models.models import get_or_create_system_user_pk
        from archivebox.crawls.models import Crawl
        from archivebox.core.models import Snapshot
        from archivebox.services.runner import recover_orphaned_snapshots

        crawl = Crawl.objects.create(
            urls='https://example.com',
            created_by_id=get_or_create_system_user_pk(),
            status=Crawl.StatusChoices.SEALED,  # parent already sealed before recovery runs
            retry_at=None,
        )
        snapshot = Snapshot.objects.create(
            url='https://example.com',
            crawl=crawl,
            status=Snapshot.StatusChoices.STARTED,
            retry_at=None,
        )
        recovered = recover_orphaned_snapshots()
        snapshot.refresh_from_db()
        crawl.refresh_from_db()
        assert recovered == 1
        assert snapshot.status == Snapshot.StatusChoices.QUEUED
        assert snapshot.retry_at is not None
        # Recovery also reopens the parent crawl so the requeued snapshot gets processed.
        assert crawl.status == Crawl.StatusChoices.QUEUED
        assert crawl.retry_at is not None

View File

@@ -6,6 +6,15 @@ Verify server can start (basic smoke tests only, no full server testing).
import os
import subprocess
import sys
from unittest.mock import Mock
def test_sqlite_connections_use_explicit_30_second_busy_timeout():
    """Both the driver-level timeout and the busy_timeout PRAGMA must be pinned to 30s."""
    from archivebox.core.settings import SQLITE_CONNECTION_OPTIONS

    options = SQLITE_CONNECTION_OPTIONS["OPTIONS"]
    assert options["timeout"] == 30
    assert "PRAGMA busy_timeout = 30000;" in options["init_command"]
def test_server_shows_usage_info(tmp_path, process):
@@ -39,3 +48,64 @@ def test_server_init_flag(tmp_path, process):
assert result.returncode == 0
assert '--init' in result.stdout or 'init' in result.stdout.lower()
def test_runner_worker_uses_current_interpreter():
    """The supervised runner must invoke the active Python interpreter, not whatever is on PATH."""
    from archivebox.workers.supervisord_util import RUNNER_WORKER

    expected_command = f"{sys.executable} -m archivebox run --daemon"
    assert RUNNER_WORKER["command"] == expected_command
def test_reload_workers_use_current_interpreter_and_supervisord_managed_runner():
    """Reload-mode workers must spawn via sys.executable and wire the pidfile watcher."""
    from archivebox.workers.supervisord_util import RUNNER_WATCH_WORKER, RUNSERVER_WORKER

    pidfile = "/tmp/runserver.pid"
    runserver = RUNSERVER_WORKER("127.0.0.1", "8000", reload=True, pidfile=pidfile)
    watcher = RUNNER_WATCH_WORKER(pidfile)

    assert runserver["name"] == "worker_runserver"
    assert runserver["command"] == f"{sys.executable} -m archivebox manage runserver 127.0.0.1:8000"
    # The runserver env must flag reload mode and expose the pidfile for the watcher.
    for env_setting in (
        'ARCHIVEBOX_RUNSERVER="1"',
        'ARCHIVEBOX_AUTORELOAD="1"',
        'ARCHIVEBOX_RUNSERVER_PIDFILE="/tmp/runserver.pid"',
    ):
        assert env_setting in runserver["environment"]
    assert watcher["name"] == "worker_runner_watch"
    assert watcher["command"] == f"{sys.executable} -m archivebox manage runner_watch --pidfile=/tmp/runserver.pid"
def test_stop_existing_background_runner_cleans_up_and_stops_orchestrators():
    """Stopping the background runner should kill every running orchestrator process,
    fall back to terminate() when kill_tree() raises, stop both supervisord workers,
    and clean up stale Process rows once per runner."""
    from archivebox.cli.archivebox_server import stop_existing_background_runner
    # First runner: kill_tree succeeds.
    runner_a = Mock()
    runner_a.kill_tree = Mock()
    runner_a.terminate = Mock()
    # Second runner: kill_tree raises, forcing the terminate() fallback path.
    runner_b = Mock()
    runner_b.kill_tree = Mock(side_effect=RuntimeError("boom"))
    runner_b.terminate = Mock()
    # Stub Process model: filter(...).order_by(...) yields the two fake runners.
    process_model = Mock()
    process_model.StatusChoices.RUNNING = "running"
    process_model.TypeChoices.ORCHESTRATOR = "orchestrator"
    queryset = Mock()
    queryset.order_by.return_value = [runner_a, runner_b]
    process_model.objects.filter.return_value = queryset
    supervisor = Mock()
    stop_worker = Mock()
    log = Mock()
    stopped = stop_existing_background_runner(
        machine=Mock(),
        process_model=process_model,
        supervisor=supervisor,
        stop_worker_fn=stop_worker,
        log=log,
    )
    assert stopped == 2
    # Stale-process cleanup runs once per runner found.
    assert process_model.cleanup_stale_running.call_count == 2
    # Both supervisord-managed workers are stopped.
    stop_worker.assert_any_call(supervisor, "worker_runner")
    stop_worker.assert_any_call(supervisor, "worker_runner_watch")
    runner_a.kill_tree.assert_called_once_with(graceful_timeout=2.0)
    # runner_b fell back to terminate() after kill_tree raised.
    runner_b.terminate.assert_called_once_with(graceful_timeout=2.0)
    log.assert_called_once()

View File

@@ -74,7 +74,7 @@ class TestSnapshotCreate:
assert code == 0
records = parse_jsonl_output(stdout)
assert 'test-tag' in records[0].get('tags_str', '')
assert 'test-tag' in records[0].get('tags', '')
def test_create_pass_through_other_types(self, initialized_archive):
"""Pass-through records of other types unchanged."""

View File

@@ -0,0 +1,326 @@
from datetime import timedelta
from types import SimpleNamespace
import pytest
from django.test import RequestFactory
from django.utils import timezone
from archivebox.config import views as config_views
from archivebox.core import views as core_views
from archivebox.machine.models import Binary
pytestmark = pytest.mark.django_db
def test_get_db_binaries_by_name_collapses_youtube_dl_aliases(monkeypatch):
    """A legacy youtube-dl row and a yt-dlp row collapse to one canonical yt-dlp entry."""
    now = timezone.now()
    legacy_alias = SimpleNamespace(
        name='youtube-dl',
        version='',
        binprovider='',
        abspath='/usr/bin/youtube-dl',
        status=Binary.StatusChoices.INSTALLED,
        modified_at=now,
    )
    canonical = SimpleNamespace(
        name='yt-dlp',
        version='2026.03.01',
        binprovider='pip',
        abspath='/usr/bin/yt-dlp',
        status=Binary.StatusChoices.INSTALLED,
        modified_at=now + timedelta(seconds=1),  # newer than the alias row
    )
    monkeypatch.setattr(
        config_views.Binary, 'objects',
        SimpleNamespace(all=lambda: [legacy_alias, canonical]),
    )

    binaries = config_views.get_db_binaries_by_name()

    assert 'yt-dlp' in binaries
    assert 'youtube-dl' not in binaries
    assert binaries['yt-dlp'].version == '2026.03.01'
def test_binaries_list_view_uses_db_version_and_hides_youtube_dl_alias(monkeypatch):
    """The list view renders the canonical yt-dlp key even when the DB row is named youtube-dl."""
    request = RequestFactory().get('/admin/environment/binaries/')
    request.user = SimpleNamespace(is_superuser=True)
    # Persisted record carries the stale alias name but is keyed under 'yt-dlp'.
    db_binary = SimpleNamespace(
        name='youtube-dl',
        version='2026.03.01',
        binprovider='pip',
        abspath='/usr/bin/yt-dlp',
        status=Binary.StatusChoices.INSTALLED,
        sha256='',
        modified_at=timezone.now(),
    )
    monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {'yt-dlp': db_binary})
    # __wrapped__ skips the view's decorator so we can inspect the raw context dict.
    context = config_views.binaries_list_view.__wrapped__(request)
    assert len(context['table']['Binary Name']) == 1
    assert str(context['table']['Binary Name'][0].link_item) == 'yt-dlp'
    assert context['table']['Found Version'][0] == '✅ 2026.03.01'
    assert context['table']['Provided By'][0] == 'pip'
    assert context['table']['Found Abspath'][0] == '/usr/bin/yt-dlp'
def test_binaries_list_view_only_shows_persisted_records(monkeypatch):
    """With no persisted Binary rows, every table column in the list view is empty."""
    monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {})
    request = RequestFactory().get('/admin/environment/binaries/')
    request.user = SimpleNamespace(is_superuser=True)

    context = config_views.binaries_list_view.__wrapped__(request)

    table = context['table']
    for column in ('Binary Name', 'Found Version', 'Provided By', 'Found Abspath'):
        assert table[column] == []
def test_binary_detail_view_uses_canonical_db_record(monkeypatch):
    """Requesting the alias URL (youtube-dl) must render the canonical yt-dlp DB record."""
    request = RequestFactory().get('/admin/environment/binaries/youtube-dl/')
    request.user = SimpleNamespace(is_superuser=True)
    db_binary = SimpleNamespace(
        id='019d14cc-6c40-7793-8ff1-0f8bb050e8a3',
        name='yt-dlp',
        version='2026.03.01',
        binprovider='pip',
        abspath='/usr/bin/yt-dlp',
        sha256='abc123',
        status=Binary.StatusChoices.INSTALLED,
        modified_at=timezone.now(),
    )
    monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {'yt-dlp': db_binary})
    # View is invoked with the alias key, but should resolve to the canonical record.
    context = config_views.binary_detail_view.__wrapped__(request, key='youtube-dl')
    section = context['data'][0]
    assert context['title'] == 'yt-dlp'
    assert section['fields']['name'] == 'yt-dlp'
    assert section['fields']['version'] == '2026.03.01'
    assert section['fields']['binprovider'] == 'pip'
    assert section['fields']['abspath'] == '/usr/bin/yt-dlp'
    # Description links to the Django admin change page, filtered to the canonical name.
    assert '/admin/machine/binary/019d14cc-6c40-7793-8ff1-0f8bb050e8a3/change/?_changelist_filters=q%3Dyt-dlp' in section['description']
def test_binary_detail_view_marks_unrecorded_binary(monkeypatch):
    """A binary with no persisted record shows explicit 'unrecorded' placeholder values."""
    monkeypatch.setattr(config_views, 'get_db_binaries_by_name', lambda: {})
    request = RequestFactory().get('/admin/environment/binaries/wget/')
    request.user = SimpleNamespace(is_superuser=True)

    context = config_views.binary_detail_view.__wrapped__(request, key='wget')

    section = context['data'][0]
    assert section['description'] == 'No persisted Binary record found'
    assert section['fields']['status'] == 'unrecorded'
    assert section['fields']['binprovider'] == 'not recorded'
def test_plugin_detail_view_renders_config_in_dedicated_sections(monkeypatch):
    """The plugin detail view must render five sections (summary, hooks, metadata,
    raw config.json, and config properties) with cross-links to docs and admin pages."""
    request = RequestFactory().get('/admin/environment/plugins/builtin.example/')
    request.user = SimpleNamespace(is_superuser=True)
    # Synthetic JSON-schema-style plugin config exercising required_*, output, and x-* keys.
    plugin_config = {
        'title': 'Example Plugin',
        'description': 'Example config used to verify plugin metadata rendering.',
        'type': 'object',
        'required_plugins': ['chrome'],
        'required_binaries': ['example-cli'],
        'output_mimetypes': ['text/plain', 'application/json'],
        'properties': {
            'EXAMPLE_ENABLED': {
                'type': 'boolean',
                'description': 'Enable the example plugin.',
                'x-fallback': 'CHECK_SSL_VALIDITY',
            },
            'EXAMPLE_BINARY': {
                'type': 'string',
                'default': 'gallery-dl',
                'description': 'Filesystem path for example output.',
                'x-aliases': ['USE_EXAMPLE_BINARY'],
            },
        },
    }
    monkeypatch.setattr(config_views, 'get_filesystem_plugins', lambda: {
        'builtin.example': {
            'id': 'builtin.example',
            'name': 'example',
            'source': 'builtin',
            'path': '/plugins/example',
            'hooks': ['on_Snapshot__01_example.py'],
            'config': plugin_config,
        }
    })
    monkeypatch.setattr(config_views, 'get_machine_admin_url', lambda: '/admin/machine/machine/test-machine/change/')
    context = config_views.plugin_detail_view.__wrapped__(request, key='builtin.example')
    assert context['title'] == 'example'
    assert len(context['data']) == 5
    summary_section, hooks_section, metadata_section, config_section, properties_section = context['data']
    # Section 1: summary fields plus path and external docs links in the description.
    assert summary_section['fields'] == {
        'id': 'builtin.example',
        'name': 'example',
        'source': 'builtin',
    }
    assert '/plugins/example' in summary_section['description']
    assert 'https://archivebox.github.io/abx-plugins/#example' in summary_section['description']
    # Section 2: hooks listed as GitHub source links.
    assert hooks_section['name'] == 'Hooks'
    assert hooks_section['fields'] == {}
    assert 'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/example/on_Snapshot__01_example.py' in hooks_section['description']
    assert 'on_Snapshot__01_example.py' in hooks_section['description']
    # Section 3: metadata (title/description, required plugins/binaries, output mimetypes).
    assert metadata_section['name'] == 'Plugin Metadata'
    assert metadata_section['fields'] == {}
    assert 'Example Plugin' in metadata_section['description']
    assert 'Example config used to verify plugin metadata rendering.' in metadata_section['description']
    assert 'https://archivebox.github.io/abx-plugins/#chrome' in metadata_section['description']
    assert '/admin/environment/binaries/example-cli/' in metadata_section['description']
    assert 'text/plain' in metadata_section['description']
    assert 'application/json' in metadata_section['description']
    # Section 4: syntax-highlighted raw config.json dump.
    assert config_section['name'] == 'config.json'
    assert config_section['fields'] == {}
    assert '<pre style=' in config_section['description']
    assert 'EXAMPLE_ENABLED' in config_section['description']
    assert '<span style="color: #0550ae;">"properties"</span>' in config_section['description']
    # Section 5: per-property table linking config keys, fallbacks, aliases, and binaries.
    assert properties_section['name'] == 'Config Properties'
    assert properties_section['fields'] == {}
    assert '/admin/machine/machine/test-machine/change/' in properties_section['description']
    assert '/admin/machine/binary/' in properties_section['description']
    assert '/admin/environment/binaries/' in properties_section['description']
    assert 'EXAMPLE_ENABLED' in properties_section['description']
    assert 'boolean' in properties_section['description']
    assert 'Enable the example plugin.' in properties_section['description']
    assert '/admin/environment/config/EXAMPLE_ENABLED/' in properties_section['description']
    assert '/admin/environment/config/CHECK_SSL_VALIDITY/' in properties_section['description']
    assert '/admin/environment/config/USE_EXAMPLE_BINARY/' in properties_section['description']
    assert '/admin/environment/binaries/gallery-dl/' in properties_section['description']
    assert 'EXAMPLE_BINARY' in properties_section['description']
def test_get_config_definition_link_keeps_core_config_search_link(monkeypatch):
    """Config keys not owned by any plugin fall back to a GitHub code-search link."""
    monkeypatch.setattr(core_views, 'find_plugin_for_config_key', lambda key: None)

    url, label = core_views.get_config_definition_link('CHECK_SSL_VALIDITY')

    assert 'github.com/search' in url
    assert 'CHECK_SSL_VALIDITY' in url
    assert label == 'archivebox/config'
def test_get_config_definition_link_uses_plugin_config_json_for_plugin_options(monkeypatch):
    """Plugin-owned config keys should deep-link to that plugin's config.json on GitHub."""
    plugin_dir = core_views.BUILTIN_PLUGINS_DIR / 'parse_dom_outlinks'
    monkeypatch.setattr(core_views, 'find_plugin_for_config_key', lambda key: 'parse_dom_outlinks')
    monkeypatch.setattr(core_views, 'iter_plugin_dirs', lambda: [plugin_dir])
    url, label = core_views.get_config_definition_link('PARSE_DOM_OUTLINKS_ENABLED')
    assert url == 'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/parse_dom_outlinks/config.json'
    assert label == 'abx_plugins/plugins/parse_dom_outlinks/config.json'
def test_live_config_value_view_renames_source_field_and_uses_plugin_definition_link(monkeypatch):
    """The config-value view should expose 'Currently read from' (not 'Source') and
    embed the plugin-specific definition link in the Type help text."""
    request = RequestFactory().get('/admin/environment/config/PARSE_DOM_OUTLINKS_ENABLED/')
    request.user = SimpleNamespace(is_superuser=True)
    # Stub out every config lookup so the view sees exactly one plugin-owned key.
    monkeypatch.setattr(core_views, 'get_all_configs', lambda: {})
    monkeypatch.setattr(core_views, 'get_flat_config', lambda: {})
    monkeypatch.setattr(core_views, 'get_config', lambda: {'PARSE_DOM_OUTLINKS_ENABLED': True})
    monkeypatch.setattr(core_views, 'find_config_default', lambda key: 'True')
    monkeypatch.setattr(core_views, 'find_config_type', lambda key: 'bool')
    monkeypatch.setattr(core_views, 'find_config_source', lambda key, merged: 'Default')
    monkeypatch.setattr(core_views, 'key_is_safe', lambda key: True)
    monkeypatch.setattr(core_views.CONSTANTS, 'CONFIG_FILE', SimpleNamespace(exists=lambda: False))
    from archivebox.machine.models import Machine
    from archivebox.config.configset import BaseConfigSet
    # No machine-level or file-level overrides for this key.
    monkeypatch.setattr(Machine, 'current', classmethod(lambda cls: SimpleNamespace(id='machine-id', config={})))
    monkeypatch.setattr(BaseConfigSet, 'load_from_file', classmethod(lambda cls, path: {}))
    monkeypatch.setattr(
        core_views,
        'get_config_definition_link',
        lambda key: (
            'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/parse_dom_outlinks/config.json',
            'abx_plugins/plugins/parse_dom_outlinks/config.json',
        ),
    )
    context = core_views.live_config_value_view.__wrapped__(request, key='PARSE_DOM_OUTLINKS_ENABLED')
    section = context['data'][0]
    # Field was renamed from 'Source' to the clearer 'Currently read from'.
    assert 'Currently read from' in section['fields']
    assert 'Source' not in section['fields']
    assert section['fields']['Currently read from'] == 'Default'
    assert 'abx_plugins/plugins/parse_dom_outlinks/config.json' in section['help_texts']['Type']
def test_find_config_source_prefers_environment_over_machine_and_file(monkeypatch):
    """When a key is set in env, machine config, AND config file, env wins."""
    monkeypatch.setenv('CHECK_SSL_VALIDITY', 'false')
    from archivebox.machine.models import Machine
    from archivebox.config.configset import BaseConfigSet
    # Machine config and config file both define the same key with a different value.
    monkeypatch.setattr(
        Machine,
        'current',
        classmethod(lambda cls: SimpleNamespace(id='machine-id', config={'CHECK_SSL_VALIDITY': 'true'})),
    )
    monkeypatch.setattr(
        BaseConfigSet,
        'load_from_file',
        classmethod(lambda cls, path: {'CHECK_SSL_VALIDITY': 'true'}),
    )
    assert core_views.find_config_source('CHECK_SSL_VALIDITY', {'CHECK_SSL_VALIDITY': False}) == 'Environment'
def test_live_config_value_view_priority_text_matches_runtime_precedence(monkeypatch):
    """The help text must list sources in the same precedence order the runtime uses:
    Environment > Machine > Config File > Default."""
    request = RequestFactory().get('/admin/environment/config/CHECK_SSL_VALIDITY/')
    request.user = SimpleNamespace(is_superuser=True)
    monkeypatch.setattr(core_views, 'get_all_configs', lambda: {})
    monkeypatch.setattr(core_views, 'get_flat_config', lambda: {'CHECK_SSL_VALIDITY': True})
    monkeypatch.setattr(core_views, 'get_config', lambda: {'CHECK_SSL_VALIDITY': False})
    monkeypatch.setattr(core_views, 'find_config_default', lambda key: 'True')
    monkeypatch.setattr(core_views, 'find_config_type', lambda key: 'bool')
    monkeypatch.setattr(core_views, 'key_is_safe', lambda key: True)
    from archivebox.machine.models import Machine
    from archivebox.config.configset import BaseConfigSet
    # Set the key at EVERY level so precedence resolution is exercised.
    monkeypatch.setattr(
        Machine,
        'current',
        classmethod(lambda cls: SimpleNamespace(id='machine-id', config={'CHECK_SSL_VALIDITY': 'true'})),
    )
    monkeypatch.setattr(
        BaseConfigSet,
        'load_from_file',
        classmethod(lambda cls, path: {'CHECK_SSL_VALIDITY': 'true'}),
    )
    monkeypatch.setattr(core_views.CONSTANTS, 'CONFIG_FILE', SimpleNamespace(exists=lambda: True))
    monkeypatch.setenv('CHECK_SSL_VALIDITY', 'false')
    context = core_views.live_config_value_view.__wrapped__(request, key='CHECK_SSL_VALIDITY')
    section = context['data'][0]
    assert section['fields']['Currently read from'] == 'Environment'
    help_text = section['help_texts']['Currently read from']
    # The listed order in the help text must match runtime precedence.
    assert help_text.index('Environment') < help_text.index('Machine') < help_text.index('Config File') < help_text.index('Default')
    assert 'Configuration Sources (highest priority first):' in section['help_texts']['Value']

View File

@@ -0,0 +1,220 @@
from typing import cast
import pytest
from django.contrib.auth import get_user_model
from django.contrib.auth.models import UserManager
from django.urls import reverse
from archivebox.crawls.admin import CrawlAdminForm
from archivebox.crawls.models import Crawl
from archivebox.core.models import Snapshot
pytestmark = pytest.mark.django_db
User = get_user_model()
ADMIN_HOST = 'admin.archivebox.localhost:8000'
@pytest.fixture
def admin_user(db):
    """Superuser account used to authenticate against the crawls admin."""
    manager = cast(UserManager, User.objects)
    return manager.create_superuser(
        username='crawladmin',
        email='crawladmin@test.com',
        password='testpassword',
    )
@pytest.fixture
def crawl(admin_user):
    """A two-URL crawl tagged 'alpha,beta', owned by the admin fixture user."""
    seed_urls = 'https://example.com\nhttps://example.org'
    return Crawl.objects.create(urls=seed_urls, tags_str='alpha,beta', created_by=admin_user)
def test_crawl_admin_change_view_renders_tag_editor_widget(client, admin_user, crawl):
    """The crawl change page should render the custom tag-editor widget with existing tags."""
    client.login(username='crawladmin', password='testpassword')
    change_url = reverse('admin:crawls_crawl_change', args=[crawl.pk])

    response = client.get(change_url, HTTP_HOST=ADMIN_HOST)

    assert response.status_code == 200
    for fragment in (b'name="tags_editor"', b'tag-editor-container', b'alpha', b'beta'):
        assert fragment in response.content
def test_crawl_admin_add_view_renders_url_filter_alias_fields(client, admin_user):
    """The crawl add form should expose allowlist/denylist fields and the same-domain preset."""
    client.login(username='crawladmin', password='testpassword')
    add_url = reverse('admin:crawls_crawl_add')

    response = client.get(add_url, HTTP_HOST=ADMIN_HOST)

    assert response.status_code == 200
    for fragment in (
        b'name="url_filters_allowlist"',
        b'name="url_filters_denylist"',
        b'Same domain only',
    ):
        assert fragment in response.content
def test_crawl_admin_form_saves_tags_editor_to_tags_str(crawl, admin_user):
    """Saving the admin form should dedupe tags (case-insensitively, keeping first casing)
    into tags_str and persist the URL filter textareas into the crawl's config."""
    form = CrawlAdminForm(
        data={
            'created_at': crawl.created_at.strftime('%Y-%m-%d %H:%M:%S'),
            'urls': crawl.urls,
            'config': '{}',
            'max_depth': '0',
            # 'Alpha' duplicates 'alpha' and should be collapsed on save.
            'tags_editor': 'alpha, beta, Alpha, gamma',
            'url_filters_allowlist': 'example.com\n*.example.com',
            'url_filters_denylist': 'static.example.com',
            'persona_id': '',
            'label': '',
            'notes': '',
            'schedule': '',
            'status': crawl.status,
            'retry_at': crawl.retry_at.strftime('%Y-%m-%d %H:%M:%S'),
            'created_by': str(admin_user.pk),
            'num_uses_failed': '0',
            'num_uses_succeeded': '0',
        },
        instance=crawl,
    )
    assert form.is_valid(), form.errors
    updated = form.save()
    updated.refresh_from_db()
    assert updated.tags_str == 'alpha,beta,gamma'
    # Filter textareas land in config as newline-separated strings.
    assert updated.config['URL_ALLOWLIST'] == 'example.com\n*.example.com'
    assert updated.config['URL_DENYLIST'] == 'static.example.com'
def test_crawl_admin_delete_snapshot_action_removes_snapshot_and_url(client, admin_user):
    """The admin delete-snapshot action removes both the Snapshot row and its URL
    from the parent crawl's urls list."""
    crawl = Crawl.objects.create(
        urls='https://example.com/remove-me',
        created_by=admin_user,
    )
    snapshot = Snapshot.objects.create(
        crawl=crawl,
        url='https://example.com/remove-me',
    )
    client.login(username='crawladmin', password='testpassword')
    response = client.post(
        reverse('admin:crawls_crawl_snapshot_delete', args=[crawl.pk, snapshot.pk]),
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    assert response.json()['ok'] is True
    assert not Snapshot.objects.filter(pk=snapshot.pk).exists()
    crawl.refresh_from_db()
    # The URL is pruned from the crawl so it won't be re-snapshotted later.
    assert 'https://example.com/remove-me' not in crawl.urls
def test_crawl_admin_exclude_domain_action_prunes_urls_and_pending_snapshots(client, admin_user):
    """The exclude-domain admin action should add the snapshot's domain to the crawl
    denylist, prune that domain's URLs from crawl.urls, delete its queued snapshots,
    and leave already-sealed snapshots on other domains untouched."""
    crawl = Crawl.objects.create(
        urls='\n'.join([
            'https://cdn.example.com/asset.js',
            'https://cdn.example.com/second.js',
            'https://example.com/root',
        ]),
        created_by=admin_user,
    )
    # Queued snapshot on the excluded domain: expected to be deleted.
    queued_snapshot = Snapshot.objects.create(
        crawl=crawl,
        url='https://cdn.example.com/asset.js',
        status=Snapshot.StatusChoices.QUEUED,
    )
    # Sealed snapshot on a different domain: expected to survive.
    preserved_snapshot = Snapshot.objects.create(
        crawl=crawl,
        url='https://example.com/root',
        status=Snapshot.StatusChoices.SEALED,
    )
    client.login(username='crawladmin', password='testpassword')
    response = client.post(
        reverse('admin:crawls_crawl_snapshot_exclude_domain', args=[crawl.pk, queued_snapshot.pk]),
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    payload = response.json()
    assert payload['ok'] is True
    assert payload['domain'] == 'cdn.example.com'
    crawl.refresh_from_db()
    # The domain landed on the crawl's own (non-effective) denylist.
    assert crawl.get_url_denylist(use_effective_config=False) == ['cdn.example.com']
    assert 'https://cdn.example.com/asset.js' not in crawl.urls
    assert 'https://cdn.example.com/second.js' not in crawl.urls
    assert 'https://example.com/root' in crawl.urls
    assert not Snapshot.objects.filter(pk=queued_snapshot.pk).exists()
    assert Snapshot.objects.filter(pk=preserved_snapshot.pk).exists()
def test_snapshot_from_json_trims_markdown_suffixes_on_discovered_urls(crawl):
    """Snapshot.from_json() should strip trailing markdown punctuation from discovered URLs."""
    record = {'url': 'https://docs.sweeting.me/s/youtube-favorites)**'}
    snapshot = Snapshot.from_json(
        record,
        overrides={'crawl': crawl},
        queue_for_extraction=False,
    )
    assert snapshot is not None
    # The trailing ')**' markdown residue was trimmed off.
    assert snapshot.url == 'https://docs.sweeting.me/s/youtube-favorites'
def test_create_snapshots_from_urls_respects_url_allowlist_and_denylist(admin_user):
    """Only URLs matching the allowlist and not the denylist become snapshots."""
    seed_urls = [
        'https://example.com/root',
        'https://static.example.com/app.js',
        'https://other.test/page',
    ]
    crawl = Crawl.objects.create(
        urls='\n'.join(seed_urls),
        created_by=admin_user,
        config={
            'URL_ALLOWLIST': 'example.com',
            'URL_DENYLIST': 'static.example.com',
        },
    )
    snapshots = crawl.create_snapshots_from_urls()
    # static.example.com is denylisted; other.test is not allowlisted.
    assert [snap.url for snap in snapshots] == ['https://example.com/root']
def test_url_filter_regex_lists_preserve_commas_and_split_on_newlines_only(admin_user):
    """Allow/deny list config values must split into patterns on newlines only,
    so regex patterns containing literal commas survive intact."""
    crawl = Crawl.objects.create(
        urls='\n'.join([
            'https://example.com/root',
            'https://example.com/path,with,commas',
            'https://other.test/page',
        ]),
        created_by=admin_user,
        config={
            # Two patterns joined by a newline; the commas inside each are literal regex chars.
            'URL_ALLOWLIST': r'^https://example\.com/(root|path,with,commas)$' + '\n' + r'^https://other\.test/page$',
            'URL_DENYLIST': r'^https://example\.com/path,with,commas$',
        },
    )
    assert crawl.get_url_allowlist(use_effective_config=False) == [
        r'^https://example\.com/(root|path,with,commas)$',
        r'^https://other\.test/page$',
    ]
    assert crawl.get_url_denylist(use_effective_config=False) == [
        r'^https://example\.com/path,with,commas$',
    ]
    created = crawl.create_snapshots_from_urls()
    # The comma-containing URL is allowlisted but then excluded by the denylist.
    assert [snapshot.url for snapshot in created] == [
        'https://example.com/root',
        'https://other.test/page',
    ]

View File

@@ -14,7 +14,7 @@ Tests cover:
import os
from datetime import timedelta
from typing import cast
from unittest.mock import patch
from unittest.mock import Mock, patch
import pytest
from django.test import TestCase
@@ -89,11 +89,45 @@ class TestMachineModel(TestCase):
assert result is not None
self.assertEqual(result.config.get('WGET_BINARY'), '/usr/bin/wget')
    def test_machine_from_jsonl_strips_legacy_chromium_version(self):
        """Machine.from_json() should ignore legacy browser version keys."""
        Machine.current()  # Ensure machine exists
        record = {
            'config': {
                'WGET_BINARY': '/usr/bin/wget',
                # Legacy key expected to be stripped during import.
                'CHROMIUM_VERSION': '123.4.5',
            },
        }
        result = Machine.from_json(record)
        self.assertIsNotNone(result)
        assert result is not None  # narrow Optional for the type checker
        # Regular config keys are kept; the legacy key is dropped.
        self.assertEqual(result.config.get('WGET_BINARY'), '/usr/bin/wget')
        self.assertNotIn('CHROMIUM_VERSION', result.config)
def test_machine_from_jsonl_invalid(self):
"""Machine.from_json() should return None for invalid records."""
result = Machine.from_json({'invalid': 'record'})
self.assertIsNone(result)
    def test_machine_current_strips_legacy_chromium_version(self):
        """Machine.current() should clean legacy browser version keys from persisted config."""
        import archivebox.machine.models as models
        machine = Machine.current()
        machine.config = {
            'CHROME_BINARY': '/tmp/chromium',
            # Legacy key expected to be removed when Machine.current() re-reads the row.
            'CHROMIUM_VERSION': '123.4.5',
        }
        machine.save(update_fields=['config'])
        # Seed the module-level cache so Machine.current() operates on this instance.
        models._CURRENT_MACHINE = machine
        refreshed = Machine.current()
        self.assertEqual(refreshed.config.get('CHROME_BINARY'), '/tmp/chromium')
        self.assertNotIn('CHROMIUM_VERSION', refreshed.config)
def test_machine_manager_current(self):
"""Machine.objects.current() should return current machine."""
machine = Machine.current()
@@ -131,6 +165,36 @@ class TestNetworkInterfaceModel(TestCase):
interface = NetworkInterface.current()
self.assertIsNotNone(interface)
    def test_networkinterface_current_refresh_creates_new_interface_when_properties_change(self):
        """Refreshing should persist a new NetworkInterface row when the host network fingerprint changes."""
        import archivebox.machine.models as models
        # First observed host network state.
        first = {
            'mac_address': 'aa:bb:cc:dd:ee:01',
            'ip_public': '1.1.1.1',
            'ip_local': '192.168.1.10',
            'dns_server': '8.8.8.8',
            'hostname': 'host-a',
            'iface': 'en0',
            'isp': 'ISP A',
            'city': 'City',
            'region': 'Region',
            'country': 'Country',
        }
        # Same host, but public/local IPs changed between the two refreshes.
        second = {
            **first,
            'ip_public': '2.2.2.2',
            'ip_local': '10.0.0.5',
        }
        with patch.object(models, 'get_host_network', side_effect=[first, second]):
            interface1 = NetworkInterface.current(refresh=True)
            interface2 = NetworkInterface.current(refresh=True)
            # A new row was created for the changed fingerprint, on the same machine.
            self.assertNotEqual(interface1.id, interface2.id)
            self.assertEqual(interface1.machine_id, interface2.machine_id)
            self.assertEqual(NetworkInterface.objects.filter(machine=interface1.machine).count(), 2)
class TestBinaryModel(TestCase):
"""Test the Binary model."""
@@ -360,6 +424,8 @@ class TestProcessCurrent(TestCase):
self.assertEqual(proc.pid, os.getpid())
self.assertEqual(proc.status, Process.StatusChoices.RUNNING)
self.assertIsNotNone(proc.machine)
self.assertIsNotNone(proc.iface)
self.assertEqual(proc.iface.machine_id, proc.machine_id)
self.assertIsNotNone(proc.started_at)
def test_process_current_caches(self):
@@ -375,6 +441,12 @@ class TestProcessCurrent(TestCase):
result = Process._detect_process_type()
self.assertEqual(result, Process.TypeChoices.ORCHESTRATOR)
def test_process_detect_type_runner_watch(self):
"""runner_watch should be classified as a worker, not the orchestrator itself."""
with patch('sys.argv', ['archivebox', 'manage', 'runner_watch', '--pidfile=/tmp/runserver.pid']):
result = Process._detect_process_type()
self.assertEqual(result, Process.TypeChoices.WORKER)
def test_process_detect_type_cli(self):
"""_detect_process_type should detect CLI commands."""
with patch('sys.argv', ['archivebox', 'add', 'http://example.com']):
@@ -387,6 +459,27 @@ class TestProcessCurrent(TestCase):
result = Process._detect_process_type()
self.assertEqual(result, Process.TypeChoices.BINARY)
    def test_process_proc_allows_interpreter_wrapped_script(self):
        """Process.proc should accept a script recorded in DB when wrapped by an interpreter in psutil."""
        proc = Process.objects.create(
            machine=Machine.current(),
            # The DB records the script itself as argv[0]...
            cmd=['/tmp/on_Crawl__90_chrome_launch.daemon.bg.js', '--url=https://example.com/'],
            pid=12345,
            status=Process.StatusChoices.RUNNING,
            started_at=timezone.now(),
        )
        os_proc = Mock()
        # ...while the OS-level cmdline shows the same script wrapped by `node`.
        os_proc.create_time.return_value = proc.started_at.timestamp()
        os_proc.cmdline.return_value = [
            'node',
            '/tmp/on_Crawl__90_chrome_launch.daemon.bg.js',
            '--url=https://example.com/',
        ]
        with patch('archivebox.machine.models.psutil.Process', return_value=os_proc):
            # proc.proc must match the interpreter-wrapped process, not reject it.
            self.assertIs(proc.proc, os_proc)
class TestProcessHierarchy(TestCase):
"""Test Process parent/child relationships."""

View File

@@ -0,0 +1,191 @@
import pytest
from typing import cast
from django.contrib.auth import get_user_model
from django.contrib.auth.models import UserManager
from django.urls import reverse
from archivebox.personas.importers import (
PersonaImportResult,
discover_persona_template_profiles,
import_persona_from_source,
resolve_browser_profile_source,
resolve_custom_import_source,
)
pytestmark = pytest.mark.django_db
User = get_user_model()
ADMIN_HOST = "admin.archivebox.localhost:8000"
@pytest.fixture
def admin_user(db):
    """Superuser used to authenticate against the persona admin views."""
    manager = cast(UserManager, User.objects)
    return manager.create_superuser(
        username="personaadmin",
        email="personaadmin@test.com",
        password="testpassword",
    )
def _make_profile_source(tmp_path):
    """Create a fake Chrome user-data dir on disk and resolve it into an import source."""
    user_data_dir = tmp_path / "Chrome User Data"
    default_dir = user_data_dir / "Default"
    default_dir.mkdir(parents=True)
    # An empty Preferences file is enough to make the dir look like a real profile.
    (default_dir / "Preferences").write_text("{}")
    return resolve_browser_profile_source(
        browser="chrome",
        user_data_dir=user_data_dir,
        profile_dir="Default",
        browser_binary="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    )
def test_resolve_custom_import_source_accepts_exact_profile_dir(tmp_path):
    """Pointing at a concrete profile dir should resolve to a browser-profile source."""
    user_data_dir = tmp_path / "Brave User Data"
    target_profile = user_data_dir / "Profile 2"
    target_profile.mkdir(parents=True)
    (target_profile / "Preferences").write_text("{}")
    resolved = resolve_custom_import_source(str(target_profile))
    assert resolved.kind == "browser-profile"
    # The parent user-data dir and the profile name were split out correctly.
    assert resolved.user_data_dir == user_data_dir.resolve()
    assert resolved.profile_dir == "Profile 2"
def test_resolve_custom_import_source_accepts_cdp_url():
    """A websocket CDP URL should resolve to a cdp-kind import source."""
    cdp_url = "ws://127.0.0.1:9222/devtools/browser/test-session"
    resolved = resolve_custom_import_source(cdp_url)
    assert resolved.kind == "cdp"
    assert resolved.cdp_url == cdp_url
def test_discover_persona_template_profiles_finds_chrome_profile_dirs(tmp_path):
    """Discovery should surface <personas_dir>/<name>/chrome_profile/<profile> dirs
    as persona-browser template sources."""
    personas_dir = tmp_path / "personas"
    chrome_profile = personas_dir / "ExistingPersona" / "chrome_profile"
    default_profile = chrome_profile / "Default"
    default_profile.mkdir(parents=True)
    # Preferences file makes the dir look like a real Chrome profile (presumably what discovery keys on).
    (default_profile / "Preferences").write_text("{}")
    discovered = discover_persona_template_profiles(personas_dir=personas_dir)
    assert len(discovered) == 1
    assert discovered[0].browser == "persona"
    assert discovered[0].source_name == "ExistingPersona"
    assert discovered[0].profile_dir == "Default"
    assert discovered[0].user_data_dir == chrome_profile.resolve()
def test_discover_persona_template_profiles_finds_home_abx_personas(monkeypatch, tmp_path):
    """When the configured personas dir is missing, discovery should fall back to
    ~/.config/abx/personas chrome_profile dirs."""
    from archivebox.config.constants import CONSTANTS
    # Point the configured personas dir at a nonexistent path...
    monkeypatch.setattr(CONSTANTS, "PERSONAS_DIR", tmp_path / "missing-data-personas")
    # ...and fake the home dir so the ~/.config/abx fallback lands inside tmp_path.
    monkeypatch.setattr("archivebox.personas.importers.Path.home", lambda: tmp_path)
    chrome_profile = tmp_path / ".config" / "abx" / "personas" / "HomePersona" / "chrome_profile"
    default_profile = chrome_profile / "Default"
    default_profile.mkdir(parents=True)
    (default_profile / "Preferences").write_text("{}")
    discovered = discover_persona_template_profiles()
    assert len(discovered) == 1
    assert discovered[0].browser == "persona"
    assert discovered[0].source_name == "HomePersona"
    assert discovered[0].profile_dir == "Default"
    assert discovered[0].user_data_dir == chrome_profile.resolve()
def test_persona_admin_add_view_renders_import_ui(client, admin_user, monkeypatch, tmp_path):
    """The persona admin add page should render the browser-profile import UI."""
    source = _make_profile_source(tmp_path)
    # Patch discovery in both the form and the admin so the fake profile is offered.
    monkeypatch.setattr("archivebox.personas.forms.discover_local_browser_profiles", lambda: [source])
    monkeypatch.setattr("archivebox.personas.admin.discover_local_browser_profiles", lambda: [source])
    client.login(username="personaadmin", password="testpassword")
    response = client.get(reverse("admin:personas_persona_add"), HTTP_HOST=ADMIN_HOST)
    assert response.status_code == 200
    for marker in (
        b"Bootstrap a persona from a real browser session",
        b"Google Chrome / Default",
        b"auth.json",
    ):
        assert marker in response.content
def test_import_persona_from_source_copies_user_agent_to_persona_config(admin_user, monkeypatch, tmp_path):
    """Importing a source should copy the user agent reported by export_browser_state
    into persona.config['USER_AGENT'] and flag it on the result."""
    from archivebox.personas.models import Persona
    source = _make_profile_source(tmp_path)
    persona = Persona.objects.create(name="AgentPersona", created_by=admin_user)
    def fake_export_browser_state(**kwargs):
        # Presumably (success, state, message) — confirm against export_browser_state's signature.
        return True, {"user_agent": "Mozilla/5.0 Test Imported UA"}, "ok"
    monkeypatch.setattr("archivebox.personas.importers.export_browser_state", fake_export_browser_state)
    # All copy/cookie/storage steps disabled: only the user-agent import runs.
    result = import_persona_from_source(
        persona,
        source,
        copy_profile=False,
        import_cookies=False,
        capture_storage=False,
    )
    persona.refresh_from_db()
    assert result.user_agent_imported is True
    assert persona.config["USER_AGENT"] == "Mozilla/5.0 Test Imported UA"
def test_persona_admin_add_post_runs_shared_importer(client, admin_user, monkeypatch, tmp_path):
    """Submitting the persona add form with import fields should invoke the shared
    import_persona_from_source() with the selected source and the checkbox kwargs."""
    from archivebox.personas.models import Persona
    source = _make_profile_source(tmp_path)
    monkeypatch.setattr("archivebox.personas.forms.discover_local_browser_profiles", lambda: [source])
    monkeypatch.setattr("archivebox.personas.admin.discover_local_browser_profiles", lambda: [source])
    calls = {}
    def fake_import(persona, selected_source, **kwargs):
        # Record how the importer was invoked and fabricate the files it would create.
        calls["persona_name"] = persona.name
        calls["source"] = selected_source
        calls["kwargs"] = kwargs
        (persona.path / "cookies.txt").parent.mkdir(parents=True, exist_ok=True)
        (persona.path / "cookies.txt").write_text("# Netscape HTTP Cookie File\n")
        (persona.path / "auth.json").write_text('{"TYPE":"auth","cookies":[],"localStorage":{},"sessionStorage":{}}\n')
        return PersonaImportResult(
            source=selected_source,
            profile_copied=True,
            cookies_imported=True,
            storage_captured=True,
        )
    monkeypatch.setattr("archivebox.personas.forms.import_persona_from_source", fake_import)
    client.login(username="personaadmin", password="testpassword")
    response = client.post(
        reverse("admin:personas_persona_add"),
        {
            "name": "ImportedPersona",
            "created_by": str(admin_user.pk),
            "config": "{}",
            "import_mode": "discovered",
            "import_discovered_profile": source.choice_value,
            # All three import checkboxes checked -> all three kwargs True.
            "import_copy_profile": "on",
            "import_extract_cookies": "on",
            "import_capture_storage": "on",
            "_save": "Save",
        },
        HTTP_HOST=ADMIN_HOST,
    )
    # A successful admin save redirects.
    assert response.status_code == 302
    persona = Persona.objects.get(name="ImportedPersona")
    assert calls["persona_name"] == "ImportedPersona"
    assert calls["source"].profile_dir == "Default"
    assert calls["kwargs"] == {
        "copy_profile": True,
        "import_cookies": True,
        "capture_storage": True,
    }
    # The persona now points at the files the importer wrote.
    assert persona.COOKIES_FILE.endswith("cookies.txt")
    assert persona.AUTH_STORAGE_FILE.endswith("auth.json")

View File

@@ -0,0 +1,640 @@
import asyncio
import subprocess
from types import SimpleNamespace
import pytest
from django.test import RequestFactory
pytestmark = pytest.mark.django_db
class _DummyBus:
    """Minimal stand-in for the runner's event bus: records its name, no-op async stop()."""
    def __init__(self, name: str):
        self.name = name

    async def stop(self):
        # Nothing to tear down for the fake bus.
        return None
class _DummyService:
    """No-op replacement for the runner's service classes; accepts any constructor args."""
    def __init__(self, *args, **kwargs):
        pass
class _DummyAbxServices:
    """Fake abx services container exposing only process.wait_for_background_monitors."""
    def __init__(self):
        self.process = SimpleNamespace(wait_for_background_monitors=self._wait)

    async def _wait(self):
        # No background monitors exist in tests — resolve immediately.
        return None
async def _call_sync(func, *args, **kwargs):
return func(*args, **kwargs)
def test_run_snapshot_uses_isolated_bus_per_snapshot(monkeypatch):
    """Each snapshot run must get its own event bus (in addition to the shared
    crawl bus), with the snapshot's own id/url threaded through the download config."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://blog.sweeting.me\nhttps://sweeting.me',
        created_by_id=get_or_create_system_user_pk(),
    )
    snapshot_a = Snapshot.objects.create(
        url='https://blog.sweeting.me',
        crawl=crawl,
        status=Snapshot.StatusChoices.QUEUED,
    )
    snapshot_b = Snapshot.objects.create(
        url='https://sweeting.me',
        crawl=crawl,
        status=Snapshot.StatusChoices.QUEUED,
    )
    created_buses: list[_DummyBus] = []
    def fake_create_bus(*, name, total_timeout=3600.0, **kwargs):
        # Record every bus the runner creates so we can count them below.
        bus = _DummyBus(name)
        created_buses.append(bus)
        return bus
    monkeypatch.setattr(runner_module, 'create_bus', fake_create_bus)
    monkeypatch.setattr(runner_module, 'discover_plugins', lambda: {})
    # Stub out all runner services so no real work happens.
    monkeypatch.setattr(runner_module, 'ProcessService', _DummyService)
    monkeypatch.setattr(runner_module, 'MachineService', _DummyService)
    monkeypatch.setattr(runner_module, 'BinaryService', _DummyService)
    monkeypatch.setattr(runner_module, 'TagService', _DummyService)
    monkeypatch.setattr(runner_module, 'CrawlService', _DummyService)
    monkeypatch.setattr(runner_module, 'SnapshotService', _DummyService)
    monkeypatch.setattr(runner_module, 'ArchiveResultService', _DummyService)
    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
    download_calls = []
    async def fake_download(*, url, bus, config_overrides, snapshot, **kwargs):
        # Capture which bus/config each snapshot's download received.
        download_calls.append(
            {
                'url': url,
                'bus': bus,
                'snapshot_id': config_overrides['SNAPSHOT_ID'],
                'source_url': config_overrides['SOURCE_URL'],
                'abx_snapshot_id': snapshot.id,
            }
        )
        await asyncio.sleep(0)
        return []
    monkeypatch.setattr(runner_module, 'download', fake_download)
    crawl_runner = runner_module.CrawlRunner(crawl)
    # Pre-baked run data for both snapshots, bypassing the DB loader.
    snapshot_data = {
        str(snapshot_a.id): {
            'id': str(snapshot_a.id),
            'url': snapshot_a.url,
            'title': snapshot_a.title,
            'timestamp': snapshot_a.timestamp,
            'bookmarked_at': snapshot_a.bookmarked_at.isoformat() if snapshot_a.bookmarked_at else "",
            'created_at': snapshot_a.created_at.isoformat() if snapshot_a.created_at else "",
            'tags': snapshot_a.tags_str(),
            'depth': snapshot_a.depth,
            'parent_snapshot_id': str(snapshot_a.parent_snapshot_id) if snapshot_a.parent_snapshot_id else None,
            'output_dir': str(snapshot_a.output_dir),
            'config': crawl_runner._snapshot_config(snapshot_a),
        },
        str(snapshot_b.id): {
            'id': str(snapshot_b.id),
            'url': snapshot_b.url,
            'title': snapshot_b.title,
            'timestamp': snapshot_b.timestamp,
            'bookmarked_at': snapshot_b.bookmarked_at.isoformat() if snapshot_b.bookmarked_at else "",
            'created_at': snapshot_b.created_at.isoformat() if snapshot_b.created_at else "",
            'tags': snapshot_b.tags_str(),
            'depth': snapshot_b.depth,
            'parent_snapshot_id': str(snapshot_b.parent_snapshot_id) if snapshot_b.parent_snapshot_id else None,
            'output_dir': str(snapshot_b.output_dir),
            'config': crawl_runner._snapshot_config(snapshot_b),
        },
    }
    monkeypatch.setattr(crawl_runner, '_load_snapshot_run_data', lambda snapshot_id: snapshot_data[snapshot_id])
    async def run_both():
        # Run both snapshots concurrently to exercise bus isolation.
        await asyncio.gather(
            crawl_runner._run_snapshot(str(snapshot_a.id)),
            crawl_runner._run_snapshot(str(snapshot_b.id)),
        )
    asyncio.run(run_both())
    assert len(download_calls) == 2
    assert {call['snapshot_id'] for call in download_calls} == {str(snapshot_a.id), str(snapshot_b.id)}
    assert {call['source_url'] for call in download_calls} == {snapshot_a.url, snapshot_b.url}
    # Two distinct bus objects => each snapshot ran on its own isolated bus.
    assert len({id(call['bus']) for call in download_calls}) == 2
    assert len(created_buses) == 3  # 1 crawl bus + 2 isolated snapshot buses
def test_ensure_background_runner_starts_when_none_running(monkeypatch):
    """ensure_background_runner() should spawn `archivebox run --daemon` via Popen
    when no orchestrator process is currently recorded as running."""
    import archivebox.machine.models as machine_models
    from archivebox.services import runner as runner_module
    popen_calls = []
    class DummyPopen:
        # Records constructor args instead of actually spawning a subprocess.
        def __init__(self, args, **kwargs):
            popen_calls.append((args, kwargs))
    monkeypatch.setattr(machine_models.Process, 'cleanup_stale_running', classmethod(lambda cls, machine=None: 0))
    monkeypatch.setattr(machine_models.Machine, 'current', classmethod(lambda cls: SimpleNamespace(id='machine-1')))
    # No orchestrator rows exist -> exists() returns False.
    monkeypatch.setattr(
        machine_models.Process.objects,
        'filter',
        lambda **kwargs: SimpleNamespace(exists=lambda: False),
    )
    monkeypatch.setattr(runner_module.subprocess, 'Popen', DummyPopen)
    started = runner_module.ensure_background_runner(allow_under_pytest=True)
    assert started is True
    assert len(popen_calls) == 1
    assert popen_calls[0][0] == [runner_module.sys.executable, '-m', 'archivebox', 'run', '--daemon']
    # stdin must be detached (DEVNULL) for the daemonized runner.
    assert popen_calls[0][1]['stdin'] is subprocess.DEVNULL
def test_ensure_background_runner_skips_when_orchestrator_running(monkeypatch):
    """ensure_background_runner() must not spawn anything when an orchestrator
    process already exists for this machine."""
    import archivebox.machine.models as machine_models
    from archivebox.services import runner as runner_module
    monkeypatch.setattr(machine_models.Process, 'cleanup_stale_running', classmethod(lambda cls, machine=None: 0))
    monkeypatch.setattr(machine_models.Machine, 'current', classmethod(lambda cls: SimpleNamespace(id='machine-1')))
    # An orchestrator row already exists -> exists() returns True.
    monkeypatch.setattr(
        machine_models.Process.objects,
        'filter',
        lambda **kwargs: SimpleNamespace(exists=lambda: True),
    )
    # Any Popen call raises, proving no subprocess is spawned.
    monkeypatch.setattr(
        runner_module.subprocess,
        'Popen',
        lambda *args, **kwargs: (_ for _ in ()).throw(AssertionError('runner should not be spawned')),
    )
    started = runner_module.ensure_background_runner(allow_under_pytest=True)
    assert started is False
def test_runner_prepare_refreshes_network_interface_and_attaches_current_process(monkeypatch):
    """CrawlRunner._prepare() should call NetworkInterface.current(refresh=True)
    exactly once and save the refreshed iface + machine onto the current Process."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
    )
    class _Iface:
        # Fake NetworkInterface row.
        id = 'iface-1'
        machine = SimpleNamespace(id='machine-1')
        machine_id = 'machine-1'
    saved_updates = []
    class _Proc:
        # Fake Process row; records the update_fields of each save() call.
        iface_id = None
        machine_id = 'machine-1'
        iface = None
        machine = None
        def save(self, *, update_fields):
            saved_updates.append(tuple(update_fields))
    proc = _Proc()
    monkeypatch.setattr(runner_module, 'discover_plugins', lambda: {})
    monkeypatch.setattr(runner_module, 'create_bus', lambda **kwargs: _DummyBus(kwargs['name']))
    # Stub out all runner services so no real work happens.
    monkeypatch.setattr(runner_module, 'ProcessService', _DummyService)
    monkeypatch.setattr(runner_module, 'MachineService', _DummyService)
    monkeypatch.setattr(runner_module, 'BinaryService', _DummyService)
    monkeypatch.setattr(runner_module, 'TagService', _DummyService)
    monkeypatch.setattr(runner_module, 'CrawlService', _DummyService)
    monkeypatch.setattr(runner_module, 'SnapshotService', _DummyService)
    monkeypatch.setattr(runner_module, 'ArchiveResultService', _DummyService)
    from archivebox.machine.models import NetworkInterface, Process
    from archivebox.config import configset as configset_module
    refresh_calls = []
    # Record each refresh= value passed to NetworkInterface.current().
    monkeypatch.setattr(NetworkInterface, 'current', classmethod(lambda cls, refresh=False: refresh_calls.append(refresh) or _Iface()))
    monkeypatch.setattr(Process, 'current', classmethod(lambda cls: proc))
    monkeypatch.setattr(configset_module, 'get_config', lambda **kwargs: {})
    crawl_runner = runner_module.CrawlRunner(crawl)
    crawl_runner._prepare()
    assert refresh_calls == [True]
    assert proc.iface is not None
    assert proc.machine == proc.iface.machine
    assert saved_updates == [('iface', 'machine', 'modified_at')]
def test_create_crawl_api_queues_crawl_without_spawning_runner(monkeypatch):
    """POSTing a new crawl via the API should queue it (queued status + retry_at set)."""
    from django.contrib.auth import get_user_model
    from archivebox.api.v1_crawls import CrawlCreateSchema, create_crawl

    superuser = get_user_model().objects.create_superuser(
        username='runner-api-admin',
        email='runner-api-admin@example.com',
        password='testpassword',
    )
    request = RequestFactory().post('/api/v1/crawls')
    request.user = superuser
    payload = CrawlCreateSchema(
        urls=['https://example.com'],
        max_depth=0,
        tags=[],
        tags_str='',
        label='',
        notes='',
        config={},
    )
    crawl = create_crawl(request, payload)
    # The crawl exists and sits in the queue awaiting the background runner.
    assert str(crawl.id)
    assert crawl.status == 'queued'
    assert crawl.retry_at is not None
def test_crawl_runner_does_not_seal_unfinished_crawl(monkeypatch):
    """CrawlRunner.run() must not mark a crawl SEALED while crawl.is_finished()
    reports False; it should leave it retryable (retry_at set) instead."""
    import asgiref.sync
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    snapshot = Snapshot.objects.create(
        url='https://example.com',
        crawl=crawl,
        status=Snapshot.StatusChoices.STARTED,
    )
    monkeypatch.setattr(runner_module, '_attach_bus_trace', lambda bus: None)
    monkeypatch.setattr(runner_module, '_stop_bus_trace', lambda bus: asyncio.sleep(0))
    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
    monkeypatch.setenv("DJANGO_ALLOW_ASYNC_UNSAFE", "true")
    # Run sync_to_async callables inline so the test stays single-threaded.
    monkeypatch.setattr(
        asgiref.sync,
        'sync_to_async',
        lambda func, thread_sensitive=True: (lambda *args, **kwargs: _call_sync(func, *args, **kwargs)),
    )
    monkeypatch.setattr(Crawl.objects, 'get', lambda id: crawl)
    # The crawl reports itself unfinished, so sealing would be a bug.
    monkeypatch.setattr(crawl, 'is_finished', lambda: False)
    monkeypatch.setattr(crawl, 'save', lambda *args, **kwargs: None)
    # Stub out every runner phase so only the finalization logic executes.
    monkeypatch.setattr(runner_module.CrawlRunner, '_prepare', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_create_live_ui', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_initial_snapshot_ids', lambda self: [str(snapshot.id)])
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_setup', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, 'enqueue_snapshot', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_wait_for_snapshot_tasks', lambda self: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_cleanup', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_cleanup_persona', lambda self: None)
    asyncio.run(runner_module.CrawlRunner(crawl, snapshot_ids=[str(snapshot.id)]).run())
    assert crawl.status != Crawl.StatusChoices.SEALED
    assert crawl.retry_at is not None
def test_crawl_runner_finalizes_with_sync_to_async_for_is_finished(monkeypatch):
    """During finalization the runner must call crawl.is_finished() through
    sync_to_async rather than invoking the ORM directly on the event loop."""
    import asgiref.sync
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    snapshot = Snapshot.objects.create(
        url='https://example.com',
        crawl=crawl,
        status=Snapshot.StatusChoices.STARTED,
    )
    monkeypatch.setattr(runner_module, 'create_bus', lambda *args, **kwargs: _DummyBus('runner'))
    monkeypatch.setattr(runner_module, 'discover_plugins', lambda: {})
    # Stub out all runner services so no real work happens.
    monkeypatch.setattr(runner_module, 'ProcessService', _DummyService)
    monkeypatch.setattr(runner_module, 'MachineService', _DummyService)
    monkeypatch.setattr(runner_module, 'BinaryService', _DummyService)
    monkeypatch.setattr(runner_module, 'TagService', _DummyService)
    monkeypatch.setattr(runner_module, 'CrawlService', _DummyService)
    monkeypatch.setattr(runner_module, 'SnapshotService', _DummyService)
    monkeypatch.setattr(runner_module, 'ArchiveResultService', _DummyService)
    monkeypatch.setattr(runner_module, '_attach_bus_trace', lambda bus: None)
    monkeypatch.setattr(runner_module, '_stop_bus_trace', lambda bus: asyncio.sleep(0))
    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
    monkeypatch.setattr(Crawl.objects, 'get', lambda id: crawl)
    monkeypatch.setattr(crawl, 'save', lambda *args, **kwargs: None)
    monkeypatch.setattr(crawl, 'cleanup', lambda: None)
    # Stub out every runner phase so only finalization executes.
    monkeypatch.setattr(runner_module.CrawlRunner, '_prepare', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_create_live_ui', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_initial_snapshot_ids', lambda self: [str(snapshot.id)])
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_setup', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, 'enqueue_snapshot', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_wait_for_snapshot_tasks', lambda self: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_cleanup', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_cleanup_persona', lambda self: None)
    sync_to_async_wrapped: list[str] = []
    sync_to_async_active = False
    def fake_sync_to_async(func, thread_sensitive=True):
        # Flags sync_to_async_active while func runs so guarded_is_finished
        # can prove it was invoked through the wrapper.
        async def wrapper(*args, **kwargs):
            nonlocal sync_to_async_active
            sync_to_async_wrapped.append(getattr(func, '__name__', repr(func)))
            previous = sync_to_async_active
            sync_to_async_active = True
            try:
                return func(*args, **kwargs)
            finally:
                sync_to_async_active = previous
        return wrapper
    def guarded_is_finished():
        # Fails loudly if is_finished() is called outside sync_to_async.
        assert sync_to_async_active is True
        return False
    monkeypatch.setattr(asgiref.sync, 'sync_to_async', fake_sync_to_async)
    monkeypatch.setattr(crawl, 'is_finished', guarded_is_finished)
    asyncio.run(runner_module.CrawlRunner(crawl, snapshot_ids=[str(snapshot.id)]).run())
    crawl.refresh_from_db()
    # Unfinished crawl stays STARTED and retryable.
    assert crawl.status == Crawl.StatusChoices.STARTED
    assert crawl.retry_at is not None
    assert 'guarded_is_finished' in sync_to_async_wrapped
def test_wait_for_snapshot_tasks_surfaces_already_failed_task():
    """_wait_for_snapshot_tasks must re-raise the exception of a task that already failed."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.services import runner as runner_module

    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
    )
    runner = runner_module.CrawlRunner(crawl)

    async def run_test():
        # Register a future that has already failed before waiting.
        failed = asyncio.get_running_loop().create_future()
        failed.set_exception(RuntimeError('snapshot failed'))
        runner.snapshot_tasks['snap-1'] = failed
        with pytest.raises(RuntimeError, match='snapshot failed'):
            await runner._wait_for_snapshot_tasks()

    asyncio.run(run_test())
def test_wait_for_snapshot_tasks_returns_after_completed_tasks_are_pruned():
    """_wait_for_snapshot_tasks should return once all tasks finish and prune the registry."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.services import runner as runner_module

    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
    )
    runner = runner_module.CrawlRunner(crawl)

    async def run_test():
        async def finish_snapshot() -> None:
            # Completes on the next loop tick.
            await asyncio.sleep(0)

        runner.snapshot_tasks['snap-1'] = asyncio.create_task(finish_snapshot())
        # Must return well within the timeout once the task completes.
        await asyncio.wait_for(runner._wait_for_snapshot_tasks(), timeout=0.5)
        assert runner.snapshot_tasks == {}

    asyncio.run(run_test())
def test_crawl_runner_calls_crawl_cleanup_after_snapshot_phase(monkeypatch):
    """After the snapshot phase, crawl.cleanup() must run before the abx
    crawl-cleanup hooks (_run_crawl_cleanup)."""
    import asgiref.sync
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module
    crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    snapshot = Snapshot.objects.create(
        url='https://example.com',
        crawl=crawl,
        status=Snapshot.StatusChoices.STARTED,
    )
    monkeypatch.setattr(runner_module, '_attach_bus_trace', lambda bus: None)
    monkeypatch.setattr(runner_module, '_stop_bus_trace', lambda bus: asyncio.sleep(0))
    monkeypatch.setattr(runner_module, 'setup_abx_services', lambda *args, **kwargs: _DummyAbxServices())
    monkeypatch.setenv("DJANGO_ALLOW_ASYNC_UNSAFE", "true")
    # Run sync_to_async callables inline so no worker threads are involved.
    monkeypatch.setattr(
        asgiref.sync,
        'sync_to_async',
        lambda func, thread_sensitive=True: (lambda *args, **kwargs: _call_sync(func, *args, **kwargs)),
    )
    monkeypatch.setattr(Crawl.objects, 'get', lambda id: crawl)
    monkeypatch.setattr(crawl, 'is_finished', lambda: False)
    monkeypatch.setattr(crawl, 'save', lambda *args, **kwargs: None)
    # Stub every runner phase except the two cleanup steps under test.
    monkeypatch.setattr(runner_module.CrawlRunner, '_prepare', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_create_live_ui', lambda self: None)
    monkeypatch.setattr(runner_module.CrawlRunner, '_initial_snapshot_ids', lambda self: [str(snapshot.id)])
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_setup', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, 'enqueue_snapshot', lambda self, snapshot_id: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_wait_for_snapshot_tasks', lambda self: asyncio.sleep(0))
    monkeypatch.setattr(runner_module.CrawlRunner, '_cleanup_persona', lambda self: None)
    cleanup_calls = []
    # Record the order in which the two cleanup steps fire.
    monkeypatch.setattr(runner_module.CrawlRunner, '_run_crawl_cleanup', lambda self, snapshot_id: cleanup_calls.append('abx_cleanup') or asyncio.sleep(0))
    monkeypatch.setattr(crawl, 'cleanup', lambda: cleanup_calls.append('crawl_cleanup'))
    asyncio.run(runner_module.CrawlRunner(crawl, snapshot_ids=[str(snapshot.id)]).run())
    assert cleanup_calls == ['crawl_cleanup', 'abx_cleanup']
def test_abx_process_service_background_monitor_finishes_after_process_exit(monkeypatch, tmp_path):
    """_monitor_background_process() must return (not hang) once the background
    process exits, remove the pid file, and emit a ProcessCompletedEvent."""
    from abx_dl.models import Process as AbxProcess, now_iso
    from abx_dl.services.process_service import ProcessService
    from abx_dl.events import ProcessCompletedEvent
    # Build a bare service instance without running ProcessService.__init__.
    service = object.__new__(ProcessService)
    service.emit_jsonl = False
    emitted_events = []
    async def fake_emit_event(event, *, detach_from_parent):
        emitted_events.append((event, detach_from_parent))
    async def fake_stream_stdout(**kwargs):
        # Blocks until cancelled — simulates a live daemon stdout stream.
        try:
            await asyncio.Event().wait()
        except asyncio.CancelledError:
            return ["daemon output\n"]
    service._emit_event = fake_emit_event
    monkeypatch.setattr(service, '_stream_stdout', fake_stream_stdout)
    class FakeAsyncProcess:
        # Mimics an asyncio subprocess that exits cleanly on the first wait().
        def __init__(self):
            self.pid = 42424
            self.returncode = None
        async def wait(self):
            await asyncio.sleep(0)
            self.returncode = 0
            return 0
    plugin_output_dir = tmp_path / 'chrome'
    plugin_output_dir.mkdir()
    stdout_file = plugin_output_dir / 'on_Crawl__90_chrome_launch.daemon.bg.stdout.log'
    stderr_file = plugin_output_dir / 'on_Crawl__90_chrome_launch.daemon.bg.stderr.log'
    stderr_file.write_text('')
    pid_file = plugin_output_dir / 'on_Crawl__90_chrome_launch.daemon.bg.pid'
    pid_file.write_text('12345')
    proc = AbxProcess(
        cmd=['hook'],
        pwd=str(plugin_output_dir),
        timeout=60,
        started_at=now_iso(),
        plugin='chrome',
        hook_name='on_Crawl__90_chrome_launch.daemon.bg',
    )
    process = FakeAsyncProcess()
    event = SimpleNamespace(
        plugin_name='chrome',
        hook_name='on_Crawl__90_chrome_launch.daemon.bg',
        hook_path='hook',
        hook_args=['--url=https://example.org/'],
        env={},
        output_dir=str(plugin_output_dir),
        timeout=60,
        snapshot_id='snap-1',
        is_background=True,
    )
    async def run_test():
        # If the monitor never notices process exit, this times out at 0.5s.
        await asyncio.wait_for(
            service._monitor_background_process(
                event=event,
                proc=proc,
                process=process,
                plugin_output_dir=plugin_output_dir,
                stdout_file=stdout_file,
                stderr_file=stderr_file,
                pid_file=pid_file,
                files_before=set(),
            ),
            timeout=0.5,
        )
    asyncio.run(run_test())
    # The pid file was cleaned up and a completion event was emitted.
    assert pid_file.exists() is False
    assert any(isinstance(event, ProcessCompletedEvent) for event, _ in emitted_events)
def test_run_pending_crawls_runs_due_snapshot_in_place(monkeypatch):
    """A due QUEUED snapshot inside a SEALED crawl is scheduled inline exactly once."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module

    sealed_crawl = Crawl.objects.create(
        urls='https://example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.SEALED,
    )
    due_snapshot = Snapshot.objects.create(
        url='https://example.com',
        crawl=sealed_crawl,
        status=Snapshot.StatusChoices.QUEUED,
        retry_at=runner_module.timezone.now(),
    )

    # Pretend both records can always be claimed by this worker.
    monkeypatch.setattr(type(due_snapshot), 'claim_processing_lock', lambda self, lock_seconds=60: True)
    monkeypatch.setattr(type(sealed_crawl), 'claim_processing_lock', lambda self, lock_seconds=60: True)

    observed_calls: list[tuple[str, list[str] | None, bool]] = []

    def fake_run_crawl(crawl_id, snapshot_ids=None, selected_plugins=None, process_discovered_snapshots_inline=True):
        # Record the invocation, then seal the snapshot so the scheduler stops retrying it.
        observed_calls.append((crawl_id, snapshot_ids, process_discovered_snapshots_inline))
        due_snapshot.status = Snapshot.StatusChoices.SEALED
        due_snapshot.retry_at = None
        due_snapshot.save(update_fields=['status', 'retry_at', 'modified_at'])

    monkeypatch.setattr(runner_module, 'run_crawl', fake_run_crawl)

    assert runner_module.run_pending_crawls(daemon=False) == 0
    assert observed_calls == [(str(sealed_crawl.id), [str(due_snapshot.id)], False)]
def test_run_pending_crawls_prioritizes_new_queued_crawl_before_snapshot_backlog(monkeypatch):
    """A freshly QUEUED crawl is scheduled before an older crawl's snapshot backlog."""
    from archivebox.base_models.models import get_or_create_system_user_pk
    from archivebox.crawls.models import Crawl
    from archivebox.core.models import Snapshot
    from archivebox.services import runner as runner_module
    # Older crawl is already STARTED and has a snapshot waiting to be retried.
    older_crawl = Crawl.objects.create(
        urls='https://older.example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.STARTED,
    )
    older_snapshot = Snapshot.objects.create(
        url='https://older.example.com',
        crawl=older_crawl,
        status=Snapshot.StatusChoices.QUEUED,
        retry_at=runner_module.timezone.now(),
    )
    # Newer crawl is QUEUED and also due now — it should win scheduling priority.
    newer_crawl = Crawl.objects.create(
        urls='https://newer.example.com',
        created_by_id=get_or_create_system_user_pk(),
        status=Crawl.StatusChoices.QUEUED,
        retry_at=runner_module.timezone.now(),
    )
    # Pretend every record can always be claimed by this worker.
    monkeypatch.setattr(type(older_snapshot), 'claim_processing_lock', lambda self, lock_seconds=60: True)
    monkeypatch.setattr(type(older_crawl), 'claim_processing_lock', lambda self, lock_seconds=60: True)
    monkeypatch.setattr(type(newer_crawl), 'claim_processing_lock', lambda self, lock_seconds=60: True)
    run_calls: list[tuple[str, list[str] | None, bool]] = []
    class _StopScheduling(Exception):
        # Raised by the fake to halt the scheduler after the first dispatch,
        # so only the very first scheduling decision is observed.
        pass
    def fake_run_crawl(crawl_id, snapshot_ids=None, selected_plugins=None, process_discovered_snapshots_inline=True):
        run_calls.append((crawl_id, snapshot_ids, process_discovered_snapshots_inline))
        raise _StopScheduling
    monkeypatch.setattr(runner_module, 'run_crawl', fake_run_crawl)
    with pytest.raises(_StopScheduling):
        runner_module.run_pending_crawls(daemon=False)
    # The new QUEUED crawl must be dispatched first (whole crawl, no snapshot list).
    assert run_calls == [(str(newer_crawl.id), None, False)]

View File

@@ -0,0 +1,205 @@
import json
from datetime import datetime
from typing import cast
import pytest
from django.contrib.auth import get_user_model
from django.contrib.auth.models import UserManager
from django.urls import reverse
from django.utils import timezone
pytestmark = pytest.mark.django_db
User = get_user_model()
ADMIN_HOST = 'admin.archivebox.localhost:8000'
@pytest.fixture
def admin_user(db):
    """Superuser account used to exercise the admin-only tag UI and API."""
    manager = cast(UserManager, User.objects)
    return manager.create_superuser(
        username='tagadmin',
        email='tagadmin@test.com',
        password='testpassword',
    )
@pytest.fixture
def api_token(admin_user):
    """Plaintext API token string for authenticating requests as admin_user."""
    from archivebox.api.auth import get_or_create_api_token
    token = get_or_create_api_token(admin_user)
    # Guard against silent auth-setup failures before any request is made.
    assert token is not None
    return token.token
@pytest.fixture
def crawl(admin_user):
    """Minimal parent Crawl that the tagged snapshots hang off of."""
    from archivebox.crawls.models import Crawl
    return Crawl.objects.create(urls='https://example.com', created_by=admin_user)
@pytest.fixture
def tagged_data(crawl, admin_user):
    """One tag attached to two snapshots; returns (tag, [snapshot, snapshot])."""
    from archivebox.core.models import Snapshot, Tag

    tag = Tag.objects.create(name='Alpha Research', created_by=admin_user)
    snapshots = [
        Snapshot.objects.create(url='https://example.com/one', title='Example One', crawl=crawl),
        Snapshot.objects.create(url='https://example.com/two', title='Example Two', crawl=crawl),
    ]
    for snapshot in snapshots:
        snapshot.tags.add(tag)
    return tag, snapshots
def test_tag_admin_changelist_renders_custom_ui(client, admin_user, tagged_data):
    """The tag changelist page includes the custom search/filter widgets and tag cards."""
    client.login(username='tagadmin', password='testpassword')
    response = client.get(reverse('admin:core_tag_changelist'), HTTP_HOST=ADMIN_HOST)
    assert response.status_code == 200
    expected_fragments = (
        b'id="tag-live-search"',
        b'id="tag-sort-select"',
        b'id="tag-created-by-select"',
        b'id="tag-year-select"',
        b'id="tag-has-snapshots-select"',
        b'Alpha Research',
        b'class="tag-card"',
    )
    for fragment in expected_fragments:
        assert fragment in response.content
def test_tag_admin_add_view_renders_similar_tag_reference(client, admin_user):
    """The tag add form embeds the live 'Similar Tags' reference widget."""
    client.login(username='tagadmin', password='testpassword')
    response = client.get(reverse('admin:core_tag_add'), HTTP_HOST=ADMIN_HOST)
    assert response.status_code == 200
    for fragment in (b'Similar Tags', b'data-tag-name-input="1"'):
        assert fragment in response.content
def test_tag_search_api_returns_card_payload(client, api_token, tagged_data):
    """search_tags returns default filter state plus full card data per tag."""
    tag, snapshots = tagged_data
    response = client.get(
        reverse('api-1:search_tags'),
        {'q': 'Alpha', 'api_key': api_token},
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    payload = response.json()
    # With no explicit filters, the endpoint echoes its documented defaults.
    assert payload['sort'] == 'created_desc'
    assert payload['created_by'] == ''
    assert payload['year'] == ''
    assert payload['has_snapshots'] == 'all'
    # The matched tag card carries identity, counts, and nested snapshot previews.
    assert payload['tags'][0]['id'] == tag.id
    assert payload['tags'][0]['name'] == 'Alpha Research'
    assert payload['tags'][0]['num_snapshots'] == 2
    assert payload['tags'][0]['snapshots'][0]['title'] in {'Example One', 'Example Two'}
    # Action URLs: JSONL export and the snapshot-changelist filter link.
    assert payload['tags'][0]['export_jsonl_url'].endswith(f'/api/v1/core/tag/{tag.id}/snapshots.jsonl')
    assert payload['tags'][0]['filter_url'].endswith(f'/admin/core/snapshot/?tags__id__exact={tag.id}')
    assert {snapshot['url'] for snapshot in payload['tags'][0]['snapshots']} == {snap.url for snap in snapshots}
def test_tag_search_api_respects_sort_and_filters(client, api_token, admin_user, crawl, tagged_data):
    """Combining sort + created_by + year + has_snapshots narrows to a single tag."""
    from archivebox.core.models import Snapshot, Tag
    other_user = cast(UserManager, User.objects).create_user(
        username='tagother',
        email='tagother@test.com',
        password='unused',
    )
    tag_with_snapshots = tagged_data[0]
    # Two tags owned by other_user: one stays empty, one gets a snapshot,
    # so only 'Zulu Empty' survives the has_snapshots=no + year=2024 filters.
    empty_tag = Tag.objects.create(name='Zulu Empty', created_by=other_user)
    alpha_tag = Tag.objects.create(name='Alpha Empty', created_by=other_user)
    Snapshot.objects.create(
        url='https://example.com/three',
        title='Example Three',
        crawl=crawl,
    ).tags.add(alpha_tag)
    # Backdate created_at via queryset update() to bypass auto_now_add behavior.
    Tag.objects.filter(pk=empty_tag.pk).update(created_at=timezone.make_aware(datetime(2024, 1, 1, 12, 0, 0)))
    Tag.objects.filter(pk=alpha_tag.pk).update(created_at=timezone.make_aware(datetime(2025, 1, 1, 12, 0, 0)))
    Tag.objects.filter(pk=tag_with_snapshots.pk).update(created_at=timezone.make_aware(datetime(2026, 1, 1, 12, 0, 0)))
    response = client.get(
        reverse('api-1:search_tags'),
        {
            'sort': 'name_desc',
            'created_by': str(other_user.pk),
            'year': '2024',
            'has_snapshots': 'no',
            'api_key': api_token,
        },
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    payload = response.json()
    # The endpoint echoes back the filter state it applied...
    assert payload['sort'] == 'name_desc'
    assert payload['created_by'] == str(other_user.pk)
    assert payload['year'] == '2024'
    assert payload['has_snapshots'] == 'no'
    # ...and only the empty 2024 tag owned by other_user matches.
    assert [tag['name'] for tag in payload['tags']] == ['Zulu Empty']
def test_tag_rename_api_updates_slug(client, api_token, tagged_data):
    """Renaming a tag through the API also regenerates its slug."""
    tag, _snapshots = tagged_data
    rename_url = f"{reverse('api-1:rename_tag', args=[tag.id])}?api_key={api_token}"
    response = client.post(
        rename_url,
        data=json.dumps({'name': 'Alpha Archive'}),
        content_type='application/json',
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    tag.refresh_from_db()
    assert (tag.name, tag.slug) == ('Alpha Archive', 'alpha-archive')
def test_tag_snapshots_export_returns_jsonl(client, api_token, tagged_data):
    """The per-tag snapshot export streams NDJSON with a download filename."""
    tag, _snapshots = tagged_data
    response = client.get(
        reverse('api-1:tag_snapshots_export', args=[tag.id]),
        {'api_key': api_token},
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    assert response['Content-Type'].startswith('application/x-ndjson')
    assert f'tag-{tag.slug}-snapshots.jsonl' in response['Content-Disposition']
    exported = response.content.decode()
    for needle in ('"type": "Snapshot"', '"tags": "Alpha Research"'):
        assert needle in exported
def test_tag_urls_export_returns_plain_text_urls(client, api_token, tagged_data):
    """The per-tag URL export is plain text with one snapshot URL per line."""
    tag, snapshots = tagged_data
    response = client.get(
        reverse('api-1:tag_urls_export', args=[tag.id]),
        {'api_key': api_token},
        HTTP_HOST=ADMIN_HOST,
    )
    assert response.status_code == 200
    assert response['Content-Type'].startswith('text/plain')
    assert f'tag-{tag.slug}-urls.txt' in response['Content-Disposition']
    exported_urls = {line for line in response.content.decode().splitlines() if line}
    assert exported_urls == {snapshot.url for snapshot in snapshots}

View File

@@ -55,6 +55,7 @@ def _build_script(body: str) -> str:
get_admin_host,
get_api_host,
get_web_host,
get_public_host,
get_snapshot_host,
get_original_host,
get_listen_subdomain,
@@ -198,6 +199,7 @@ class TestUrlRouting:
web_host = get_web_host()
admin_host = get_admin_host()
api_host = get_api_host()
public_host = get_public_host()
snapshot_host = get_snapshot_host(snapshot_id)
original_host = get_original_host(domain)
base_host = SERVER_CONFIG.LISTEN_HOST
@@ -208,6 +210,7 @@ class TestUrlRouting:
assert web_host == "web.archivebox.localhost:8000"
assert admin_host == "admin.archivebox.localhost:8000"
assert api_host == "api.archivebox.localhost:8000"
assert public_host == "public.archivebox.localhost:8000"
assert snapshot_host == f"{snapshot_id}.archivebox.localhost:8000"
assert original_host == f"{domain}.archivebox.localhost:8000"
assert get_listen_subdomain(web_host) == "web"
@@ -302,6 +305,20 @@ class TestUrlRouting:
assert resp.status_code == 200
assert response_body(resp) == response_file.read_bytes()
resp = client.get("/index.html", HTTP_HOST=snapshot_host)
assert resp.status_code == 200
snapshot_html = response_body(resp).decode("utf-8", "ignore")
assert f"http://{snapshot_host}/" in snapshot_html
assert "See all files..." in snapshot_html
assert ">WARC<" not in snapshot_html
assert ">Media<" not in snapshot_html
assert ">Git<" not in snapshot_html
resp = client.get("/?files=1", HTTP_HOST=snapshot_host)
assert resp.status_code == 200
files_html = response_body(resp).decode("utf-8", "ignore")
assert output_rel.split("/", 1)[0] in files_html
print("OK")
"""
)
@@ -479,6 +496,7 @@ class TestUrlRouting:
snapshot_host = get_snapshot_host(snapshot_id)
admin_host = get_admin_host()
web_host = get_web_host()
public_host = get_public_host()
client = Client()
@@ -491,10 +509,17 @@ class TestUrlRouting:
assert resp.status_code == 200
live_html = response_body(resp).decode("utf-8", "ignore")
assert f"http://{snapshot_host}/" in live_html
assert "http://web.archivebox.localhost:8000" in live_html
assert f"http://{public_host}/static/archive.png" in live_html
assert ">WARC<" not in live_html
assert ">Media<" not in live_html
assert ">Git<" not in live_html
static_html = Path(snapshot.output_dir, "index.html").read_text(encoding="utf-8", errors="ignore")
assert f"http://{snapshot_host}/" in static_html
assert f"http://{public_host}/static/archive.png" in static_html
assert ">WARC<" not in static_html
assert ">Media<" not in static_html
assert ">Git<" not in static_html
client.login(username="testadmin", password="testpassword")
resp = client.get(f"/admin/core/snapshot/{snapshot_id}/change/", HTTP_HOST=admin_host)

View File

@@ -19,12 +19,19 @@ class Command(BaseCommand):
def handle(self, *args, **kwargs):
import os
import subprocess
import sys
import time
import psutil
from archivebox.config.common import STORAGE_CONFIG
from archivebox.machine.models import Machine, Process
from archivebox.workers.supervisord_util import (
RUNNER_WORKER,
get_existing_supervisord_process,
get_worker,
start_worker,
stop_worker,
)
pidfile = kwargs.get("pidfile") or os.environ.get("ARCHIVEBOX_RUNSERVER_PIDFILE")
if not pidfile:
@@ -32,11 +39,38 @@ class Command(BaseCommand):
interval = max(0.2, float(kwargs.get("interval", 1.0)))
last_pid = None
runner_proc: subprocess.Popen[bytes] | None = None
def stop_duplicate_watchers() -> None:
current_pid = os.getpid()
for proc in psutil.process_iter(["pid", "cmdline"]):
if proc.info["pid"] == current_pid:
continue
cmdline = proc.info.get("cmdline") or []
if not cmdline:
continue
if "runner_watch" not in " ".join(cmdline):
continue
if not any(str(arg) == f"--pidfile={pidfile}" or str(arg) == pidfile for arg in cmdline):
continue
try:
proc.terminate()
proc.wait(timeout=2.0)
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.TimeoutExpired):
try:
proc.kill()
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
def get_supervisor():
supervisor = get_existing_supervisord_process()
if supervisor is None:
raise RuntimeError("runner_watch requires a running supervisord process")
return supervisor
stop_duplicate_watchers()
start_worker(get_supervisor(), RUNNER_WORKER, lazy=True)
def restart_runner() -> None:
nonlocal runner_proc
Process.cleanup_stale_running()
machine = Machine.current()
@@ -55,29 +89,18 @@ class Command(BaseCommand):
except Exception:
continue
if runner_proc and runner_proc.poll() is None:
try:
runner_proc.terminate()
runner_proc.wait(timeout=2.0)
except Exception:
try:
runner_proc.kill()
except Exception:
pass
supervisor = get_supervisor()
runner_proc = subprocess.Popen(
[sys.executable, '-m', 'archivebox', 'run', '--daemon'],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
start_new_session=True,
)
try:
stop_worker(supervisor, RUNNER_WORKER["name"])
except Exception:
pass
start_worker(supervisor, RUNNER_WORKER)
def runner_running() -> bool:
return Process.objects.filter(
machine=Machine.current(),
status=Process.StatusChoices.RUNNING,
process_type=Process.TypeChoices.ORCHESTRATOR,
).exists()
proc = get_worker(get_supervisor(), RUNNER_WORKER["name"])
return bool(proc and proc.get("statename") == "RUNNING")
while True:
try:

View File

@@ -6,6 +6,7 @@ import socket
import psutil
import shutil
import subprocess
import shlex
from typing import Dict, cast, Iterator
from pathlib import Path
@@ -29,24 +30,63 @@ WORKERS_DIR_NAME = "workers"
# Global reference to supervisord process for cleanup
_supervisord_proc = None
def _shell_join(args: list[str]) -> str:
return shlex.join(args)
RUNNER_WORKER = {
"name": "worker_runner",
"command": "archivebox run --daemon",
"autostart": "true",
"command": _shell_join([sys.executable, "-m", "archivebox", "run", "--daemon"]),
"autostart": "false",
"autorestart": "true",
"stdout_logfile": "logs/worker_runner.log",
"redirect_stderr": "true",
}
RUNNER_WATCH_WORKER = lambda pidfile: {
"name": "worker_runner_watch",
"command": _shell_join([sys.executable, "-m", "archivebox", "manage", "runner_watch", f"--pidfile={pidfile}"]),
"autostart": "false",
"autorestart": "true",
"stdout_logfile": "logs/worker_runner_watch.log",
"redirect_stderr": "true",
}
SERVER_WORKER = lambda host, port: {
"name": "worker_daphne",
"command": f"{sys.executable} -m daphne --bind={host} --port={port} --application-close-timeout=600 archivebox.core.asgi:application",
"command": _shell_join([sys.executable, "-m", "daphne", f"--bind={host}", f"--port={port}", "--application-close-timeout=600", "archivebox.core.asgi:application"]),
"autostart": "false",
"autorestart": "true",
"stdout_logfile": "logs/worker_daphne.log",
"redirect_stderr": "true",
}
def RUNSERVER_WORKER(host: str, port: str, *, reload: bool, pidfile: str | None = None, nothreading: bool = False):
command = [sys.executable, "-m", "archivebox", "manage", "runserver", f"{host}:{port}"]
if not reload:
command.append("--noreload")
if nothreading:
command.append("--nothreading")
environment = ['ARCHIVEBOX_RUNSERVER="1"']
if reload:
assert pidfile, "RUNSERVER_WORKER requires a pidfile when reload=True"
environment.extend([
'ARCHIVEBOX_AUTORELOAD="1"',
f'ARCHIVEBOX_RUNSERVER_PIDFILE="{pidfile}"',
])
return {
"name": "worker_runserver",
"command": _shell_join(command),
"environment": ",".join(environment),
"autostart": "false",
"autorestart": "true",
"stdout_logfile": "logs/worker_runserver.log",
"redirect_stderr": "true",
}
def is_port_in_use(host: str, port: int) -> bool:
"""Check if a port is already in use."""
try:
@@ -511,16 +551,30 @@ def watch_worker(supervisor, daemon_name, interval=5):
def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
def start_server_workers(host='0.0.0.0', port='8000', daemonize=False, debug=False, reload=False, nothreading=False):
from archivebox.config.common import STORAGE_CONFIG
supervisor = get_or_create_supervisord_process(daemonize=daemonize)
bg_workers = [RUNNER_WORKER]
if debug:
pidfile = str(STORAGE_CONFIG.TMP_DIR / 'runserver.pid') if reload else None
server_worker = RUNSERVER_WORKER(host=host, port=port, reload=reload, pidfile=pidfile, nothreading=nothreading)
bg_workers: list[tuple[dict[str, str], bool]] = (
[(RUNNER_WORKER, True), (RUNNER_WATCH_WORKER(pidfile), False)] if reload else [(RUNNER_WORKER, False)]
)
log_files = ['logs/worker_runserver.log', 'logs/worker_runner.log']
if reload:
log_files.insert(1, 'logs/worker_runner_watch.log')
else:
server_worker = SERVER_WORKER(host=host, port=port)
bg_workers = [(RUNNER_WORKER, False)]
log_files = ['logs/worker_daphne.log', 'logs/worker_runner.log']
print()
start_worker(supervisor, SERVER_WORKER(host=host, port=port))
start_worker(supervisor, server_worker)
print()
for worker in bg_workers:
start_worker(supervisor, worker)
for worker, lazy in bg_workers:
start_worker(supervisor, worker, lazy=lazy)
print()
if not daemonize:
@@ -529,7 +583,7 @@ def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
sys.stdout.write('Tailing worker logs (Ctrl+C to stop)...\n\n')
sys.stdout.flush()
tail_multiple_worker_logs(
log_files=['logs/worker_daphne.log', 'logs/worker_runner.log'],
log_files=log_files,
follow=True,
proc=_supervisord_proc, # Stop tailing when supervisord exits
)

View File

@@ -50,10 +50,11 @@ def bg_archive_snapshots(snapshots, kwargs: dict | None = None) -> int:
Snapshot.objects.filter(id=snapshot.id).update(
status=Snapshot.StatusChoices.QUEUED,
retry_at=timezone.now(),
downloaded_at=None,
)
crawl_id = getattr(snapshot, 'crawl_id', None)
if crawl_id:
Crawl.objects.filter(id=crawl_id).exclude(status=Crawl.StatusChoices.SEALED).update(
Crawl.objects.filter(id=crawl_id).update(
status=Crawl.StatusChoices.QUEUED,
retry_at=timezone.now(),
)
@@ -75,10 +76,11 @@ def bg_archive_snapshot(snapshot, overwrite: bool = False, methods: list | None
Snapshot.objects.filter(id=snapshot.id).update(
status=Snapshot.StatusChoices.QUEUED,
retry_at=timezone.now(),
downloaded_at=None,
)
crawl_id = getattr(snapshot, 'crawl_id', None)
if crawl_id:
Crawl.objects.filter(id=crawl_id).exclude(status=Crawl.StatusChoices.SEALED).update(
Crawl.objects.filter(id=crawl_id).update(
status=Crawl.StatusChoices.QUEUED,
retry_at=timezone.now(),
)

View File

@@ -1,36 +1,373 @@
#!/usr/bin/env bash
### Bash Environment Setup
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
# set -o xtrace
set -o errexit
set -o errtrace
set -o nounset
set -o pipefail
IFS=$'\n'
set -Eeuo pipefail
IFS=$'\n\t'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
cd "$REPO_DIR"
REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
WORKSPACE_DIR="$(cd "${REPO_DIR}/.." && pwd)"
cd "${REPO_DIR}"
TAG_PREFIX="v"
PYPI_PACKAGE="archivebox"
# Run the linters and tests
# ./bin/lint.sh
# ./bin/test.sh
source_optional_env() {
if [[ -f "${REPO_DIR}/.env" ]]; then
set -a
# shellcheck disable=SC1091
source "${REPO_DIR}/.env"
set +a
fi
}
# # Run all the build scripts
# ./bin/build_git.sh
# ./bin/build_docs.sh
# ./bin/build_pip.sh
# ./bin/build_docker.sh
repo_slug() {
python3 - <<'PY'
import re
import subprocess
# Push release to public repositories
# ./bin/release_docs.sh
./bin/release_git.sh "$@"
./bin/release_pip.sh "$@"
./bin/release_deb.sh "$@"
./bin/release_brew.sh "$@"
./bin/release_docker.sh "$@"
remote = subprocess.check_output(
['git', 'remote', 'get-url', 'origin'],
text=True,
).strip()
VERSION="$(grep '^version = ' "${REPO_DIR}/pyproject.toml" | awk -F'"' '{print $2}')"
echo "[√] Done. Published version v$VERSION"
patterns = [
r'github\.com[:/](?P<slug>[^/]+/[^/.]+)(?:\.git)?$',
r'github\.com/(?P<slug>[^/]+/[^/.]+)(?:\.git)?$',
]
for pattern in patterns:
match = re.search(pattern, remote)
if match:
print(match.group('slug'))
raise SystemExit(0)
raise SystemExit(f'Unable to parse GitHub repo slug from remote: {remote}')
PY
}
# Resolve the repo's default branch name.
# Precedence: explicit $DEFAULT_BRANCH env var, the locally cached
# refs/remotes/origin/HEAD symbolic ref, then a live `git remote show` query.
default_branch() {
    if [[ -n "${DEFAULT_BRANCH:-}" ]]; then
        echo "${DEFAULT_BRANCH}"
        return 0
    fi
    if git symbolic-ref refs/remotes/origin/HEAD >/dev/null 2>&1; then
        git symbolic-ref refs/remotes/origin/HEAD | sed 's#^refs/remotes/origin/##'
        return 0
    fi
    # Slowest fallback: queries the remote over the network.
    git remote show origin | sed -n '/HEAD branch/s/.*: //p' | head -n 1
}
# Print the highest version found across pyproject.toml and etc/package.json
# (the two files can drift; the max wins). Accepts X.Y.Z and X.Y.Z[-]rcN.
current_version() {
    python3 - <<'PY'
from pathlib import Path
import json
import re

versions = []
pyproject_text = Path('pyproject.toml').read_text()
pyproject_match = re.search(r'^version = "([^"]+)"$', pyproject_text, re.MULTILINE)
if pyproject_match:
    versions.append(pyproject_match.group(1))
package_json = json.loads(Path('etc/package.json').read_text())
if 'version' in package_json:
    versions.append(package_json['version'])

def parse(version: str) -> tuple[int, int, int, int, int]:
    # Sort key: the 4th element ranks final releases above rc builds of the
    # same X.Y.Z; the 5th breaks ties between rc builds by rc number.
    match = re.fullmatch(r'(\d+)\.(\d+)\.(\d+)(?:-?rc(\d*))?$', version)
    if not match:
        raise SystemExit(f'Unsupported version format: {version}')
    major, minor, patch, rc = match.groups()
    rc_value = int(rc) if rc else (0 if 'rc' in version else 10_000)
    return (int(major), int(minor), int(patch), 0 if 'rc' in version else 1, rc_value)

print(max(versions, key=parse))
PY
}
# Bump the project version in-place in pyproject.toml + etc/package.json and
# print the new value. rc versions bump the rc number (1.2.3rc1 -> 1.2.3rc2);
# final versions bump the patch (1.2.3 -> 1.2.4).
bump_version() {
    python3 - <<'PY'
from pathlib import Path
import json
import re

def parse(version: str) -> tuple[int, int, int, int, int]:
    # Same ordering key as current_version(): finals outrank rcs of the same patch.
    match = re.fullmatch(r'(\d+)\.(\d+)\.(\d+)(?:-?rc(\d*))?$', version)
    if not match:
        raise SystemExit(f'Unsupported version format: {version}')
    major, minor, patch, rc = match.groups()
    rc_value = int(rc) if rc else (0 if 'rc' in version else 10_000)
    return (int(major), int(minor), int(patch), 0 if 'rc' in version else 1, rc_value)

pyproject_path = Path('pyproject.toml')
pyproject_text = pyproject_path.read_text()
pyproject_match = re.search(r'^version = "([^"]+)"$', pyproject_text, re.MULTILINE)
if not pyproject_match:
    raise SystemExit('Failed to find version in pyproject.toml')
package_path = Path('etc/package.json')
package_json = json.loads(package_path.read_text())
if 'version' not in package_json:
    raise SystemExit('Failed to find version in etc/package.json')

# The two files can disagree; bump from whichever is ahead.
current_version = max([pyproject_match.group(1), package_json['version']], key=parse)
match = re.fullmatch(r'(\d+)\.(\d+)\.(\d+)(?:-?rc(\d*))?$', current_version)
major, minor, patch, rc = match.groups()
if 'rc' in current_version:
    rc_number = int(rc or '0') + 1
    next_version = f'{major}.{minor}.{patch}rc{rc_number}'
else:
    next_version = f'{major}.{minor}.{int(patch) + 1}'

# Write the new version back to both files (first match only in pyproject).
pyproject_path.write_text(
    re.sub(r'^version = "[^"]+"$', f'version = "{next_version}"', pyproject_text, count=1, flags=re.MULTILINE)
)
package_json['version'] = next_version
package_path.write_text(json.dumps(package_json, indent=2) + '\n')
print(next_version)
PY
}
# Print the version declared in <repo_dir>/pyproject.toml; returns 1 when the
# file is missing (callers append `|| true` for optional sibling checkouts).
read_repo_version() {
    local repo_dir="$1"
    if [[ ! -f "${repo_dir}/pyproject.toml" ]]; then
        return 1
    fi
    python3 - "${repo_dir}/pyproject.toml" <<'PY'
from pathlib import Path
import re
import sys

text = Path(sys.argv[1]).read_text()
match = re.search(r'^version = "([^"]+)"$', text, re.MULTILINE)
if not match:
    raise SystemExit('Failed to find version')
print(match.group(1))
PY
}
# Pin the abx* dependency floors in pyproject.toml to whatever versions the
# sibling workspace checkouts currently declare. Repos that are not checked
# out yield an empty version string and are skipped.
update_internal_dependencies() {
    local abxbus_version abx_pkg_version abx_plugins_version abx_dl_version
    abxbus_version="$(read_repo_version "${WORKSPACE_DIR}/abxbus" || true)"
    abx_pkg_version="$(read_repo_version "${WORKSPACE_DIR}/abx-pkg" || true)"
    abx_plugins_version="$(read_repo_version "${WORKSPACE_DIR}/abx-plugins" || true)"
    abx_dl_version="$(read_repo_version "${WORKSPACE_DIR}/abx-dl" || true)"
    python3 - "${abxbus_version}" "${abx_pkg_version}" "${abx_plugins_version}" "${abx_dl_version}" <<'PY'
from pathlib import Path
import re
import sys

path = Path('pyproject.toml')
text = path.read_text()
for name, version in (
    ('abxbus', sys.argv[1]),
    ('abx-pkg', sys.argv[2]),
    ('abx-plugins', sys.argv[3]),
    ('abx-dl', sys.argv[4]),
):
    if version:
        # Rewrites e.g. "abx-pkg>=1.9.14" -> "abx-pkg>=<version>"
        text = re.sub(rf'("{re.escape(name)}>=)[^"]+(")', rf'\g<1>{version}\2', text)
path.write_text(text)
PY
}
# Compare two version strings; prints gt/eq/lt for $1 relative to $2.
# NOTE(review): this parse() scores a final release's rc slot as 0, while
# current_version()/bump_version() use a 10_000 sentinel — harmless today
# because the 4th tuple element already ranks finals above rcs, but the two
# implementations should be kept in sync.
compare_versions() {
    python3 - "$1" "$2" <<'PY'
import re
import sys

def parse(version: str) -> tuple[int, int, int, int, int]:
    match = re.fullmatch(r'(\d+)\.(\d+)\.(\d+)(?:-?rc(\d*))?$', version)
    if not match:
        raise SystemExit(f'Unsupported version format: {version}')
    major, minor, patch, rc = match.groups()
    return (int(major), int(minor), int(patch), 0 if 'rc' in version else 1, int(rc or '0'))

left, right = sys.argv[1], sys.argv[2]
if parse(left) > parse(right):
    print('gt')
elif parse(left) == parse(right):
    print('eq')
else:
    print('lt')
PY
}
# Print the highest version among the repo's GitHub releases (tag prefix
# stripped), or an empty string when there are no releases / the API call fails.
latest_release_version() {
    local slug="$1"
    local raw_tags
    raw_tags="$(gh api "repos/${slug}/releases?per_page=100" --jq '.[].tag_name' || true)"
    # Tag list is passed via env vars to avoid shell-quoting issues in the heredoc.
    RELEASE_TAGS="${raw_tags}" TAG_PREFIX_VALUE="${TAG_PREFIX}" python3 - <<'PY'
import os
import re

def parse(version: str) -> tuple[int, int, int, int, int]:
    # Unparseable tags sort below everything instead of aborting the script.
    match = re.fullmatch(r'(\d+)\.(\d+)\.(\d+)(?:-?rc(\d*))?$', version)
    if not match:
        return (-1, -1, -1, -1, -1)
    major, minor, patch, rc = match.groups()
    return (int(major), int(minor), int(patch), 0 if 'rc' in version else 1, int(rc or '0'))

prefix = os.environ.get('TAG_PREFIX_VALUE', '')
versions = [line.strip() for line in os.environ.get('RELEASE_TAGS', '').splitlines() if line.strip()]
if prefix:
    versions = [version[len(prefix):] if version.startswith(prefix) else version for version in versions]
if not versions:
    print('')
else:
    print(max(versions, key=parse))
PY
}
# Block until the GitHub Actions runs triggered by a commit/event finish.
# Polls up to ~5 minutes (30 x 10s) for runs to appear, then watches each one
# to completion; `--exit-status` makes a failed run abort the script (errexit).
wait_for_runs() {
    local slug="$1"
    local event="$2"
    local sha="$3"
    local label="$4"
    local runs_json
    local attempts=0
    while :; do
        runs_json="$(GH_FORCE_TTY=0 GH_PAGER=cat gh run list --repo "${slug}" --event "${event}" --commit "${sha}" --limit 20 --json databaseId,status,conclusion,workflowName)"
        if [[ "$(jq 'length' <<<"${runs_json}")" -gt 0 ]]; then
            break
        fi
        attempts=$((attempts + 1))
        if [[ "${attempts}" -ge 30 ]]; then
            echo "Timed out waiting for ${label} workflows to start" >&2
            return 1
        fi
        sleep 10
    done
    # Watch each discovered run serially until it completes.
    while read -r run_id; do
        gh run watch "${run_id}" --repo "${slug}" --exit-status
    done < <(jq -r '.[].databaseId' <<<"${runs_json}")
}
# Poll PyPI until the given package version is live, or fail after 30 tries
# (~5 minutes). No sleep after the final failed attempt.
wait_for_pypi() {
    local pkg="$1"
    local want="$2"
    local try live
    for try in $(seq 1 30); do
        live="$(curl -fsSL "https://pypi.org/pypi/${pkg}/json" | jq -r '.info.version')"
        if [[ "${live}" == "${want}" ]]; then
            return 0
        fi
        if (( try < 30 )); then
            sleep 10
        fi
    done
    echo "Timed out waiting for ${pkg}==${want} on PyPI" >&2
    return 1
}
# Sanity-check the tree before tagging: fresh dependency resolution plus a
# full build of the distributable artifacts into dist/.
run_checks() {
    uv sync --all-extras --all-groups --no-cache --upgrade
    uv build --all
}
# CI guard (runs on every push): fail if the checked-in version has fallen
# behind the newest published GitHub release. Only enforced on the default
# branch; other branches are allowed to lag.
validate_release_state() {
    local slug="$1"
    local branch="$2"
    local current latest relation
    if [[ "$(git branch --show-current)" != "${branch}" ]]; then
        echo "Skipping release-state validation on non-default branch $(git branch --show-current)"
        return 0
    fi
    current="$(current_version)"
    latest="$(latest_release_version "${slug}")"
    if [[ -z "${latest}" ]]; then
        # A repo with no releases yet can't be behind anything.
        echo "No published releases found for ${slug}; release state is valid"
        return 0
    fi
    relation="$(compare_versions "${current}" "${latest}")"
    if [[ "${relation}" == "lt" ]]; then
        echo "Current version ${current} is behind latest published version ${latest}" >&2
        return 1
    fi
    echo "Release state is valid: local=${current} latest=${latest}"
}
# Create (and implicitly tag) a GitHub release for the current HEAD commit.
# rc versions (e.g. 1.2.3rc1) are marked as prereleases.
create_release() {
    local slug="$1"
    local version="$2"
    local prerelease_args=()
    if [[ "${version}" == *rc* ]]; then
        prerelease_args+=(--prerelease)
    fi
    # ${arr[@]+...} guards the expansion: under `set -u` (enabled at the top
    # of this script), expanding an EMPTY array is a fatal "unbound variable"
    # error on bash < 4.4 (e.g. the default macOS /bin/bash 3.2), which would
    # break every non-rc release run there.
    gh release create "${TAG_PREFIX}${version}" \
        --repo "${slug}" \
        --target "$(git rev-parse HEAD)" \
        --title "${TAG_PREFIX}${version}" \
        --generate-notes \
        ${prerelease_args[@]+"${prerelease_args[@]}"}
}
# Upload dist/* to PyPI and block until the new version is visible there.
# Credential precedence: UV_PUBLISH_TOKEN > PYPI_TOKEN > PYPI_PAT_SECRET;
# inside GitHub Actions with no token, fall back to OIDC trusted publishing.
publish_artifacts() {
    local version="$1"
    local pypi_token="${UV_PUBLISH_TOKEN:-${PYPI_TOKEN:-${PYPI_PAT_SECRET:-}}}"
    if [[ -n "${pypi_token}" ]]; then
        UV_PUBLISH_TOKEN="${pypi_token}" uv publish --username=__token__ dist/*
    elif [[ -n "${GITHUB_ACTIONS:-}" ]]; then
        uv publish --trusted-publishing always dist/*
    else
        echo "Missing PyPI credentials: set UV_PUBLISH_TOKEN or PYPI_TOKEN" >&2
        return 1
    fi
    wait_for_pypi "${PYPI_PACKAGE}" "${version}"
}
# Entry point. On CI `push` events this only validates release state; any
# other invocation (workflow_dispatch / local) performs the full release:
# update deps -> bump version -> build -> commit+push -> wait for CI ->
# publish to PyPI -> create GitHub release -> verify.
main() {
    local slug branch version latest relation
    source_optional_env
    slug="$(repo_slug)"
    branch="$(default_branch)"
    if [[ "${GITHUB_EVENT_NAME:-}" == "push" ]]; then
        validate_release_state "${slug}" "${branch}"
        return 0
    fi
    # Real releases may only be cut from the default branch.
    if [[ "$(git branch --show-current)" != "${branch}" ]]; then
        echo "Release must run from ${branch}, found $(git branch --show-current)" >&2
        return 1
    fi
    update_internal_dependencies
    version="$(bump_version)"
    run_checks
    git add -A
    git commit -m "release: ${TAG_PREFIX}${version}"
    git push origin "${branch}"
    # Ensure the push-triggered CI is green before doing anything irreversible.
    wait_for_runs "${slug}" push "$(git rev-parse HEAD)" "push"
    publish_artifacts "${version}"
    create_release "${slug}" "${version}"
    # Double-check GitHub now reports exactly the version we just released.
    latest="$(latest_release_version "${slug}")"
    relation="$(compare_versions "${latest}" "${version}")"
    if [[ "${relation}" != "eq" ]]; then
        echo "GitHub release version mismatch: expected ${version}, got ${latest}" >&2
        return 1
    fi
    echo "Released ${PYPI_PACKAGE} ${version}"
}
main "$@"

View File

@@ -1,6 +1,6 @@
[project]
name = "archivebox"
version = "0.9.10rc1"
version = "0.9.10rc2"
requires-python = ">=3.13"
description = "Self-hosted internet archiving solution."
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
@@ -78,9 +78,10 @@ dependencies = [
"w3lib>=2.2.1", # used for parsing content-type encoding from http response headers & html tags
### Extractor dependencies (optional binaries detected at runtime via shutil.which)
### Binary/Package Management
"abx-pkg>=1.9.14", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
"abx-plugins>=1.9.18", # shared ArchiveBox plugin package with install_args-only overrides
"abx-dl>=1.10.13", # shared ArchiveBox downloader package with install_args-only overrides
"abxbus>=2.4.2", # explicit direct dep so local dev env resolves sibling abxbus repo, matching abx-dl EventBus API
"abx-pkg>=1.9.18", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
"abx-plugins>=1.10.14", # shared ArchiveBox plugin package with install_args-only overrides
"abx-dl>=1.10.14", # shared ArchiveBox downloader package with install_args-only overrides
### UUID7 backport for Python <3.14
"uuid7>=0.1.0; python_version < '3.14'", # provides the uuid_extensions module on Python 3.13
]
@@ -156,9 +157,11 @@ environments = ["sys_platform == 'darwin'", "sys_platform == 'linux'"]
package = true
# compile-bytecode = true
[tool.uv.pip]
python-version = "3.13"
# compile-bytecode = true
[tool.uv.sources]
abxbus = { path = "../abxbus", editable = true }
abx-pkg = { path = "../abx-pkg", editable = true }
abx-plugins = { path = "../abx-plugins", editable = true }
abx-dl = { path = "../abx-dl", editable = true }
[build-system]
requires = ["pdm-backend"]

154
uv.lock generated
View File

@@ -14,8 +14,8 @@ supported-markers = [
[[package]]
name = "abx-dl"
version = "1.10.13"
source = { registry = "https://pypi.org/simple" }
version = "1.10.14"
source = { editable = "../abx-dl" }
dependencies = [
{ name = "abx-pkg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "abx-plugins", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -27,44 +27,110 @@ dependencies = [
{ name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "rich-click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/10/de/d9d5a398f053f899fc62d45b9d21eb85412c6ca7d32099c25b9b43f84e32/abx_dl-1.10.13.tar.gz", hash = "sha256:f9fef6119691e07e1792593ed5bcd8de2f84df9d01e77966006d743593c611aa", size = 58200, upload-time = "2026-03-21T18:47:20.901Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e7/11/670fbdc0afe2274893b63774643f6bb44f09d4975d3968cf394384af1306/abx_dl-1.10.13-py3-none-any.whl", hash = "sha256:cd4aab469563b1c7d9f9202161d94ba7de62cf31fbe924f6fe6f51ad051f4d70", size = 62597, upload-time = "2026-03-21T18:47:19.573Z" },
[package.metadata]
requires-dist = [
{ name = "abx-pkg", editable = "../abx-pkg" },
{ name = "abx-plugins", editable = "../abx-plugins" },
{ name = "abxbus", editable = "../abxbus" },
{ name = "flake8", marker = "extra == 'dev'", specifier = ">=7.1.1" },
{ name = "flask", marker = "extra == 'dev'", specifier = ">=3.0" },
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.2" },
{ name = "platformdirs", specifier = ">=4.0.0" },
{ name = "psutil", specifier = ">=7.2.1" },
{ name = "pydantic", specifier = ">=2.0.0" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
{ name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.5.0" },
{ name = "requests", specifier = ">=2.28.0" },
{ name = "rich", specifier = ">=13.0.0" },
{ name = "rich-click", specifier = ">=1.8.0" },
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.6.6" },
]
provides-extras = ["dev"]
[package.metadata.requires-dev]
dev = [
{ name = "prek", specifier = ">=0.3.6" },
{ name = "pyright", specifier = ">=1.1.408" },
{ name = "ruff", specifier = ">=0.15.7" },
{ name = "ty", specifier = ">=0.0.24" },
]
[[package]]
name = "abx-pkg"
version = "1.9.14"
source = { registry = "https://pypi.org/simple" }
version = "1.9.18"
source = { editable = "../abx-pkg" }
dependencies = [
{ name = "pip", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "platformdirs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f9/6e/4465d44686b40ab0361d153160e2bd0167f588756518084308a8e8d08d8c/abx_pkg-1.9.14.tar.gz", hash = "sha256:b94d42cdbc6dde88635903cf14977b34e552d807a72c03d60f27f075deb59952", size = 146811, upload-time = "2026-03-21T07:44:12.158Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/56/af/8e65a23d44e5ccc069c344a7a626f575498b3c1a3ccacb17e941b36ecd35/abx_pkg-1.9.14-py3-none-any.whl", hash = "sha256:cf89dc4c5737e2078cb05fa7e33683718d540391a018445b6e54aa22666f25e0", size = 63511, upload-time = "2026-03-21T07:44:11.038Z" },
[package.metadata]
requires-dist = [
{ name = "abx-pkg", extras = ["rich", "pyinfra", "ansible"], marker = "extra == 'all'" },
{ name = "ansible", marker = "extra == 'ansible'", specifier = ">=12.3.0" },
{ name = "ansible-core", marker = "extra == 'ansible'", specifier = ">=2.0.0" },
{ name = "ansible-runner", marker = "extra == 'ansible'", specifier = ">=2.4.2" },
{ name = "pip", specifier = ">=26.0.1" },
{ name = "platformdirs", specifier = ">=4.9.2" },
{ name = "pydantic", specifier = ">=2.12.5" },
{ name = "pyinfra", marker = "extra == 'pyinfra'", specifier = ">=3.6.1" },
{ name = "rich", marker = "extra == 'rich'", specifier = ">=14.0.0" },
{ name = "typing-extensions", specifier = ">=4.15.0" },
]
provides-extras = ["rich", "pyinfra", "ansible", "all"]
[package.metadata.requires-dev]
dev = [
{ name = "django", specifier = ">=4.0" },
{ name = "django-admin-data-views", specifier = ">=0.3.1" },
{ name = "django-jsonform", specifier = ">=2.22.0" },
{ name = "django-pydantic-field", specifier = ">=0.3.9" },
{ name = "django-stubs", specifier = ">=5.0.0" },
{ name = "mypy", specifier = ">=1.19.1" },
{ name = "prek", specifier = ">=0.3.6" },
{ name = "pyright" },
{ name = "pytest", specifier = ">=9.0.2" },
{ name = "rich", specifier = ">=14.0.0" },
{ name = "ruff", specifier = ">=0.15.7" },
{ name = "ty", specifier = ">=0.0.24" },
]
[[package]]
name = "abx-plugins"
version = "1.10.13"
source = { registry = "https://pypi.org/simple" }
version = "1.10.14"
source = { editable = "../abx-plugins" }
dependencies = [
{ name = "abx-pkg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pydantic-settings", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "rich-click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/2b/ea/7e70fa30a1e52039decd8b755b22549b8c51fb9d97cf54751b6fd1af7f2d/abx_plugins-1.10.13.tar.gz", hash = "sha256:945623afc6436894d26e8e27ce6101032b0c42655d5cbfaeeaa8a57913d0d46a", size = 525322, upload-time = "2026-03-21T17:39:10.142Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/29/25/d5338a5a7a1958916e7104727046ec01744da3fb28b1e30934480ab57f65/abx_plugins-1.10.13-py3-none-any.whl", hash = "sha256:79353763baf685871d52ea7e5fa8d0249937ec9edb2f63c7768b0c0a98d5518e", size = 731961, upload-time = "2026-03-21T17:39:11.713Z" },
[package.metadata]
requires-dist = [
{ name = "abx-pkg", editable = "../abx-pkg" },
{ name = "feedparser", marker = "extra == 'dev'", specifier = ">=6.0.0" },
{ name = "jinja2", marker = "extra == 'dev'", specifier = ">=3.1.0" },
{ name = "pydantic-settings", specifier = ">=2.0.0" },
{ name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.408" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
{ name = "pytest-httpserver", marker = "extra == 'dev'", specifier = ">=1.1.0" },
{ name = "requests", marker = "extra == 'dev'", specifier = ">=2.28.0" },
{ name = "rich-click", specifier = ">=1.9.7" },
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.2" },
{ name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.18" },
]
provides-extras = ["dev"]
[package.metadata.requires-dev]
dev = [{ name = "prek", specifier = ">=0.3.6" }]
[[package]]
name = "abxbus"
version = "2.4.2"
source = { registry = "https://pypi.org/simple" }
version = "2.4.7"
source = { editable = "../abxbus" }
dependencies = [
{ name = "aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -73,9 +139,41 @@ dependencies = [
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "uuid7", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/14/e5/ddf5dab0db243ddd9b193a4461a2d07f3d554b595c77e58af0beceb60eb2/abxbus-2.4.2.tar.gz", hash = "sha256:1c8056655decc81d28a8622f313109df9da36bde77175b0388a0ab9300b878a8", size = 114123, upload-time = "2026-03-20T21:09:35.643Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/86/c8/7815696415e66a7753112062a1357457f1cdd52d623964942f9086872dcb/abxbus-2.4.2-py3-none-any.whl", hash = "sha256:bd2058280fea91a021b604fdc32c4e4e690dfdee848fa50ea746cd786581f923", size = 110208, upload-time = "2026-03-20T21:09:33.942Z" },
[package.metadata]
requires-dist = [
{ name = "aiofiles", specifier = ">=24.1.0" },
{ name = "anyio", specifier = ">=4.9.0" },
{ name = "asyncpg", marker = "extra == 'bridges'", specifier = ">=0.31.0" },
{ name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.31.0" },
{ name = "nats-py", marker = "extra == 'bridges'", specifier = ">=2.13.1" },
{ name = "nats-py", marker = "extra == 'nats'", specifier = ">=2.13.1" },
{ name = "portalocker", specifier = ">=2.7.0" },
{ name = "pydantic", specifier = ">=2.11.5" },
{ name = "redis", marker = "extra == 'bridges'", specifier = ">=7.1.1" },
{ name = "redis", marker = "extra == 'redis'", specifier = ">=7.1.1" },
{ name = "typing-extensions", specifier = ">=4.12.2" },
{ name = "uuid7", specifier = ">=0.1.0" },
]
provides-extras = ["postgres", "nats", "redis", "bridges"]
[package.metadata.requires-dev]
dev = [
{ name = "build", specifier = ">=1.2.2" },
{ name = "codespell", specifier = ">=2.4.1" },
{ name = "fastapi", specifier = ">=0.118.0" },
{ name = "ipdb", specifier = ">=0.13.13" },
{ name = "prek", specifier = ">=0.3.3" },
{ name = "psutil", specifier = ">=7.0.0" },
{ name = "pyright", specifier = ">=1.1.404" },
{ name = "pytest", specifier = ">=8.3.5" },
{ name = "pytest-asyncio", specifier = ">=1.1.0" },
{ name = "pytest-cov", specifier = ">=6.2.1" },
{ name = "pytest-httpserver", specifier = ">=1.0.8" },
{ name = "pytest-timeout", specifier = ">=2.4.0" },
{ name = "pytest-xdist", specifier = ">=3.7.0" },
{ name = "ruff", specifier = ">=0.15.1" },
{ name = "ty", specifier = ">=0.0.1a19" },
]
[[package]]
@@ -119,12 +217,13 @@ wheels = [
[[package]]
name = "archivebox"
version = "0.9.10rc1"
version = "0.9.10rc2"
source = { editable = "." }
dependencies = [
{ name = "abx-dl", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "abx-pkg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "abx-plugins", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "abxbus", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "atomicwrites", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "base32-crockford", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -213,9 +312,10 @@ dev = [
[package.metadata]
requires-dist = [
{ name = "abx-dl", specifier = ">=1.10.13" },
{ name = "abx-pkg", specifier = ">=1.9.14" },
{ name = "abx-plugins", specifier = ">=1.9.18" },
{ name = "abx-dl", editable = "../abx-dl" },
{ name = "abx-pkg", editable = "../abx-pkg" },
{ name = "abx-plugins", editable = "../abx-plugins" },
{ name = "abxbus", editable = "../abxbus" },
{ name = "archivebox", extras = ["sonic", "ldap", "debug"], marker = "extra == 'all'" },
{ name = "atomicwrites", specifier = "==1.4.1" },
{ name = "base32-crockford", specifier = ">=0.3.0" },
@@ -1856,16 +1956,16 @@ wheels = [
[[package]]
name = "pytest-cov"
version = "7.0.0"
version = "7.1.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "coverage", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pluggy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
sdist = { url = "https://files.pythonhosted.org/packages/b1/51/a849f96e117386044471c8ec2bd6cfebacda285da9525c9106aeb28da671/pytest_cov-7.1.0.tar.gz", hash = "sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2", size = 55592, upload-time = "2026-03-21T20:11:16.284Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
{ url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" },
]
[[package]]