WIP: checkpoint working tree before rebasing onto dev

This commit is contained in:
Nick Sweeting
2026-03-22 20:23:45 -07:00
parent a6548df8d0
commit f400a2cd67
87 changed files with 12607 additions and 1808 deletions

View File

@@ -1,14 +1,23 @@
__package__ = 'archivebox.core'
import html
import json
import os
import shlex
from pathlib import Path
from urllib.parse import quote
from functools import reduce
from operator import and_
from django.contrib import admin
from django.db.models import Min, Q, TextField
from django.db.models.functions import Cast
from django.utils.html import format_html
from django.utils.safestring import mark_safe
from django.core.exceptions import ValidationError
from django.urls import reverse, resolve
from django.utils import timezone
from django.utils.text import smart_split
from archivebox.config import DATA_DIR
from archivebox.config.common import SERVER_CONFIG
@@ -16,11 +25,71 @@ from archivebox.misc.paginators import AccelleratedPaginator
from archivebox.base_models.admin import BaseModelAdmin
from archivebox.hooks import get_plugin_icon
from archivebox.core.host_utils import build_snapshot_url
from archivebox.core.widgets import InlineTagEditorWidget
from archivebox.core.views import LIVE_PLUGIN_BASE_URL
from archivebox.core.models import ArchiveResult, Snapshot
def _stringify_env_value(value) -> str:
if value is None:
return ''
if isinstance(value, str):
return value
return json.dumps(value, separators=(',', ':'))
def _quote_shell_string(value: str) -> str:
return "'" + str(value).replace("'", "'\"'\"'") + "'"
def _get_replay_source_url(result: ArchiveResult) -> str:
process_env = getattr(getattr(result, 'process', None), 'env', None) or {}
return str(process_env.get('SOURCE_URL') or result.snapshot.url or '')
def build_abx_dl_display_command(result: ArchiveResult) -> str:
    """Build the short human-readable `abx-dl` command for this result.

    Includes the plugin and (shell-quoted) source URL only when present.
    """
    source_url = _get_replay_source_url(result)
    plugin = str(result.plugin or '').strip()
    if not plugin and not source_url:
        return 'abx-dl'
    if not source_url:
        return f'abx-dl --plugins={plugin}'
    return f'abx-dl --plugins={plugin} {_quote_shell_string(source_url)}'
def build_abx_dl_replay_command(result: ArchiveResult) -> str:
    """Build a copy-pasteable shell command that replays this archive result:
    `cd <snapshot_dir>; [env K=V ...] abx-dl ...`."""
    base_cmd = build_abx_dl_display_command(result)
    process = getattr(result, 'process', None)
    env = getattr(process, 'env', None) or {}
    # Deterministic, sorted KEY=value pairs; unset (None) values are omitted.
    pairs = [
        f'{name}={shlex.quote(_stringify_env_value(val))}'
        for name, val in sorted(env.items())
        if val is not None
    ]
    cd_part = f'cd {shlex.quote(str(result.snapshot_dir))}'
    if pairs:
        return f'{cd_part}; env {" ".join(pairs)} {base_cmd}'
    return f'{cd_part}; {base_cmd}'
def get_plugin_admin_url(plugin_name: str) -> str:
    """Return the live admin URL for *plugin_name*, namespaced by origin.

    URLs are prefixed `builtin.` or `user.` depending on whether the plugin's
    directory lives under the builtin or user plugins root.
    """
    # Imported locally — presumably to avoid an import cycle with hooks; confirm.
    from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, iter_plugin_dirs
    # Find the on-disk directory whose basename matches the plugin name, if any.
    plugin_dir = next((path.resolve() for path in iter_plugin_dirs() if path.name == plugin_name), None)
    if plugin_dir:
        builtin_root = BUILTIN_PLUGINS_DIR.resolve()
        if plugin_dir.is_relative_to(builtin_root):
            return f'{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/'
        user_root = USER_PLUGINS_DIR.resolve()
        if plugin_dir.is_relative_to(user_root):
            return f'{LIVE_PLUGIN_BASE_URL}user.{quote(plugin_name)}/'
    # Fallback: treat unlocatable/unclassified plugins as builtin.
    return f'{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/'
def render_archiveresults_list(archiveresults_qs, limit=50):
"""Render a nice inline list view of archive results with status, plugin, output, and actions."""
@@ -35,6 +104,9 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
'failed': ('#991b1b', '#fee2e2'), # red
'queued': ('#6b7280', '#f3f4f6'), # gray
'started': ('#92400e', '#fef3c7'), # amber
'backoff': ('#92400e', '#fef3c7'),
'skipped': ('#475569', '#f1f5f9'),
'noresults': ('#475569', '#f1f5f9'),
}
rows = []
@@ -54,8 +126,10 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
if len(full_output) > 60:
output_display += '...'
# Get full command as tooltip
cmd_str = ' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd or '-')
display_cmd = build_abx_dl_display_command(result)
replay_cmd = build_abx_dl_replay_command(result)
cmd_str_escaped = html.escape(display_cmd)
cmd_attr = html.escape(replay_cmd, quote=True)
# Build output link - use embed_path() which checks output_files first
embed_path = result.embed_path() if hasattr(result, 'embed_path') else None
@@ -77,7 +151,7 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
<a href="{reverse('admin:core_archiveresult_change', args=[result.id])}"
style="color: #2563eb; text-decoration: none; font-family: ui-monospace, monospace; font-size: 11px;"
title="View/edit archive result">
<code>{str(result.id)[:8]}</code>
<code>{str(result.id)[-8:]}</code>
</a>
</td>
<td style="padding: 10px 12px; white-space: nowrap;">
@@ -140,7 +214,15 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
<div style="font-size: 11px; color: #64748b; margin-top: 8px;">
<b>Command:</b>
</div>
<pre style="margin: 0; padding: 8px; background: #1e293b; border-radius: 4px; color: #e2e8f0; font-size: 11px; white-space: pre-wrap; word-break: break-all;">{cmd_str}</pre>
<div style="position: relative; margin: 0; padding: 8px 56px 8px 8px; background: #1e293b; border-radius: 4px;">
<button type="button"
data-command="{cmd_attr}"
onclick="(function(btn){{var text=btn.dataset.command||''; if(navigator.clipboard&&navigator.clipboard.writeText){{navigator.clipboard.writeText(text);}} else {{var ta=document.createElement('textarea'); ta.value=text; document.body.appendChild(ta); ta.select(); document.execCommand('copy'); document.body.removeChild(ta);}}}})(this); return false;"
style="position: absolute; top: 6px; right: 6px; padding: 2px 8px; border: 0; border-radius: 4px; background: #334155; color: #e2e8f0; font-size: 11px; cursor: pointer;">
Copy
</button>
<code title="{cmd_attr}" style="display: block; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; color: #e2e8f0; font-size: 11px;">{cmd_str_escaped}</code>
</div>
</div>
</details>
</td>
@@ -165,7 +247,7 @@ def render_archiveresults_list(archiveresults_qs, limit=50):
<table style="width: 100%; border-collapse: collapse; font-size: 14px;">
<thead>
<tr style="background: #f8fafc; border-bottom: 2px solid #e2e8f0;">
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">ID</th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Details</th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Status</th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; width: 32px;"></th>
<th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Plugin</th>
@@ -193,7 +275,7 @@ class ArchiveResultInline(admin.TabularInline):
extra = 0
sort_fields = ('end_ts', 'plugin', 'output_str', 'status', 'cmd_version')
readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
fields = ('start_ts', 'end_ts', *readonly_fields, 'plugin', 'cmd', 'cmd_version', 'pwd', 'status', 'retry_at', 'output_str')
fields = ('start_ts', 'end_ts', *readonly_fields, 'plugin', 'cmd', 'cmd_version', 'pwd', 'status', 'output_str')
# exclude = ('id',)
ordering = ('end_ts',)
show_change_link = True
@@ -259,10 +341,11 @@ class ArchiveResultInline(admin.TabularInline):
class ArchiveResultAdmin(BaseModelAdmin):
list_display = ('id', 'created_at', 'snapshot_info', 'tags_str', 'status', 'plugin_with_icon', 'cmd_str', 'output_str')
list_display = ('details_link', 'created_at', 'snapshot_info', 'tags_inline', 'status_badge', 'plugin_with_icon', 'process_link', 'machine_link', 'cmd_str', 'output_str_display')
list_display_links = None
sort_fields = ('id', 'created_at', 'plugin', 'status')
readonly_fields = ('cmd', 'cmd_version', 'pwd', 'cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon')
search_fields = ('id', 'snapshot__url', 'plugin', 'output_str', 'cmd_version', 'cmd', 'snapshot__timestamp')
readonly_fields = ('cmd', 'cmd_version', 'pwd', 'cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'plugin_with_icon', 'process_link')
search_fields = ()
autocomplete_fields = ['snapshot']
fieldsets = (
@@ -271,7 +354,7 @@ class ArchiveResultAdmin(BaseModelAdmin):
'classes': ('card', 'wide'),
}),
('Plugin', {
'fields': ('plugin', 'plugin_with_icon', 'status', 'retry_at'),
'fields': ('plugin_with_icon', 'process_link', 'status'),
'classes': ('card',),
}),
('Timing', {
@@ -305,8 +388,61 @@ class ArchiveResultAdmin(BaseModelAdmin):
self.request = request
return super().change_view(request, object_id, form_url, extra_context)
def get_queryset(self, request):
    """Changelist queryset with the joins/prefetches the list columns need,
    plus `snapshot_first_tag` for ordering the Tags column."""
    qs = super().get_queryset(request)
    qs = qs.select_related('snapshot', 'process')
    qs = qs.prefetch_related('snapshot__tags')
    return qs.annotate(snapshot_first_tag=Min('snapshot__tags__name'))
def get_search_results(self, request, queryset, search_term):
    """Custom admin search: AND together quoted/whitespace-split terms, each
    matched case-insensitively across snapshot id/url/tags/crawl id, plugin,
    hook name, output, and the process command line.

    Returns (queryset, may_have_duplicates) like the stock implementation.
    """
    if not search_term:
        return queryset, False

    # Cast non-text columns so __icontains works uniformly on all of them.
    annotated = queryset.annotate(
        snapshot_id_text=Cast('snapshot__id', output_field=TextField()),
        snapshot_crawl_id_text=Cast('snapshot__crawl_id', output_field=TextField()),
        output_json_text=Cast('output_json', output_field=TextField()),
        cmd_text=Cast('process__cmd', output_field=TextField()),
    )

    def _unquote(bit: str) -> str:
        # smart_split keeps surrounding quotes; strip a matched pair if present.
        if len(bit) >= 2 and bit[0] == bit[-1] and bit[0] in {'"', "'"}:
            return bit[1:-1]
        return bit

    terms = [t for t in (_unquote(b).strip() for b in smart_split(search_term)) if t]
    if not terms:
        return annotated, False

    combined = reduce(and_, (
        Q(snapshot_id_text__icontains=term)
        | Q(snapshot__url__icontains=term)
        | Q(snapshot__tags__name__icontains=term)
        | Q(snapshot_crawl_id_text__icontains=term)
        | Q(plugin__icontains=term)
        | Q(hook_name__icontains=term)
        | Q(output_str__icontains=term)
        | Q(output_json_text__icontains=term)
        | Q(cmd_text__icontains=term)
    ))
    # distinct() because the tags join can duplicate rows; hence the True flag.
    return annotated.filter(combined).distinct(), True
@admin.display(description='Details', ordering='id')
def details_link(self, result):
    """Link to the result's change page, labeled with the id's last 8 chars."""
    change_url = reverse('admin:core_archiveresult_change', args=[result.id])
    short_id = str(result.id)[-8:]
    return format_html('<a href="{}"><code>{}</code></a>', change_url, short_id)
@admin.display(
description='Snapshot Info'
description='Snapshot',
ordering='snapshot__url',
)
def snapshot_info(self, result):
snapshot_id = str(result.snapshot_id)
@@ -325,20 +461,83 @@ class ArchiveResultAdmin(BaseModelAdmin):
def tags_str(self, result):
    """Delegate to the related snapshot's tag-string representation."""
    return result.snapshot.tags_str()
@admin.display(description='Tags', ordering='snapshot_first_tag')
def tags_inline(self, result):
    """Render the snapshot's tags with the read-only inline tag editor widget."""
    sid = str(result.snapshot_id)
    widget = InlineTagEditorWidget(snapshot_id=sid, editable=False)
    rendered = widget.render(
        name=f'tags_{result.snapshot_id}',
        value=result.snapshot.tags.all(),
        attrs={'id': f'tags_{result.snapshot_id}'},
        snapshot_id=sid,
    )
    # Widget output is trusted HTML produced by our own widget class.
    return mark_safe(f'<span class="tags-inline-editor">{rendered}</span>')
@admin.display(description='Status', ordering='status')
def status_badge(self, result):
    """Colored status pill; empty status falls back to QUEUED."""
    status = result.status or ArchiveResult.StatusChoices.QUEUED
    label = result.get_status_display() or status
    return format_html(
        '<span class="status-badge {} status-{}">{}</span>',
        status,
        status,
        label,
    )
@admin.display(description='Plugin', ordering='plugin')
def plugin_with_icon(self, result):
icon = get_plugin_icon(result.plugin)
return format_html(
'<span title="{}">{}</span> {}',
'<a href="{}" title="{}">{}</a> <a href="{}"><code>{}</code></a>',
get_plugin_admin_url(result.plugin),
result.plugin,
icon,
get_plugin_admin_url(result.plugin),
result.plugin,
)
def cmd_str(self, result):
@admin.display(description='Process', ordering='process__pid')
def process_link(self, result):
if not result.process_id:
return '-'
process_label = result.process.pid if result.process and result.process.pid else '-'
return format_html(
'<pre>{}</pre>',
' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
'<a href="{}"><code>{}</code></a>',
reverse('admin:machine_process_change', args=[result.process_id]),
process_label,
)
@admin.display(description='Machine', ordering='process__machine__hostname')
def machine_link(self, result):
    """Link to the machine that ran this result's process, or '-' if unknown."""
    process = result.process if result.process_id else None
    if not process or not process.machine_id:
        return '-'
    machine = process.machine
    return format_html(
        '<a href="{}"><code>{}</code> {}</a>',
        reverse('admin:machine_machine_change', args=[machine.id]),
        str(machine.id)[:8],
        machine.hostname,
    )
@admin.display(description='Command')
def cmd_str(self, result):
    """Fixed-width command cell: truncated display command with a Copy button
    that puts the full replay command (cd + env + abx-dl) on the clipboard.
    """
    # Short form for display, full replayable form for copy/tooltip.
    display_cmd = build_abx_dl_display_command(result)
    replay_cmd = build_abx_dl_replay_command(result)
    # format_html escapes all interpolated values, so the command is safe in
    # both the data-attribute and the visible <code> text.
    return format_html(
        '''
        <div style="position: relative; width: 300px; min-width: 300px; max-width: 300px; overflow: hidden; box-sizing: border-box;">
            <button type="button"
                    data-command="{}"
                    onclick="(function(btn){{var text=btn.dataset.command||''; if(navigator.clipboard&&navigator.clipboard.writeText){{navigator.clipboard.writeText(text);}} else {{var ta=document.createElement('textarea'); ta.value=text; document.body.appendChild(ta); ta.select(); document.execCommand('copy'); document.body.removeChild(ta);}}}})(this); return false;"
                    style="position: absolute; top: 6px; right: 6px; z-index: 1; padding: 2px 8px; border: 0; border-radius: 4px; background: #e2e8f0; color: #334155; font-size: 11px; cursor: pointer;">
                Copy
            </button>
            <code title="{}" style="display: block; width: 100%; max-width: 100%; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; padding: 8px 56px 8px 8px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 6px; font-size: 11px; box-sizing: border-box;">
                {}
            </code>
        </div>
        ''',
        replay_cmd,
        replay_cmd,
        display_cmd,
    )
def output_display(self, result):
@@ -352,6 +551,27 @@ class ArchiveResultAdmin(BaseModelAdmin):
result.output_str,
)
@admin.display(description='Output', ordering='output_str')
def output_str_display(self, result):
    """Output column: link to the embeddable output file when one exists,
    otherwise plain text with a tooltip; '-' when empty."""
    text = str(result.output_str or '').strip()
    if not text:
        return '-'
    embed = result.embed_path() if hasattr(result, 'embed_path') else None
    if not embed:
        return format_html(
            '<span title="{}">{}</span>',
            text,
            text,
        )
    url = build_snapshot_url(str(result.snapshot_id), embed)
    return format_html(
        '<a href="{}" title="{}"><code>{}</code></a>',
        url,
        text,
        text,
    )
def output_summary(self, result):
snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
output_html = format_html(

View File

@@ -61,12 +61,14 @@ def register_admin_site():
from archivebox.crawls.admin import register_admin as register_crawls_admin
from archivebox.api.admin import register_admin as register_api_admin
from archivebox.machine.admin import register_admin as register_machine_admin
from archivebox.personas.admin import register_admin as register_personas_admin
from archivebox.workers.admin import register_admin as register_workers_admin
register_core_admin(archivebox_admin)
register_crawls_admin(archivebox_admin)
register_api_admin(archivebox_admin)
register_machine_admin(archivebox_admin)
register_personas_admin(archivebox_admin)
register_workers_admin(archivebox_admin)
return archivebox_admin

View File

@@ -6,6 +6,7 @@ from pathlib import Path
from django.contrib import admin, messages
from django.urls import path
from django.shortcuts import get_object_or_404, redirect
from django.utils.html import format_html
from django.utils.safestring import mark_safe
from django.utils import timezone
@@ -14,6 +15,7 @@ from django.db.models.functions import Coalesce
from django import forms
from django.template import Template, RequestContext
from django.contrib.admin.helpers import ActionForm
from django.middleware.csrf import get_token
from archivebox.config import DATA_DIR
from archivebox.config.common import SERVER_CONFIG
@@ -24,7 +26,7 @@ from archivebox.search.admin import SearchResultsAdminMixin
from archivebox.core.host_utils import build_snapshot_url, build_web_url
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
from archivebox.workers.tasks import bg_archive_snapshots, bg_add
from archivebox.workers.tasks import bg_archive_snapshot, bg_archive_snapshots, bg_add
from archivebox.core.models import Tag, Snapshot, ArchiveResult
from archivebox.core.admin_archiveresults import render_archiveresults_list
@@ -215,10 +217,23 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
def get_urls(self):
urls = super().get_urls()
custom_urls = [
path('grid/', self.admin_site.admin_view(self.grid_view), name='grid')
path('grid/', self.admin_site.admin_view(self.grid_view), name='grid'),
path('<path:object_id>/redo-failed/', self.admin_site.admin_view(self.redo_failed_view), name='core_snapshot_redo_failed'),
]
return custom_urls + urls
def redo_failed_view(self, request, object_id):
    """Admin view: queue a snapshot for re-archiving of failed/missing outputs.

    On POST, enqueues the snapshot via the background worker (overwrite=False,
    so successful outputs are kept) and shows a success message. All requests
    end with a redirect back to the snapshot's change page.
    """
    snapshot = get_object_or_404(Snapshot, pk=object_id)
    if request.method == 'POST':
        queued = bg_archive_snapshot(snapshot, overwrite=False)
        messages.success(
            request,
            f"Queued {queued} snapshot for re-archiving. The background runner will process it.",
        )
    # NOTE(review): source indentation was lost in this rendering — assuming
    # the redirect applies to GET and POST alike; confirm against the original.
    return redirect(snapshot.admin_change_url)
# def get_queryset(self, request):
# # tags_qs = SnapshotTag.objects.all().select_related('tag')
# # prefetch = Prefetch('snapshottag_set', queryset=tags_qs)
@@ -312,6 +327,8 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
def admin_actions(self, obj):
summary_url = build_web_url(f'/{obj.archive_path}')
results_url = build_web_url(f'/{obj.archive_path}/index.html#all')
redo_failed_url = f'/admin/core/snapshot/{obj.pk}/redo-failed/'
csrf_token = get_token(self.request)
return format_html(
'''
<div style="display: flex; flex-wrap: wrap; gap: 12px; align-items: center;">
@@ -344,13 +361,15 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
onmouseout="this.style.background='#eff6ff';">
🆕 Archive Now
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #ecfdf5; border: 1px solid #a7f3d0; border-radius: 8px; color: #065f46; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
title="Redo failed extractors (missing outputs)"
onmouseover="this.style.background='#d1fae5';"
onmouseout="this.style.background='#ecfdf5';">
🔁 Redo Failed
</a>
<form action="{}" method="post" style="display: inline-flex; margin: 0;">
<input type="hidden" name="csrfmiddlewaretoken" value="{}">
<button type="submit" class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #ecfdf5; border: 1px solid #a7f3d0; border-radius: 8px; color: #065f46; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s; cursor: pointer;"
title="Redo failed extractors (missing outputs)"
onmouseover="this.style.background='#d1fae5';"
onmouseout="this.style.background='#ecfdf5';">
🔁 Redo Failed
</button>
</form>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #fffbeb; border: 1px solid #fde68a; border-radius: 8px; color: #92400e; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
title="Re-run all extractors (overwrite existing)"
@@ -367,14 +386,15 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
</a>
</div>
<p style="margin-top: 12px; font-size: 12px; color: #64748b;">
<b>Tip:</b> Action buttons link to the list view with this snapshot pre-selected. Select it and use the action dropdown to execute.
<b>Tip:</b> Redo Failed runs immediately. The other action buttons link to the list view with this snapshot pre-selected.
</p>
''',
summary_url,
results_url,
obj.url,
obj.pk,
obj.pk,
redo_failed_url,
csrf_token,
obj.pk,
obj.pk,
)

View File

@@ -1,63 +1,74 @@
__package__ = 'archivebox.core'
from django.contrib import admin
from urllib.parse import quote
from django import forms
from django.contrib import admin, messages
from django.contrib.admin.options import IS_POPUP_VAR
from django.http import HttpRequest, HttpResponseRedirect
from django.urls import reverse
from django.utils.html import format_html
from django.utils.safestring import mark_safe
from archivebox.misc.paginators import AccelleratedPaginator
from archivebox.base_models.admin import BaseModelAdmin
from archivebox.core.models import SnapshotTag, Tag
from archivebox.core.tag_utils import (
TAG_HAS_SNAPSHOTS_CHOICES,
TAG_SORT_CHOICES,
build_tag_cards,
get_tag_creator_choices,
get_tag_year_choices,
normalize_created_by_filter,
normalize_created_year_filter,
normalize_has_snapshots_filter,
normalize_tag_sort,
)
from archivebox.core.host_utils import build_snapshot_url
class TagInline(admin.TabularInline):
    """Tabular inline for the Snapshot<->Tag through table, with tag autocomplete."""

    model = SnapshotTag
    fields = ('id', 'tag')
    extra = 1
    max_num = 1000
    autocomplete_fields = ('tag',)
# class AutocompleteTags:
# model = Tag
# search_fields = ['name']
# name = 'name'
# # source_field = 'name'
# remote_field = Tag._meta.get_field('name')
# class AutocompleteTagsAdminStub:
# name = 'admin'
# class TaggedItemInline(admin.TabularInline):
# readonly_fields = ('object_link',)
# fields = ('id', 'tag', 'content_type', 'object_id', *readonly_fields)
# model = TaggedItem
# extra = 1
# show_change_link = True
# @admin.display(description='object')
# def object_link(self, obj):
# obj = obj.content_type.get_object_for_this_type(pk=obj.object_id)
# return format_html('<a href="/admin/{}/{}/{}/change"><b>[{}]</b></a>', obj._meta.app_label, obj._meta.model_name, obj.pk, str(obj))
class TagAdminForm(forms.ModelForm):
    """ModelForm for Tag with a friendlier name input and non-blank validation."""

    class Meta:
        model = Tag
        fields = '__all__'
        widgets = {
            'name': forms.TextInput(attrs={
                'placeholder': 'research, receipts, product-design...',
                'autocomplete': 'off',
                'spellcheck': 'false',
                'data-tag-name-input': '1',
            }),
        }

    def clean_name(self):
        """Trim whitespace and reject empty tag names."""
        raw = self.cleaned_data.get('name') or ''
        name = raw.strip()
        if not name:
            raise forms.ValidationError('Tag name is required.')
        return name
class TagAdmin(BaseModelAdmin):
list_display = ('created_at', 'created_by', 'id', 'name', 'num_snapshots', 'snapshots')
form = TagAdminForm
change_list_template = 'admin/core/tag/change_list.html'
change_form_template = 'admin/core/tag/change_form.html'
list_display = ('name', 'num_snapshots', 'created_at', 'created_by')
list_filter = ('created_at', 'created_by')
sort_fields = ('name', 'slug', 'id', 'created_by', 'created_at')
readonly_fields = ('slug', 'id', 'created_at', 'modified_at', 'snapshots')
search_fields = ('id', 'name', 'slug')
actions = ['delete_selected', 'merge_tags']
ordering = ['-created_at']
# inlines = [TaggedItemInline]
readonly_fields = ('slug', 'id', 'created_at', 'modified_at', 'snapshots')
actions = ['delete_selected']
ordering = ['name', 'id']
fieldsets = (
('Tag Info', {
('Tag', {
'fields': ('name', 'slug'),
'classes': ('card',),
}),
@@ -65,112 +76,137 @@ class TagAdmin(BaseModelAdmin):
'fields': ('id', 'created_by', 'created_at', 'modified_at'),
'classes': ('card',),
}),
('Snapshots', {
('Recent Snapshots', {
'fields': ('snapshots',),
'classes': ('card', 'wide'),
}),
)
paginator = AccelleratedPaginator
add_fieldsets = (
('Tag', {
'fields': ('name',),
'classes': ('card', 'wide'),
}),
('Metadata', {
'fields': ('created_by',),
'classes': ('card',),
}),
)
def get_fieldsets(self, request: HttpRequest, obj: Tag | None = None):
    """Use the slim add-form layout for new tags, the full layout otherwise."""
    if obj:
        return self.fieldsets
    return self.add_fieldsets
def num_snapshots(self, tag):
def changelist_view(self, request: HttpRequest, extra_context=None):
    """Inject tag-browser state into the changelist template context.

    Reads query/sort/filter params from the GET string (normalized through the
    tag_utils helpers), prebuilds the initial tag cards server-side, and exposes
    the search/create API endpoints for the client-side tag browser.
    """
    query = (request.GET.get('q') or '').strip()
    sort = normalize_tag_sort((request.GET.get('sort') or 'created_desc').strip())
    created_by = normalize_created_by_filter((request.GET.get('created_by') or '').strip())
    year = normalize_created_year_filter((request.GET.get('year') or '').strip())
    has_snapshots = normalize_has_snapshots_filter((request.GET.get('has_snapshots') or 'all').strip())
    extra_context = {
        **(extra_context or {}),
        'initial_query': query,
        'initial_sort': sort,
        'initial_created_by': created_by,
        'initial_year': year,
        'initial_has_snapshots': has_snapshots,
        'tag_sort_choices': TAG_SORT_CHOICES,
        'tag_has_snapshots_choices': TAG_HAS_SNAPSHOTS_CHOICES,
        'tag_created_by_choices': get_tag_creator_choices(),
        'tag_year_choices': get_tag_year_choices(),
        'initial_tag_cards': build_tag_cards(
            query=query,
            request=request,
            sort=sort,
            created_by=created_by,
            year=year,
            has_snapshots=has_snapshots,
        ),
        'tag_search_api_url': reverse('api-1:search_tags'),
        'tag_create_api_url': reverse('api-1:tags_create'),
    }
    return super().changelist_view(request, extra_context=extra_context)
def render_change_form(self, request, context, add=False, change=False, form_url='', obj=None):
    """Add 'similar tags' suggestion cards and the search API URL to the
    tag change-form context."""
    # Prefer the in-flight POSTed name, falling back to the saved name.
    current_name = (request.POST.get('name') or '').strip()
    if not current_name and obj:
        current_name = obj.name
    # Suggest similar tags by name; with no name, show a generic sample.
    similar_tag_cards = build_tag_cards(query=current_name, request=request, limit=12) if current_name else build_tag_cards(request=request, limit=12)
    if obj:
        # Never suggest the tag currently being edited.
        similar_tag_cards = [card for card in similar_tag_cards if card['id'] != obj.pk]
    context.update({
        'tag_search_api_url': reverse('api-1:search_tags'),
        'tag_similar_cards': similar_tag_cards,
        'tag_similar_query': current_name,
    })
    return super().render_change_form(request, context, add=add, change=change, form_url=form_url, obj=obj)
def response_add(self, request: HttpRequest, obj: Tag, post_url_continue=None):
    """After adding a tag, jump to the changelist filtered to the new name,
    unless the user asked for popup/continue/add-another behavior."""
    keep_default = (
        IS_POPUP_VAR in request.POST
        or '_continue' in request.POST
        or '_addanother' in request.POST
    )
    if keep_default:
        return super().response_add(request, obj, post_url_continue=post_url_continue)
    self.message_user(request, f'Tag "{obj.name}" saved.', level=messages.SUCCESS)
    return self._redirect_to_changelist(obj.name)
def response_change(self, request: HttpRequest, obj: Tag):
    """After editing a tag, jump to the changelist filtered to its name,
    unless popup/continue/add-another/save-as-new behavior was requested."""
    keep_default = (
        IS_POPUP_VAR in request.POST
        or '_continue' in request.POST
        or '_addanother' in request.POST
        or '_saveasnew' in request.POST
    )
    if keep_default:
        return super().response_change(request, obj)
    self.message_user(request, f'Tag "{obj.name}" updated.', level=messages.SUCCESS)
    return self._redirect_to_changelist(obj.name)
def _redirect_to_changelist(self, query: str = '') -> HttpResponseRedirect:
    """Redirect to the tag changelist, optionally pre-filtered by ?q=query."""
    base_url = reverse('admin:core_tag_changelist')
    target = f'{base_url}?q={quote(query)}' if query else base_url
    return HttpResponseRedirect(target)
@admin.display(description='Snapshots')
def snapshots(self, tag: Tag):
    """Readonly panel: the 10 most recent snapshots using this tag, as cards,
    plus a link to the fully filtered snapshot changelist."""
    # Most recent first; select_related avoids per-row creator queries.
    snapshots = tag.snapshot_set.select_related('crawl__created_by').order_by('-downloaded_at', '-created_at', '-pk')[:10]
    total_count = tag.snapshot_set.count()
    if not snapshots:
        # Static HTML only (tag.id is a DB-generated key) — safe to mark_safe.
        return mark_safe(
            f'<p style="margin:0;color:#64748b;">No snapshots use this tag yet. '
            f'<a href="/admin/core/snapshot/?tags__id__exact={tag.id}">Open filtered snapshot list</a>.</p>'
        )
    cards = []
    for snapshot in snapshots:
        # Fall back to the URL when the snapshot has no usable title.
        title = (snapshot.title or '').strip() or snapshot.url
        cards.append(format_html(
            '''
            <a href="{}" style="display:flex;align-items:center;gap:10px;padding:10px 12px;border:1px solid #e2e8f0;border-radius:12px;background:#fff;text-decoration:none;color:#0f172a;">
                <img src="{}" alt="" style="width:18px;height:18px;border-radius:4px;flex:0 0 auto;" onerror="this.style.display='none'">
                <span style="min-width:0;">
                    <strong style="display:block;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;">{}</strong>
                    <code style="display:block;color:#64748b;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;">{}</code>
                </span>
            </a>
            ''',
            reverse('admin:core_snapshot_change', args=[snapshot.pk]),
            build_snapshot_url(str(snapshot.pk), 'favicon.ico'),
            title[:120],
            snapshot.url[:120],
        ))
    cards.append(format_html(
        '<a href="/admin/core/snapshot/?tags__id__exact={}" style="display:inline-flex;margin-top:10px;font-weight:600;">View all {} tagged snapshots</a>',
        tag.id,
        total_count,
    ))
    # Cards were built with format_html (escaped), so joining is safe.
    return mark_safe('<div style="display:grid;gap:10px;">' + ''.join(cards) + '</div>')
@admin.display(description='Snapshots', ordering='num_snapshots')
def num_snapshots(self, tag: Tag):
count = getattr(tag, 'num_snapshots', tag.snapshot_set.count())
return format_html(
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
tag.id,
tag.snapshot_set.count(),
count,
)
def snapshots(self, tag):
total_count = tag.snapshot_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> {}',
snap.pk,
snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
snap.url[:64],
)
for snap in tag.snapshot_set.order_by('-downloaded_at')[:10]
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">{total_count} total snapshots...<a>'))
# def get_urls(self):
# urls = super().get_urls()
# custom_urls = [
# path(
# "merge-tags/",
# self.admin_site.admin_view(self.merge_tags_view),
# name="taggit_tag_merge_tags",
# ),
# ]
# return custom_urls + urls
# @admin.action(description="Merge selected tags")
# def merge_tags(self, request, queryset):
# selected = request.POST.getlist(admin.helpers.ACTION_CHECKBOX_NAME)
# if not selected:
# self.message_user(request, "Please select at least one tag.")
# return redirect(request.get_full_path())
# selected_tag_ids = ",".join(selected)
# redirect_url = f"{request.get_full_path()}merge-tags/"
# request.session["selected_tag_ids"] = selected_tag_ids
# return redirect(redirect_url)
# def merge_tags_view(self, request):
# selected_tag_ids = request.session.get("selected_tag_ids", "").split(",")
# if request.method == "POST":
# form = MergeTagsForm(request.POST)
# if form.is_valid():
# new_tag_name = form.cleaned_data["new_tag_name"]
# new_tag, created = Tag.objects.get_or_create(name=new_tag_name)
# with transaction.atomic():
# for tag_id in selected_tag_ids:
# tag = Tag.objects.get(id=tag_id)
# tagged_items = TaggedItem.objects.filter(tag=tag)
# for tagged_item in tagged_items:
# if TaggedItem.objects.filter(
# tag=new_tag,
# content_type=tagged_item.content_type,
# object_id=tagged_item.object_id,
# ).exists():
# # we have the new tag as well, so we can just
# # remove the tag association
# tagged_item.delete()
# else:
# # point this taggedItem to the new one
# tagged_item.tag = new_tag
# tagged_item.save()
# # delete the old tag
# if tag.id != new_tag.id:
# tag.delete()
# self.message_user(request, "Tags have been merged", level="success")
# # clear the selected_tag_ids from session after merge is complete
# request.session.pop("selected_tag_ids", None)
# return redirect("..")
# else:
# self.message_user(request, "Form is invalid.", level="error")
# context = {
# "form": MergeTagsForm(),
# "selected_tag_ids": selected_tag_ids,
# }
# return render(request, "admin/taggit/merge_tags_form.html", context)
# @admin.register(SnapshotTag, site=archivebox_admin)
# class SnapshotTagAdmin(BaseModelAdmin):
# list_display = ('id', 'snapshot', 'tag')
# sort_fields = ('id', 'snapshot', 'tag')
# search_fields = ('id', 'snapshot_id', 'tag_id')
# fields = ('snapshot', 'id')
# actions = ['delete_selected']
# ordering = ['-id']
def register_admin(admin_site):
    """Register the Tag model with its admin on the given admin site."""
    admin_site.register(Tag, TagAdmin)

View File

@@ -1,12 +1,16 @@
__package__ = 'archivebox.core'
from django import forms
from django.utils.html import format_html
from archivebox.misc.util import URL_REGEX
from archivebox.misc.util import URL_REGEX, find_all_urls
from taggit.utils import edit_string_for_tags, parse_tags
from archivebox.base_models.admin import KeyValueWidget
from archivebox.crawls.schedule_utils import validate_schedule
from archivebox.hooks import get_plugins
from archivebox.config.common import SEARCH_BACKEND_CONFIG
from archivebox.core.widgets import TagEditorWidget, URLFiltersWidget
from archivebox.hooks import get_plugins, discover_plugin_configs, get_plugin_icon
from archivebox.personas.models import Persona
DEPTH_CHOICES = (
('0', 'depth = 0 (archive just these URLs)'),
@@ -22,6 +26,22 @@ def get_plugin_choices():
return [(name, name) for name in get_plugins()]
def get_plugin_choice_label(plugin_name: str, plugin_configs: dict[str, dict]) -> str:
schema = plugin_configs.get(plugin_name, {})
description = str(schema.get('description') or '').strip()
if not description:
return plugin_name
icon_html = get_plugin_icon(plugin_name)
return format_html(
'<span class="plugin-choice-icon">{}</span><span class="plugin-choice-name">{}</span><a class="plugin-choice-description" href="https://archivebox.github.io/abx-plugins/#{}" target="_blank" rel="noopener noreferrer">{}</a>',
icon_html,
plugin_name,
plugin_name,
description,
)
def get_choice_field(form: forms.Form, name: str) -> forms.ChoiceField:
field = form.fields[name]
if not isinstance(field, forms.ChoiceField):
@@ -31,22 +51,19 @@ def get_choice_field(form: forms.Form, name: str) -> forms.ChoiceField:
class AddLinkForm(forms.Form):
# Basic fields
url = forms.RegexField(
label="URLs (one per line)",
regex=URL_REGEX,
min_length=6,
url = forms.CharField(
label="URLs",
strip=True,
widget=forms.Textarea,
widget=forms.Textarea(attrs={
'data-url-regex': URL_REGEX.pattern,
}),
required=True
)
tag = forms.CharField(
label="Tags (comma separated tag1,tag2,tag3)",
label="Tags",
strip=True,
required=False,
widget=forms.TextInput(attrs={
'list': 'tag-datalist',
'autocomplete': 'off',
})
widget=TagEditorWidget(),
)
depth = forms.ChoiceField(
label="Archive depth",
@@ -58,11 +75,15 @@ class AddLinkForm(forms.Form):
label="Notes",
strip=True,
required=False,
widget=forms.Textarea(attrs={
'rows': 3,
'placeholder': 'Optional notes about this crawl (e.g., purpose, project name, context...)',
widget=forms.TextInput(attrs={
'placeholder': 'Optional notes about this crawl',
})
)
url_filters = forms.Field(
label="URL allowlist / denylist",
required=False,
widget=URLFiltersWidget(source_selector='textarea[name="url"]'),
)
# Plugin groups
chrome_plugins = forms.MultipleChoiceField(
@@ -111,24 +132,15 @@ class AddLinkForm(forms.Form):
'placeholder': 'e.g., daily, weekly, 0 */6 * * * (every 6 hours)',
})
)
persona = forms.CharField(
persona = forms.ModelChoiceField(
label="Persona (authentication profile)",
max_length=100,
initial='Default',
required=False,
)
overwrite = forms.BooleanField(
label="Overwrite existing snapshots",
initial=False,
required=False,
)
update = forms.BooleanField(
label="Update/retry previously failed URLs",
initial=False,
required=False,
queryset=Persona.objects.none(),
empty_label=None,
to_field_name='name',
)
index_only = forms.BooleanField(
label="Index only (don't archive yet)",
label="Index only dry run (add crawl but don't archive yet)",
initial=False,
required=False,
)
@@ -142,11 +154,13 @@ class AddLinkForm(forms.Form):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Import at runtime to avoid circular imports
from archivebox.config.common import ARCHIVING_CONFIG
default_persona = Persona.get_or_create_default()
self.fields['persona'].queryset = Persona.objects.order_by('name')
self.fields['persona'].initial = default_persona.name
# Get all plugins
all_plugins = get_plugins()
plugin_configs = discover_plugin_configs()
# Define plugin groups
chrome_dependent = {
@@ -170,26 +184,28 @@ class AddLinkForm(forms.Form):
# Populate plugin field choices
get_choice_field(self, 'chrome_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in chrome_dependent
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in chrome_dependent
]
get_choice_field(self, 'archiving_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in archiving
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in archiving
]
get_choice_field(self, 'parsing_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in parsing
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in parsing
]
get_choice_field(self, 'search_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in search
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in search
]
get_choice_field(self, 'binary_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in binary
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in binary
]
get_choice_field(self, 'extension_plugins').choices = [
(p, p) for p in sorted(all_plugins) if p in extensions
(p, get_plugin_choice_label(p, plugin_configs)) for p in sorted(all_plugins) if p in extensions
]
# Set update default from config
self.fields['update'].initial = not ARCHIVING_CONFIG.ONLY_NEW
required_search_plugin = f'search_backend_{SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}'.strip()
search_choices = [choice[0] for choice in get_choice_field(self, 'search_plugins').choices]
if required_search_plugin in search_choices:
get_choice_field(self, 'search_plugins').initial = [required_search_plugin]
def clean(self):
cleaned_data = super().clean() or {}
@@ -207,6 +223,23 @@ class AddLinkForm(forms.Form):
return cleaned_data
def clean_url(self):
value = self.cleaned_data.get('url') or ''
urls = '\n'.join(find_all_urls(value))
if not urls:
raise forms.ValidationError('Enter at least one valid URL.')
return urls
def clean_url_filters(self):
from archivebox.crawls.models import Crawl
value = self.cleaned_data.get('url_filters') or {}
return {
'allowlist': '\n'.join(Crawl.split_filter_patterns(value.get('allowlist', ''))),
'denylist': '\n'.join(Crawl.split_filter_patterns(value.get('denylist', ''))),
'same_domain_only': bool(value.get('same_domain_only')),
}
def clean_schedule(self):
schedule = (self.cleaned_data.get('schedule') or '').strip()
if not schedule:

View File

@@ -163,6 +163,10 @@ def get_api_base_url(request=None) -> str:
return _build_base_url_for_host(get_api_host(), request=request)
def get_public_base_url(request=None) -> str:
return _build_base_url_for_host(get_public_host(), request=request)
# Backwards-compat aliases (archive == web)
def get_archive_base_url(request=None) -> str:
return get_web_base_url(request=request)

View File

@@ -0,0 +1,15 @@
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("core", "0031_add_archiveresult_snapshot_status_index"),
]
operations = [
migrations.RemoveField(
model_name="archiveresult",
name="retry_at",
),
]

View File

@@ -36,7 +36,7 @@ from archivebox.base_models.models import (
from archivebox.workers.models import ModelWithStateMachine, BaseStateMachine
from archivebox.workers.tasks import bg_archive_snapshot
from archivebox.crawls.models import Crawl
from archivebox.machine.models import NetworkInterface, Binary
from archivebox.machine.models import Binary
@@ -60,32 +60,41 @@ class Tag(ModelWithUUID):
def __str__(self):
return self.name
def _generate_unique_slug(self) -> str:
base_slug = slugify(self.name) or 'tag'
existing = Tag.objects.filter(slug__startswith=base_slug)
if self.pk:
existing = existing.exclude(pk=self.pk)
existing_slugs = set(existing.values_list("slug", flat=True))
slug = base_slug
i = 1
while slug in existing_slugs:
slug = f"{base_slug}_{i}"
i += 1
return slug
def save(self, *args, **kwargs):
is_new = self._state.adding
if is_new:
self.slug = slugify(self.name)
existing = set(Tag.objects.filter(slug__startswith=self.slug).values_list("slug", flat=True))
i = None
while True:
slug = f"{slugify(self.name)}_{i}" if i else slugify(self.name)
if slug not in existing:
self.slug = slug
break
i = (i or 0) + 1
existing_name = None
if self.pk:
existing_name = Tag.objects.filter(pk=self.pk).values_list('name', flat=True).first()
if not self.slug or existing_name != self.name:
self.slug = self._generate_unique_slug()
super().save(*args, **kwargs)
if is_new:
from archivebox.misc.logging_util import log_worker_event
log_worker_event(
worker_type='DB',
event='Created Tag',
indent_level=0,
metadata={
'id': self.id,
'name': self.name,
'slug': self.slug,
},
)
# if is_new:
# from archivebox.misc.logging_util import log_worker_event
# log_worker_event(
# worker_type='DB',
# event='Created Tag',
# indent_level=0,
# metadata={
# 'id': self.id,
# 'name': self.name,
# 'slug': self.slug,
# },
# )
@property
def api_url(self) -> str:
@@ -364,7 +373,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
return Binary.objects.filter(process_set__archiveresult__snapshot_id=self.id).distinct()
def save(self, *args, **kwargs):
is_new = self._state.adding
if not self.bookmarked_at:
self.bookmarked_at = self.created_at or timezone.now()
if not self.timestamp:
@@ -393,24 +401,25 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
super().save(*args, **kwargs)
self.ensure_legacy_archive_symlink()
if self.url not in self.crawl.urls:
existing_urls = {url for _raw_line, url in self.crawl._iter_url_lines() if url}
if self.crawl.url_passes_filters(self.url, snapshot=self) and self.url not in existing_urls:
self.crawl.urls += f'\n{self.url}'
self.crawl.save()
if is_new:
from archivebox.misc.logging_util import log_worker_event
log_worker_event(
worker_type='DB',
event='Created Snapshot',
indent_level=2,
url=self.url,
metadata={
'id': str(self.id),
'crawl_id': str(self.crawl_id),
'depth': self.depth,
'status': self.status,
},
)
# if is_new:
# from archivebox.misc.logging_util import log_worker_event
# log_worker_event(
# worker_type='DB',
# event='Created Snapshot',
# indent_level=2,
# url=self.url,
# metadata={
# 'id': str(self.id),
# 'crawl_id': str(self.crawl_id),
# 'depth': self.depth,
# 'status': self.status,
# },
# )
# =========================================================================
# Filesystem Migration Methods
@@ -1528,16 +1537,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
"""
Execute snapshot by creating pending ArchiveResults for all enabled hooks.
Called by: SnapshotMachine.enter_started()
Hook Lifecycle:
1. discover_hooks('Snapshot') → finds all plugin hooks
2. For each hook:
- Create ArchiveResult with status=QUEUED
- Store hook_name (e.g., 'on_Snapshot__50_wget.py')
3. ArchiveResults execute independently via ArchiveResultMachine
4. Hook execution happens in ArchiveResult.run(), NOT here
Returns:
list[ArchiveResult]: Newly created pending results
"""
@@ -1602,7 +1601,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
'url': self.url,
'title': self.title,
'tags': self.tags_str(),
'tags_str': self.tags_str(),
'bookmarked_at': self.bookmarked_at.isoformat() if self.bookmarked_at else None,
'created_at': self.created_at.isoformat() if self.created_at else None,
'timestamp': self.timestamp,
@@ -1672,7 +1670,9 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
# ID not found, fall through to create-by-URL logic
pass
url = record.get('url')
from archivebox.misc.util import fix_url_from_markdown
url = fix_url_from_markdown(str(record.get('url') or '').strip())
if not url:
return None
@@ -1807,7 +1807,6 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
defaults={
'plugin': plugin,
'status': ArchiveResult.INITIAL_STATE,
'retry_at': timezone.now(),
},
)
if archiveresult.status == ArchiveResult.INITIAL_STATE:
@@ -1853,11 +1852,12 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
failed = results.filter(status='failed').count()
running = results.filter(status='started').count()
skipped = results.filter(status='skipped').count()
noresults = results.filter(status='noresults').count()
total = results.count()
pending = total - succeeded - failed - running - skipped
pending = total - succeeded - failed - running - skipped - noresults
# Calculate percentage (succeeded + failed + skipped as completed)
completed = succeeded + failed + skipped
# Calculate percentage (succeeded + failed + skipped + noresults as completed)
completed = succeeded + failed + skipped + noresults
percent = int((completed / total * 100) if total > 0 else 0)
# Sum output sizes
@@ -1875,47 +1875,38 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
'running': running,
'pending': pending,
'skipped': skipped,
'noresults': noresults,
'percent': percent,
'output_size': output_size,
'is_sealed': is_sealed,
}
def retry_failed_archiveresults(self, retry_at: Optional[datetime] = None) -> int:
def retry_failed_archiveresults(self) -> int:
"""
Reset failed/skipped ArchiveResults to queued for retry.
This enables seamless retry of the entire extraction pipeline:
- Resets FAILED and SKIPPED results to QUEUED
- Sets retry_at so workers pick them up
- Plugins run in order (numeric prefix)
- Each plugin checks its dependencies at runtime
Dependency handling (e.g., chrome → screenshot):
- Plugins check if required outputs exist before running
- If dependency output missing → plugin returns 'skipped'
- On retry, if dependency now succeeds → dependent can run
Returns count of ArchiveResults reset.
"""
retry_at = retry_at or timezone.now()
count = self.archiveresult_set.filter(
status__in=[
ArchiveResult.StatusChoices.FAILED,
ArchiveResult.StatusChoices.SKIPPED,
ArchiveResult.StatusChoices.NORESULTS,
]
).update(
status=ArchiveResult.StatusChoices.QUEUED,
retry_at=retry_at,
output=None,
output_str='',
output_json=None,
output_files={},
output_size=0,
output_mimetypes='',
start_ts=None,
end_ts=None,
)
# Also reset the snapshot and current_step so it gets re-checked from the beginning
if count > 0:
self.status = self.StatusChoices.STARTED
self.retry_at = retry_at
self.retry_at = timezone.now()
self.current_step = 0 # Reset to step 0 for retry
self.save(update_fields=['status', 'retry_at', 'current_step', 'modified_at'])
@@ -2228,6 +2219,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
best_result = outputs[0]
context = {
**self.to_dict(extended=True),
'snapshot': self,
'title': htmlencode(self.title or (self.base_url if self.is_archived else TITLE_LOADING_MSG)),
'url_str': htmlencode(urldecode(self.base_url)),
'archive_url': urlencode(f'warc/{self.timestamp}' or (self.domain if self.is_archived else '')) or 'about:blank',
@@ -2275,8 +2267,8 @@ class SnapshotMachine(BaseStateMachine):
│ • discover_hooks('Snapshot') → finds all plugin hooks │
│ • create_pending_archiveresults() → creates ONE │
│ ArchiveResult per hook (NO execution yet) │
│ 2. ArchiveResults process independently with their own
state machines (see ArchiveResultMachine)
│ 2. The shared abx-dl runner executes hooks and the
projector updates ArchiveResult rows from events
│ 3. Advance through steps 0-9 as foreground hooks complete │
└─────────────────────────────────────────────────────────────┘
↓ tick() when is_finished()
@@ -2358,7 +2350,7 @@ class SnapshotMachine(BaseStateMachine):
cast(Any, crawl).sm.seal()
class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithStateMachine):
class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes):
class StatusChoices(models.TextChoices):
QUEUED = 'queued', 'Queued'
STARTED = 'started', 'Started'
@@ -2366,6 +2358,17 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
SUCCEEDED = 'succeeded', 'Succeeded'
FAILED = 'failed', 'Failed'
SKIPPED = 'skipped', 'Skipped'
NORESULTS = 'noresults', 'No Results'
INITIAL_STATE = StatusChoices.QUEUED
ACTIVE_STATE = StatusChoices.STARTED
FINAL_STATES = (
StatusChoices.SUCCEEDED,
StatusChoices.FAILED,
StatusChoices.SKIPPED,
StatusChoices.NORESULTS,
)
FINAL_OR_ACTIVE_STATES = (*FINAL_STATES, ACTIVE_STATE)
@classmethod
def get_plugin_choices(cls):
@@ -2404,16 +2407,10 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
start_ts = models.DateTimeField(default=None, null=True, blank=True)
end_ts = models.DateTimeField(default=None, null=True, blank=True)
status = ModelWithStateMachine.StatusField(choices=StatusChoices.choices, default=StatusChoices.QUEUED)
retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
status = models.CharField(max_length=16, choices=StatusChoices.choices, default=StatusChoices.QUEUED, db_index=True)
notes = models.TextField(blank=True, null=False, default='')
# output_dir is computed via @property from snapshot.output_dir / plugin
state_machine_name = 'archivebox.core.models.ArchiveResultMachine'
retry_at_field_name = 'retry_at'
state_field_name = 'status'
active_state = StatusChoices.STARTED
snapshot_id: uuid.UUID
process_id: uuid.UUID | None
@@ -2421,7 +2418,6 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
ModelWithOutputDir.Meta,
ModelWithConfig.Meta,
ModelWithNotes.Meta,
ModelWithStateMachine.Meta,
):
app_label = 'core'
verbose_name = 'Archive Result'
@@ -2516,40 +2512,24 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
return None
def save(self, *args, **kwargs):
is_new = self._state.adding
# Create Process record if this is a new ArchiveResult and no process exists yet
if is_new and not self.process_id:
from archivebox.machine.models import Process, Machine
process = Process.objects.create(
machine=Machine.current(),
pwd=str(Path(self.snapshot.output_dir) / self.plugin),
cmd=[], # Will be set by run()
status='queued',
timeout=120,
env={},
)
self.process = process
# Skip ModelWithOutputDir.save() to avoid creating index.json in plugin directories
# Call the Django Model.save() directly instead
models.Model.save(self, *args, **kwargs)
if is_new:
from archivebox.misc.logging_util import log_worker_event
log_worker_event(
worker_type='DB',
event='Created ArchiveResult',
indent_level=3,
plugin=self.plugin,
metadata={
'id': str(self.id),
'snapshot_id': str(self.snapshot_id),
'snapshot_url': str(self.snapshot.url)[:64],
'status': self.status,
},
)
# if is_new:
# from archivebox.misc.logging_util import log_worker_event
# log_worker_event(
# worker_type='DB',
# event='Created ArchiveResult',
# indent_level=3,
# plugin=self.plugin,
# metadata={
# 'id': str(self.id),
# 'snapshot_id': str(self.snapshot_id),
# 'snapshot_url': str(self.snapshot.url)[:64],
# 'status': self.status,
# },
# )
@cached_property
def snapshot_dir(self):
@@ -2566,6 +2546,28 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
def get_absolute_url(self):
return f'/{self.snapshot.archive_path}/{self.plugin}'
def reset_for_retry(self, *, save: bool = True) -> None:
self.status = self.StatusChoices.QUEUED
self.output_str = ''
self.output_json = None
self.output_files = {}
self.output_size = 0
self.output_mimetypes = ''
self.start_ts = None
self.end_ts = None
if save:
self.save(update_fields=[
'status',
'output_str',
'output_json',
'output_files',
'output_size',
'output_mimetypes',
'start_ts',
'end_ts',
'modified_at',
])
@property
def plugin_module(self) -> Any | None:
# Hook scripts are now used instead of Python plugin modules
@@ -2723,11 +2725,6 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
return None
def create_output_dir(self):
output_dir = Path(self.snapshot_dir) / self.plugin
output_dir.mkdir(parents=True, exist_ok=True)
return output_dir
@property
def output_dir_name(self) -> str:
return self.plugin
@@ -2782,134 +2779,17 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
def save_search_index(self):
pass
def cascade_health_update(self, success: bool):
"""Update health stats for parent Snapshot, Crawl, and execution infrastructure (Binary, Machine, NetworkInterface)."""
# Update archival hierarchy
self.snapshot.increment_health_stats(success)
self.snapshot.crawl.increment_health_stats(success)
# Update execution infrastructure
if self.binary:
self.binary.increment_health_stats(success)
if self.binary.machine:
self.binary.machine.increment_health_stats(success)
if self.iface:
self.iface.increment_health_stats(success)
def run(self):
"""
Execute this ArchiveResult's hook and update status.
If self.hook_name is set, runs only that specific hook.
If self.hook_name is empty, discovers and runs all hooks for self.plugin (backwards compat).
Updates status/output fields, queues discovered URLs, and triggers indexing.
"""
from django.utils import timezone
from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, run_hook
from archivebox.config.configset import get_config
# Get merged config with proper context
config = get_config(
crawl=self.snapshot.crawl,
snapshot=self.snapshot,
)
# Determine which hook(s) to run
hooks = []
if self.hook_name:
# SPECIFIC HOOK MODE: Find the specific hook by name
for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
if not base_dir.exists():
continue
plugin_dir = base_dir / self.plugin
if plugin_dir.exists():
hook_path = plugin_dir / self.hook_name
if hook_path.exists():
hooks.append(hook_path)
break
else:
# LEGACY MODE: Discover all hooks for this plugin (backwards compatibility)
for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
if not base_dir.exists():
continue
plugin_dir = base_dir / self.plugin
if plugin_dir.exists():
matches = list(plugin_dir.glob('on_Snapshot__*.*'))
if matches:
hooks.extend(sorted(matches))
if not hooks:
self.status = self.StatusChoices.FAILED
if self.hook_name:
self.output_str = f'Hook not found: {self.plugin}/{self.hook_name}'
else:
self.output_str = f'No hooks found for plugin: {self.plugin}'
self.retry_at = None
self.save()
return
# Output directory is plugin_dir for the hook output
plugin_dir = Path(self.snapshot.output_dir) / self.plugin
start_ts = timezone.now()
process = None
for hook in hooks:
# Run hook using Process.launch() - returns Process model
process = run_hook(
hook,
output_dir=plugin_dir,
config=config,
url=self.snapshot.url,
snapshot_id=str(self.snapshot.id),
crawl_id=str(self.snapshot.crawl.id),
depth=self.snapshot.depth,
)
# Link ArchiveResult to Process
self.process = process
self.start_ts = start_ts
self.save(update_fields=['process_id', 'start_ts', 'modified_at'])
if not process:
# No hooks ran
self.status = self.StatusChoices.FAILED
self.output_str = 'No hooks executed'
self.save()
return
# Update status based on hook execution
if process.status == process.StatusChoices.RUNNING:
# BACKGROUND HOOK - still running, return immediately
# Status is already STARTED from enter_started(), will be finalized by Snapshot.cleanup()
return
# FOREGROUND HOOK - completed, update from filesystem
self.update_from_output()
# Clean up empty output directory if no files were created
if plugin_dir.exists() and not self.output_files:
try:
if not any(plugin_dir.iterdir()):
plugin_dir.rmdir()
except (OSError, RuntimeError):
pass
def update_from_output(self):
"""
Update this ArchiveResult from filesystem logs and output files.
Used for:
- Foreground hooks that completed (called from ArchiveResult.run())
- Background hooks that completed (called from Snapshot.cleanup())
Used for Snapshot cleanup / orphan recovery when a hook's output exists
on disk but the projector did not finalize the row in the database.
Updates:
- status, output_str, output_json from ArchiveResult JSONL record
- output_files, output_size, output_mimetypes by walking filesystem
- end_ts, retry_at, cmd, cmd_version, binary FK
- end_ts, cmd, cmd_version, binary FK
- Processes side-effect records (Snapshot, Tag, etc.) via process_hook_records()
"""
import mimetypes
@@ -2924,7 +2804,6 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
self.status = self.StatusChoices.FAILED
self.output_str = 'Output directory not found'
self.end_ts = timezone.now()
self.retry_at = None
self.save()
return
@@ -2948,6 +2827,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
'succeeded': self.StatusChoices.SUCCEEDED,
'failed': self.StatusChoices.FAILED,
'skipped': self.StatusChoices.SKIPPED,
'noresults': self.StatusChoices.NORESULTS,
}
self.status = status_map.get(hook_data.get('status', 'failed'), self.StatusChoices.FAILED)
@@ -3011,7 +2891,6 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
# Update timestamps
self.end_ts = timezone.now()
self.retry_at = None
self.save()
@@ -3095,340 +2974,13 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
Uses proper config hierarchy: defaults -> file -> env -> machine -> user -> crawl -> snapshot
"""
import re
from archivebox.config.configset import get_config
# Get merged config with proper hierarchy
config = get_config(
user=self.created_by,
crawl=self.snapshot.crawl,
snapshot=self.snapshot,
)
# Get allowlist/denylist (can be string or list)
allowlist_raw = config.get('URL_ALLOWLIST', '')
denylist_raw = config.get('URL_DENYLIST', '')
# Normalize to list of patterns
def to_pattern_list(value):
if isinstance(value, list):
return value
if isinstance(value, str):
return [p.strip() for p in value.split(',') if p.strip()]
return []
allowlist = to_pattern_list(allowlist_raw)
denylist = to_pattern_list(denylist_raw)
# Denylist takes precedence
if denylist:
for pattern in denylist:
try:
if re.search(pattern, url):
return False
except re.error:
continue # Skip invalid regex patterns
# If allowlist exists, URL must match at least one pattern
if allowlist:
for pattern in allowlist:
try:
if re.search(pattern, url):
return True
except re.error:
continue # Skip invalid regex patterns
return False # No allowlist patterns matched
return True # No filters or passed filters
return self.snapshot.crawl.url_passes_filters(url, snapshot=self.snapshot)
@property
def output_dir(self) -> Path:
"""Get the output directory for this plugin's results."""
return Path(self.snapshot.output_dir) / self.plugin
def is_background_hook(self) -> bool:
"""Check if this ArchiveResult is for a background hook."""
plugin_dir = Path(self.pwd) if self.pwd else None
if not plugin_dir:
return False
pid_file = plugin_dir / 'hook.pid'
return pid_file.exists()
# =============================================================================
# ArchiveResult State Machine
# =============================================================================
class ArchiveResultMachine(BaseStateMachine):
"""
State machine for managing ArchiveResult (single plugin execution) lifecycle.
Hook Lifecycle:
┌─────────────────────────────────────────────────────────────┐
│ QUEUED State │
│ • Waiting for its turn to run │
└─────────────────────────────────────────────────────────────┘
↓ tick() when can_start()
┌─────────────────────────────────────────────────────────────┐
│ STARTED State → enter_started() │
│ 1. archiveresult.run() │
│ • Find specific hook by hook_name │
│ • run_hook(script, output_dir, ...) → subprocess │
│ │
│ 2a. FOREGROUND hook (returns HookResult): │
│ • update_from_output() immediately │
│ - Read stdout.log │
│ - Parse JSONL records │
│ - Extract 'ArchiveResult' record → update status │
│ - Walk output_dir → populate output_files │
│ - Call process_hook_records() for side effects │
│ │
│ 2b. BACKGROUND hook (returns None): │
│ • Status stays STARTED │
│ • Continues running in background │
│ • Killed by Snapshot.cleanup() when sealed │
└─────────────────────────────────────────────────────────────┘
↓ tick() checks status
┌─────────────────────────────────────────────────────────────┐
│ SUCCEEDED / FAILED / SKIPPED / BACKOFF │
│ • Set by hook's JSONL output during update_from_output() │
│ • Health stats incremented (num_uses_succeeded/failed) │
│ • Parent Snapshot health stats also updated │
└─────────────────────────────────────────────────────────────┘
https://github.com/ArchiveBox/ArchiveBox/wiki/ArchiveBox-Architecture-Diagrams
"""
model_attr_name = 'archiveresult'
# States
queued = State(value=ArchiveResult.StatusChoices.QUEUED, initial=True)
started = State(value=ArchiveResult.StatusChoices.STARTED)
backoff = State(value=ArchiveResult.StatusChoices.BACKOFF)
succeeded = State(value=ArchiveResult.StatusChoices.SUCCEEDED, final=True)
failed = State(value=ArchiveResult.StatusChoices.FAILED, final=True)
skipped = State(value=ArchiveResult.StatusChoices.SKIPPED, final=True)
# Tick Event - transitions based on conditions
# Flow: queued → started → (succeeded|failed|skipped)
# queued → skipped (if exceeded max attempts)
# started → backoff → started (retry)
tick = (
queued.to(skipped, cond='is_exceeded_max_attempts') # Check skip first
| queued.to.itself(unless='can_start')
| queued.to(started, cond='can_start')
| started.to(succeeded, cond='is_succeeded')
| started.to(failed, cond='is_failed')
| started.to(skipped, cond='is_skipped')
| started.to(backoff, cond='is_backoff')
| backoff.to(skipped, cond='is_exceeded_max_attempts') # Check skip from backoff too
| backoff.to.itself(unless='can_start')
| backoff.to(started, cond='can_start')
# Removed redundant transitions: backoff.to(succeeded/failed/skipped)
# Reason: backoff should always retry→started, then started→final states
)
archiveresult: ArchiveResult
def can_start(self) -> bool:
"""Pure function - check if AR can start (has valid URL)."""
return bool(self.archiveresult.snapshot.url)
def is_exceeded_max_attempts(self) -> bool:
"""Check if snapshot has exceeded MAX_URL_ATTEMPTS failed results."""
from archivebox.config.configset import get_config
config = get_config(
crawl=self.archiveresult.snapshot.crawl,
snapshot=self.archiveresult.snapshot,
)
max_attempts = config.get('MAX_URL_ATTEMPTS', 50)
# Count failed ArchiveResults for this snapshot (any plugin type)
failed_count = self.archiveresult.snapshot.archiveresult_set.filter(
status=ArchiveResult.StatusChoices.FAILED
).count()
return failed_count >= max_attempts
def is_succeeded(self) -> bool:
"""Check if extractor plugin succeeded (status was set by run())."""
return self.archiveresult.status == ArchiveResult.StatusChoices.SUCCEEDED
def is_failed(self) -> bool:
"""Check if extractor plugin failed (status was set by run())."""
return self.archiveresult.status == ArchiveResult.StatusChoices.FAILED
def is_skipped(self) -> bool:
"""Check if extractor plugin was skipped (status was set by run())."""
return self.archiveresult.status == ArchiveResult.StatusChoices.SKIPPED
def is_backoff(self) -> bool:
"""Check if we should backoff and retry later."""
# Backoff if status is still started (plugin didn't complete) and output_str is empty
return (
self.archiveresult.status == ArchiveResult.StatusChoices.STARTED
and not self.archiveresult.output_str
)
def is_finished(self) -> bool:
"""
Check if extraction has completed (success, failure, or skipped).
For background hooks in STARTED state, checks if their Process has finished and reaps them.
"""
# If already in final state, return True
if self.archiveresult.status in (
ArchiveResult.StatusChoices.SUCCEEDED,
ArchiveResult.StatusChoices.FAILED,
ArchiveResult.StatusChoices.SKIPPED,
):
return True
# If in STARTED state with a Process, check if Process has finished running
if self.archiveresult.status == ArchiveResult.StatusChoices.STARTED:
if self.archiveresult.process_id:
process = self.archiveresult.process
# If process is NOT running anymore, reap the background hook
if not process.is_running:
self.archiveresult.update_from_output()
# Check if now in final state after reaping
return self.archiveresult.status in (
ArchiveResult.StatusChoices.SUCCEEDED,
ArchiveResult.StatusChoices.FAILED,
ArchiveResult.StatusChoices.SKIPPED,
)
return False
@queued.enter
def enter_queued(self):
self.archiveresult.update_and_requeue(
retry_at=timezone.now(),
status=ArchiveResult.StatusChoices.QUEUED,
start_ts=None,
) # bump the snapshot's retry_at so they pickup any new changes
@started.enter
def enter_started(self):
# Update Process with network interface
if self.archiveresult.process_id:
self.archiveresult.process.iface = NetworkInterface.current()
self.archiveresult.process.save()
# Lock the object and mark start time
self.archiveresult.update_and_requeue(
retry_at=timezone.now() + timedelta(seconds=120), # 2 min timeout for plugin
status=ArchiveResult.StatusChoices.STARTED,
start_ts=timezone.now(),
)
# Run the plugin - this updates status, output, timestamps, etc.
self.archiveresult.run()
# Save the updated result
self.archiveresult.save()
@backoff.enter
def enter_backoff(self):
    """On entering BACKOFF: clear end_ts and schedule the next retry in 60s."""
    self.archiveresult.update_and_requeue(
        retry_at=timezone.now() + timedelta(seconds=60),
        status=ArchiveResult.StatusChoices.BACKOFF,
        end_ts=None,  # not terminal yet; end_ts is set on success/failure/skip
    )
def _check_and_seal_parent_snapshot(self):
    """
    Check if this is the last ArchiveResult to finish - if so, seal the parent Snapshot.

    Note: In the new architecture, the shared runner handles step advancement and sealing.
    This method is kept for direct model-driven edge cases.
    """
    import sys
    snapshot = self.archiveresult.snapshot
    # Check if all archiveresults are finished (in final states)
    remaining_active = snapshot.archiveresult_set.exclude(
        status__in=[
            ArchiveResult.StatusChoices.SUCCEEDED,
            ArchiveResult.StatusChoices.FAILED,
            ArchiveResult.StatusChoices.SKIPPED,
        ]
    ).count()
    if remaining_active == 0:
        print(f'[cyan] 🔒 All archiveresults finished for snapshot {snapshot.url}, sealing snapshot[/cyan]', file=sys.stderr)
        # Seal the parent snapshot
        # (cast(Any, ...) quiets the type checker; .sm is the attached state machine)
        cast(Any, snapshot).sm.seal()
@succeeded.enter
def enter_succeeded(self):
    """On entering SUCCEEDED: finalize the result, update health stats, maybe seal the snapshot."""
    import sys
    self.archiveresult.update_and_requeue(
        retry_at=None,  # terminal state: never retried again
        status=ArchiveResult.StatusChoices.SUCCEEDED,
        end_ts=timezone.now(),
    )
    # Update health stats for ArchiveResult, Snapshot, and Crawl cascade
    self.archiveresult.cascade_health_update(success=True)
    print(f'[cyan] ✅ ArchiveResult succeeded: {self.archiveresult.plugin} for {self.archiveresult.snapshot.url}[/cyan]', file=sys.stderr)
    # Check if this is the last AR to finish - seal parent snapshot if so
    self._check_and_seal_parent_snapshot()
@failed.enter
def enter_failed(self):
    """On entering FAILED: finalize the result, update health stats, maybe seal the snapshot."""
    import sys
    print(f'[red] ❌ ArchiveResult.enter_failed() called for {self.archiveresult.plugin}[/red]', file=sys.stderr)
    self.archiveresult.update_and_requeue(
        retry_at=None,  # terminal state: never retried again
        status=ArchiveResult.StatusChoices.FAILED,
        end_ts=timezone.now(),
    )
    # Update health stats for ArchiveResult, Snapshot, and Crawl cascade
    self.archiveresult.cascade_health_update(success=False)
    print(f'[red] ❌ ArchiveResult failed: {self.archiveresult.plugin} for {self.archiveresult.snapshot.url}[/red]', file=sys.stderr)
    # Check if this is the last AR to finish - seal parent snapshot if so
    self._check_and_seal_parent_snapshot()
@skipped.enter
def enter_skipped(self):
    """On entering SKIPPED: record a reason if missing, finalize, maybe seal the snapshot."""
    import sys
    # Set output_str if not already set (e.g., when skipped due to max attempts)
    if not self.archiveresult.output_str and self.is_exceeded_max_attempts():
        from archivebox.config.configset import get_config
        # Resolve config with crawl/snapshot scoping so per-crawl overrides apply
        config = get_config(
            crawl=self.archiveresult.snapshot.crawl,
            snapshot=self.archiveresult.snapshot,
        )
        max_attempts = config.get('MAX_URL_ATTEMPTS', 50)
        self.archiveresult.output_str = f'Skipped: snapshot exceeded MAX_URL_ATTEMPTS ({max_attempts} failures)'
    self.archiveresult.update_and_requeue(
        retry_at=None,  # terminal state: never retried again
        status=ArchiveResult.StatusChoices.SKIPPED,
        end_ts=timezone.now(),
    )
    print(f'[dim] ⏭️ ArchiveResult skipped: {self.archiveresult.plugin} for {self.archiveresult.snapshot.url}[/dim]', file=sys.stderr)
    # Check if this is the last AR to finish - seal parent snapshot if so
    self._check_and_seal_parent_snapshot()
# =============================================================================
# State Machine Registration
# =============================================================================
@@ -3436,4 +2988,3 @@ class ArchiveResultMachine(BaseStateMachine):
# Manually register state machines with python-statemachine registry
# (normally auto-discovered from statemachines.py, but we define them here for clarity)
registry.register(SnapshotMachine)
registry.register(ArchiveResultMachine)

View File

@@ -232,11 +232,12 @@ SQLITE_CONNECTION_OPTIONS = {
# https://gcollazo.com/optimal-sqlite-settings-for-django/
# https://litestream.io/tips/#busy-timeout
# https://docs.djangoproject.com/en/5.1/ref/databases/#setting-pragma-options
"timeout": 10,
"timeout": 30,
"check_same_thread": False,
"transaction_mode": "IMMEDIATE",
"init_command": (
"PRAGMA foreign_keys=ON;"
"PRAGMA busy_timeout = 30000;"
"PRAGMA journal_mode = WAL;"
"PRAGMA synchronous = NORMAL;"
"PRAGMA temp_store = MEMORY;"

View File

@@ -0,0 +1,271 @@
from __future__ import annotations
import json
from collections import defaultdict
from typing import Any
from django.contrib.auth.models import User
from django.db.models import Count, F, Q, QuerySet
from django.db.models.functions import Lower
from django.http import HttpRequest
from django.urls import reverse
from archivebox.core.host_utils import build_snapshot_url, build_web_url
from archivebox.core.models import Snapshot, SnapshotTag, Tag
# Max number of snapshot previews rendered inside one tag card.
TAG_SNAPSHOT_PREVIEW_LIMIT = 10

# (value, label) pairs for the tag-list sort dropdown; keys are validated
# by normalize_tag_sort() and mapped to orderings in get_matching_tags().
TAG_SORT_CHOICES = (
    ('name_asc', 'Name A-Z'),
    ('name_desc', 'Name Z-A'),
    ('created_desc', 'Created newest'),
    ('created_asc', 'Created oldest'),
    ('snapshots_desc', 'Most snapshots'),
    ('snapshots_asc', 'Fewest snapshots'),
)

# (value, label) pairs for the "has snapshots" filter dropdown;
# validated by normalize_has_snapshots_filter().
TAG_HAS_SNAPSHOTS_CHOICES = (
    ('all', 'All'),
    ('yes', 'Has snapshots'),
    ('no', 'No snapshots'),
)
def normalize_tag_name(name: str) -> str:
    """Trim surrounding whitespace from a tag name, mapping None/empty input to ''."""
    if not name:
        return ''
    return name.strip()
def normalize_tag_sort(sort: str = 'created_desc') -> str:
    """Return *sort* when it is one of the TAG_SORT_CHOICES keys, else 'created_desc'."""
    for choice_key, _choice_label in TAG_SORT_CHOICES:
        if sort == choice_key:
            return sort
    return 'created_desc'
def normalize_has_snapshots_filter(value: str = 'all') -> str:
    """Return *value* when it is one of the TAG_HAS_SNAPSHOTS_CHOICES keys, else 'all'."""
    allowed = dict(TAG_HAS_SNAPSHOTS_CHOICES)
    if value in allowed:
        return value
    return 'all'
def normalize_created_by_filter(created_by: str = '') -> str:
    """Validate a created-by user-id filter value.

    Returns the value unchanged when it is a non-empty string of decimal
    digits (i.e. guaranteed safe to pass to ``int()``), otherwise ''.

    Uses ``str.isdecimal()`` rather than ``str.isdigit()``: isdigit() also
    accepts non-decimal digit characters such as superscript '²' that
    ``int()`` rejects, so the old check could admit a value that later
    crashes the ``int(created_by)`` call in get_matching_tags().
    """
    return created_by if str(created_by).isdecimal() else ''
def normalize_created_year_filter(year: str = '') -> str:
    """Validate a 4-character year filter value, returning '' when invalid.

    Uses ``str.isdecimal()`` instead of ``str.isdigit()`` because isdigit()
    accepts non-decimal digit characters (e.g. '²') that ``int()`` cannot
    parse, which would crash the ``int(year)`` call in get_matching_tags().
    """
    year = (year or '').strip()
    return year if len(year) == 4 and year.isdecimal() else ''
def get_matching_tags(
    query: str = '',
    sort: str = 'created_desc',
    created_by: str = '',
    year: str = '',
    has_snapshots: str = 'all',
) -> QuerySet[Tag]:
    """
    Build the Tag queryset backing the tag-list UI.

    All filter arguments are normalized (invalid values fall back to their
    defaults) before being applied, so this is safe to call with raw GET
    params. Every returned Tag carries a ``num_snapshots`` annotation.
    """
    queryset = Tag.objects.select_related('created_by').annotate(
        num_snapshots=Count('snapshot_set', distinct=True),
    )
    # Free-text search matches either name or slug, case-insensitively.
    query = normalize_tag_name(query)
    if query:
        queryset = queryset.filter(
            Q(name__icontains=query) | Q(slug__icontains=query),
        )
    # created_by is a digit-string user id ('' disables the filter).
    created_by = normalize_created_by_filter(created_by)
    if created_by:
        queryset = queryset.filter(created_by_id=int(created_by))
    # year is a 4-digit string ('' disables the filter).
    year = normalize_created_year_filter(year)
    if year:
        queryset = queryset.filter(created_at__year=int(year))
    has_snapshots = normalize_has_snapshots_filter(has_snapshots)
    if has_snapshots == 'yes':
        queryset = queryset.filter(num_snapshots__gt=0)
    elif has_snapshots == 'no':
        queryset = queryset.filter(num_snapshots=0)
    # Orderings include a pk tie-breaker so pagination is deterministic.
    sort = normalize_tag_sort(sort)
    if sort == 'name_asc':
        queryset = queryset.order_by(Lower('name'), 'id')
    elif sort == 'name_desc':
        queryset = queryset.order_by(Lower('name').desc(), '-id')
    elif sort == 'created_asc':
        queryset = queryset.order_by(F('created_at').asc(nulls_first=True), 'id', Lower('name'))
    elif sort == 'snapshots_desc':
        queryset = queryset.order_by(F('num_snapshots').desc(nulls_last=True), F('created_at').desc(nulls_last=True), '-id', Lower('name'))
    elif sort == 'snapshots_asc':
        queryset = queryset.order_by(F('num_snapshots').asc(nulls_first=True), Lower('name'), 'id')
    else:
        # 'created_desc' (the normalized default)
        queryset = queryset.order_by(F('created_at').desc(nulls_last=True), '-id', Lower('name'))
    return queryset
def get_tag_creator_choices() -> list[tuple[str, str]]:
    """Return (user_id_str, username) dropdown choices for users who have created tags."""
    rows = (
        Tag.objects
        .filter(created_by__isnull=False)
        .values_list('created_by_id', 'created_by__username')
        .order_by(Lower('created_by__username'), 'created_by_id')
        .distinct()
    )
    # Fall back to a synthetic 'User <id>' label when the username is blank.
    return [(str(user_id), username or f'User {user_id}') for user_id, username in rows]
def get_tag_year_choices() -> list[str]:
    """List the distinct years (newest first) in which tags were created."""
    year_dates = Tag.objects.exclude(created_at__isnull=True).dates('created_at', 'year', order='DESC')
    return [str(year_date.year) for year_date in year_dates]
def get_tag_by_ref(tag_ref: str | int) -> Tag:
    """
    Resolve a tag reference that may be a primary key (int or digit string) or a slug.

    Raises Tag.DoesNotExist when nothing matches.
    NOTE(review): the final slug__icontains fallback can raise
    Tag.MultipleObjectsReturned when several slugs contain the fragment —
    confirm callers handle that (or tighten the lookup).
    """
    if isinstance(tag_ref, int):
        return Tag.objects.get(pk=tag_ref)
    ref = str(tag_ref).strip()
    if ref.isdigit():
        return Tag.objects.get(pk=int(ref))
    try:
        # Exact (case-insensitive) slug match first...
        return Tag.objects.get(slug__iexact=ref)
    except Tag.DoesNotExist:
        # ...then fall back to a fuzzy substring match.
        return Tag.objects.get(slug__icontains=ref)
def get_or_create_tag(name: str, created_by: User | None = None) -> tuple[Tag, bool]:
    """
    Fetch a tag by case-insensitive name, creating it if absent.

    Returns (tag, created) like Django's get_or_create.
    Raises ValueError when the (trimmed) name is empty.

    NOTE(review): this check-then-create is not atomic — two concurrent calls
    can both miss the lookup and attempt the create; if Tag.name is unique,
    one of them will raise IntegrityError. Confirm whether callers need
    transactional get_or_create semantics here.
    """
    normalized_name = normalize_tag_name(name)
    if not normalized_name:
        raise ValueError('Tag name is required')
    existing = Tag.objects.filter(name__iexact=normalized_name).first()
    if existing:
        return existing, False
    tag = Tag.objects.create(
        name=normalized_name,
        created_by=created_by,
    )
    return tag, True
def rename_tag(tag: Tag, name: str) -> Tag:
    """
    Rename *tag* to the whitespace-trimmed *name* and return it.

    Raises ValueError when the name is blank or already taken
    (case-insensitively) by a different tag. Only saves when the
    name actually changes.
    """
    new_name = normalize_tag_name(name)
    if not new_name:
        raise ValueError('Tag name is required')
    clash = (
        Tag.objects
        .filter(name__iexact=new_name)
        .exclude(pk=tag.pk)
        .first()
    )
    if clash is not None:
        raise ValueError(f'Tag "{clash.name}" already exists')
    if tag.name != new_name:
        tag.name = new_name
        tag.save()
    return tag
def delete_tag(tag: Tag) -> tuple[int, dict[str, int]]:
    """Delete *tag*, returning Django's (total_rows_deleted, per-model counts) tuple."""
    return tag.delete()
def export_tag_urls(tag: Tag) -> str:
    """Render the URLs of a tag's snapshots, newest first, one per line."""
    ordered = tag.snapshot_set.order_by('-downloaded_at', '-created_at', '-pk')
    return '\n'.join(ordered.values_list('url', flat=True))
def export_tag_snapshots_jsonl(tag: Tag) -> str:
    """Serialize a tag's snapshots (newest first) as JSONL: one JSON object per line."""
    # prefetch_related('tags') avoids N+1 queries if to_json() includes tag names —
    # presumably it does; verify against Snapshot.to_json().
    snapshots = tag.snapshot_set.order_by('-downloaded_at', '-created_at', '-pk').prefetch_related('tags')
    return '\n'.join(json.dumps(snapshot.to_json()) for snapshot in snapshots)
def _display_snapshot_title(snapshot: Snapshot) -> str:
title = (snapshot.title or '').strip()
url = (snapshot.url or '').strip()
if not title:
return url
normalized_title = title.lower()
if normalized_title == 'pending...' or normalized_title == url.lower():
return url
return title
def _build_snapshot_preview(snapshot: Snapshot, request: HttpRequest | None = None) -> dict[str, Any]:
    """Build the dict rendered as one snapshot row inside a tag card."""
    return {
        'id': str(snapshot.pk),
        'title': _display_snapshot_title(snapshot),
        'url': snapshot.url,
        # request is threaded through so URLs can be built for the current host
        'favicon_url': build_snapshot_url(str(snapshot.pk), 'favicon.ico', request=request),
        'admin_url': reverse('admin:core_snapshot_change', args=[snapshot.pk]),
        'archive_url': build_web_url(f'/{snapshot.archive_path_from_db}/index.html', request=request),
        'downloaded_at': snapshot.downloaded_at.isoformat() if snapshot.downloaded_at else None,
    }
def _build_snapshot_preview_map(tags: list[Tag], request: HttpRequest | None = None, preview_limit: int = TAG_SNAPSHOT_PREVIEW_LIMIT) -> dict[int, list[dict[str, Any]]]:
    """
    Map tag_id -> up to *preview_limit* snapshot preview dicts, newest first.

    NOTE(review): rows past the per-tag limit are skipped with `continue`, so
    ALL SnapshotTag rows for the given tags are still fetched from the DB;
    for tags with many snapshots a per-tag LIMIT (e.g. a window function)
    would be cheaper.
    """
    tag_ids = [tag.pk for tag in tags]
    if not tag_ids:
        return {}
    snapshot_tags = (
        SnapshotTag.objects
        .filter(tag_id__in=tag_ids)
        .select_related('snapshot__crawl__created_by')
        .order_by(
            'tag_id',
            F('snapshot__downloaded_at').desc(nulls_last=True),
            F('snapshot__created_at').desc(nulls_last=True),
            F('snapshot_id').desc(),
        )
    )
    preview_map: dict[int, list[dict[str, Any]]] = defaultdict(list)
    for snapshot_tag in snapshot_tags:
        previews = preview_map[snapshot_tag.tag_id]
        if len(previews) >= preview_limit:
            continue
        previews.append(_build_snapshot_preview(snapshot_tag.snapshot, request=request))
    return preview_map
def build_tag_card(tag: Tag, snapshot_previews: list[dict[str, Any]] | None = None) -> dict[str, Any]:
    """Build the dict rendered as one card in the tag-list UI."""
    # Prefer the num_snapshots annotation from get_matching_tags();
    # falls back to an extra COUNT query for un-annotated Tag instances.
    count = getattr(tag, 'num_snapshots', tag.snapshot_set.count())
    return {
        'id': tag.pk,
        'name': tag.name,
        'slug': tag.slug,
        'num_snapshots': count,
        'filter_url': f"{reverse('admin:core_snapshot_changelist')}?tags__id__exact={tag.pk}",
        'edit_url': reverse('admin:core_tag_change', args=[tag.pk]),
        'export_urls_url': reverse('api-1:tag_urls_export', args=[tag.pk]),
        'export_jsonl_url': reverse('api-1:tag_snapshots_export', args=[tag.pk]),
        'rename_url': reverse('api-1:rename_tag', args=[tag.pk]),
        'delete_url': reverse('api-1:delete_tag', args=[tag.pk]),
        'snapshots': snapshot_previews or [],
    }
def build_tag_cards(
    query: str = '',
    request: HttpRequest | None = None,
    limit: int | None = None,
    preview_limit: int = TAG_SNAPSHOT_PREVIEW_LIMIT,
    sort: str = 'created_desc',
    created_by: str = '',
    year: str = '',
    has_snapshots: str = 'all',
) -> list[dict[str, Any]]:
    """
    Top-level entry for the tag-list UI: filter/sort tags, then attach
    up to *preview_limit* snapshot previews to each card.

    All filter args accept raw GET params (normalized downstream);
    *limit* caps the number of tag cards returned (None = no cap).
    """
    queryset = get_matching_tags(
        query=query,
        sort=sort,
        created_by=created_by,
        year=year,
        has_snapshots=has_snapshots,
    )
    if limit is not None:
        queryset = queryset[:limit]
    # Materialize once: the preview map and card building both iterate the tags.
    tags = list(queryset)
    preview_map = _build_snapshot_preview_map(tags, request=request, preview_limit=preview_limit)
    return [
        build_tag_card(tag, snapshot_previews=preview_map.get(tag.pk, []))
        for tag in tags
    ]

View File

@@ -11,6 +11,7 @@ from archivebox.hooks import (
)
from archivebox.core.host_utils import (
get_admin_base_url,
get_public_base_url,
get_web_base_url,
get_snapshot_base_url,
build_snapshot_url,
@@ -166,6 +167,11 @@ def web_base_url(context) -> str:
return get_web_base_url(request=context.get('request'))
@register.simple_tag(takes_context=True)
def public_base_url(context) -> str:
return get_public_base_url(request=context.get('request'))
@register.simple_tag(takes_context=True)
def snapshot_base_url(context, snapshot) -> str:
snapshot_id = getattr(snapshot, 'id', snapshot)

View File

@@ -1,5 +1,6 @@
__package__ = 'archivebox.core'
import json
import os
import posixpath
from glob import glob, escape
@@ -7,7 +8,7 @@ from django.utils import timezone
import inspect
from typing import Callable, cast, get_type_hints
from pathlib import Path
from urllib.parse import urlparse
from urllib.parse import quote, urlparse
from django.shortcuts import render, redirect
from django.http import JsonResponse, HttpRequest, HttpResponse, Http404, HttpResponseForbidden
@@ -26,7 +27,7 @@ from admin_data_views.typing import TableContext, ItemContext, SectionData
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
from archivebox.config import CONSTANTS, CONSTANTS_CONFIG, DATA_DIR, VERSION
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG
from archivebox.config.configset import get_flat_config, get_config, get_all_configs
from archivebox.misc.util import base_url, htmlencode, ts_to_date_str, urldecode
from archivebox.misc.serve_static import serve_static_with_byterange_support
@@ -37,7 +38,18 @@ from archivebox.core.models import Snapshot
from archivebox.core.host_utils import build_snapshot_url
from archivebox.core.forms import AddLinkForm
from archivebox.crawls.models import Crawl
from archivebox.hooks import get_enabled_plugins, get_plugin_name
from archivebox.hooks import (
BUILTIN_PLUGINS_DIR,
USER_PLUGINS_DIR,
discover_plugin_configs,
get_enabled_plugins,
get_plugin_name,
iter_plugin_dirs,
)
ABX_PLUGINS_GITHUB_BASE_URL = 'https://github.com/ArchiveBox/abx-plugins/tree/main/abx_plugins/plugins/'
LIVE_PLUGIN_BASE_URL = '/admin/environment/plugins/'
def _files_index_target(snapshot: Snapshot, archivefile: str | None) -> str:
@@ -699,6 +711,9 @@ def _serve_responses_path(request, responses_root: Path, rel_path: str, show_ind
def _serve_snapshot_replay(request: HttpRequest, snapshot: Snapshot, path: str = ""):
rel_path = path or ""
show_indexes = bool(request.GET.get("files"))
if not show_indexes and (not rel_path or rel_path == "index.html"):
return SnapshotView.render_live_index(request, snapshot)
if not rel_path or rel_path.endswith("/"):
if show_indexes:
rel_path = rel_path.rstrip("/")
@@ -784,7 +799,6 @@ class SnapshotHostView(View):
raise Http404
return _serve_snapshot_replay(request, snapshot, path)
class SnapshotReplayView(View):
"""Serve snapshot directory contents on a one-domain replay path."""
@@ -915,8 +929,17 @@ class AddView(UserPassesTestMixin, FormView):
return custom_config
def get_context_data(self, **kwargs):
from archivebox.core.models import Tag
required_search_plugin = f'search_backend_{SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}'.strip()
plugin_configs = discover_plugin_configs()
plugin_dependency_map = {
plugin_name: [
str(required_plugin).strip()
for required_plugin in (schema.get('required_plugins') or [])
if str(required_plugin).strip()
]
for plugin_name, schema in plugin_configs.items()
if isinstance(schema.get('required_plugins'), list) and schema.get('required_plugins')
}
return {
**super().get_context_data(**kwargs),
'title': "Create Crawl",
@@ -924,8 +947,9 @@ class AddView(UserPassesTestMixin, FormView):
'absolute_add_path': self.request.build_absolute_uri(self.request.path),
'VERSION': VERSION,
'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
'required_search_plugin': required_search_plugin,
'plugin_dependency_map_json': json.dumps(plugin_dependency_map, sort_keys=True),
'stdout': '',
'available_tags': list(Tag.objects.all().order_by('name').values_list('name', flat=True)),
}
def _create_crawl_from_form(self, form, *, created_by_id=None) -> Crawl:
@@ -937,11 +961,10 @@ class AddView(UserPassesTestMixin, FormView):
depth = int(form.cleaned_data["depth"])
plugins = ','.join(form.cleaned_data.get("plugins", []))
schedule = form.cleaned_data.get("schedule", "").strip()
persona = form.cleaned_data.get("persona", "Default")
overwrite = form.cleaned_data.get("overwrite", False)
update = form.cleaned_data.get("update", False)
persona = form.cleaned_data.get("persona")
index_only = form.cleaned_data.get("index_only", False)
notes = form.cleaned_data.get("notes", "")
url_filters = form.cleaned_data.get("url_filters") or {}
custom_config = self._get_custom_config_overrides(form)
from archivebox.config.permissions import HOSTNAME
@@ -957,6 +980,7 @@ class AddView(UserPassesTestMixin, FormView):
# 1. save the provided urls to sources/2024-11-05__23-59-59__web_ui_add_by_user_<user_pk>.txt
sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__web_ui_add_by_user_{created_by_id}.txt'
sources_file.parent.mkdir(parents=True, exist_ok=True)
sources_file.write_text(urls if isinstance(urls, str) else '\n'.join(urls))
# 2. create a new Crawl with the URLs from the file
@@ -964,16 +988,18 @@ class AddView(UserPassesTestMixin, FormView):
urls_content = sources_file.read_text()
# Build complete config
config = {
'ONLY_NEW': not update,
'INDEX_ONLY': index_only,
'OVERWRITE': overwrite,
'DEPTH': depth,
'PLUGINS': plugins or '',
'DEFAULT_PERSONA': persona or 'Default',
'DEFAULT_PERSONA': (persona.name if persona else 'Default'),
}
# Merge custom config overrides
config.update(custom_config)
if url_filters.get('allowlist'):
config['URL_ALLOWLIST'] = url_filters['allowlist']
if url_filters.get('denylist'):
config['URL_DENYLIST'] = url_filters['denylist']
crawl = Crawl.objects.create(
urls=urls_content,
@@ -999,6 +1025,8 @@ class AddView(UserPassesTestMixin, FormView):
crawl.schedule = crawl_schedule
crawl.save(update_fields=['schedule'])
crawl.create_snapshots_from_urls()
# 4. start the Orchestrator & wait until it completes
# ... orchestrator will create the root Snapshot, which creates pending ArchiveResults, which gets run by the ArchiveResultActors ...
# from archivebox.crawls.actors import CrawlActor
@@ -1011,7 +1039,7 @@ class AddView(UserPassesTestMixin, FormView):
urls = form.cleaned_data["url"]
schedule = form.cleaned_data.get("schedule", "").strip()
rough_url_count = urls.count('://')
rough_url_count = len([url for url in urls.splitlines() if url.strip()])
# Build success message with schedule link if created
schedule_msg = ""
@@ -1080,10 +1108,6 @@ class WebAddView(AddView):
'persona': defaults_form.fields['persona'].initial or 'Default',
'config': {},
}
if defaults_form.fields['update'].initial:
form_data['update'] = 'on'
if defaults_form.fields['overwrite'].initial:
form_data['overwrite'] = 'on'
if defaults_form.fields['index_only'].initial:
form_data['index_only'] = 'on'
@@ -1118,6 +1142,41 @@ def live_progress_view(request):
from archivebox.core.models import Snapshot, ArchiveResult
from archivebox.machine.models import Process, Machine
def hook_details(hook_name: str, plugin: str = "setup") -> tuple[str, str, str, str]:
    """Derive (plugin, human label, phase, normalized filename) from a hook filename.

    e.g. 'on_Snapshot__01_wget.py' -> ('wget', 'wget', 'snapshot', 'on_Snapshot__01_wget.py')
    """
    filename = Path(hook_name).name if hook_name else ""
    if not filename:
        # No hook name at all: fall back to the plugin name for both labels.
        return (plugin, plugin, "unknown", "")

    # The phase is encoded in the 'on_<Model>__' prefix of the hook filename.
    phase = "unknown"
    for prefix, phase_name in (
        ("on_Crawl__", "crawl"),
        ("on_Snapshot__", "snapshot"),
        ("on_Binary__", "binary"),
    ):
        if filename.startswith(prefix):
            phase = phase_name
            break

    # Label: text after the first '__', minus the file extension and any
    # two-digit 'NN_' ordering prefix, with underscores shown as spaces.
    label = filename.split("__", 1)[1] if "__" in filename else filename
    label = label.rsplit(".", 1)[0]
    if len(label) > 3 and label[:2].isdigit() and label[2] == "_":
        label = label[3:]
    label = label.replace("_", " ").strip() or plugin

    return (plugin, label, phase, filename)
def process_label(cmd: list[str] | None) -> tuple[str, str, str, str]:
    """Derive hook display details from a Process cmd list (first element = hook script path)."""
    hook_path = ""
    if isinstance(cmd, list) and cmd:
        first = cmd[0]
        if isinstance(first, str):
            hook_path = first
    if not hook_path:
        # No usable command: treat as an anonymous setup-phase process.
        return ("", "setup", "unknown", "")
    # The plugin name is taken from the hook file's parent directory name.
    return hook_details(Path(hook_path).name, plugin=Path(hook_path).parent.name or "setup")
machine = Machine.current()
orchestrator_proc = Process.objects.filter(
machine=machine,
@@ -1188,8 +1247,19 @@ def live_progress_view(request):
Process.TypeChoices.BINARY,
],
)
recent_processes = Process.objects.filter(
machine=machine,
process_type__in=[
Process.TypeChoices.HOOK,
Process.TypeChoices.BINARY,
],
modified_at__gte=timezone.now() - timedelta(minutes=10),
).order_by("-modified_at")
crawl_process_pids: dict[str, int] = {}
snapshot_process_pids: dict[str, int] = {}
process_records_by_crawl: dict[str, list[dict[str, object]]] = {}
process_records_by_snapshot: dict[str, list[dict[str, object]]] = {}
seen_process_records: set[str] = set()
for proc in running_processes:
env = proc.env or {}
if not isinstance(env, dict):
@@ -1197,11 +1267,48 @@ def live_progress_view(request):
crawl_id = env.get('CRAWL_ID')
snapshot_id = env.get('SNAPSHOT_ID')
_plugin, _label, phase, _hook_name = process_label(proc.cmd)
if crawl_id and proc.pid:
crawl_process_pids.setdefault(str(crawl_id), proc.pid)
if snapshot_id and proc.pid:
if phase == "snapshot" and snapshot_id and proc.pid:
snapshot_process_pids.setdefault(str(snapshot_id), proc.pid)
for proc in recent_processes:
env = proc.env or {}
if not isinstance(env, dict):
env = {}
crawl_id = env.get("CRAWL_ID")
snapshot_id = env.get("SNAPSHOT_ID")
if not crawl_id and not snapshot_id:
continue
plugin, label, phase, hook_name = process_label(proc.cmd)
record_scope = str(snapshot_id) if phase == "snapshot" and snapshot_id else str(crawl_id)
proc_key = f"{record_scope}:{plugin}:{label}:{proc.status}:{proc.exit_code}"
if proc_key in seen_process_records:
continue
seen_process_records.add(proc_key)
status = "started" if proc.status == Process.StatusChoices.RUNNING else ("failed" if proc.exit_code not in (None, 0) else "succeeded")
payload: dict[str, object] = {
"id": str(proc.id),
"plugin": plugin,
"label": label,
"hook_name": hook_name,
"status": status,
"phase": phase,
"source": "process",
"process_id": str(proc.id),
}
if status == "started" and proc.pid:
payload["pid"] = proc.pid
if phase == "snapshot" and snapshot_id:
process_records_by_snapshot.setdefault(str(snapshot_id), []).append(payload)
elif crawl_id:
process_records_by_crawl.setdefault(str(crawl_id), []).append(payload)
active_crawls_qs = Crawl.objects.filter(
status__in=[Crawl.StatusChoices.QUEUED, Crawl.StatusChoices.STARTED]
).prefetch_related(
@@ -1234,6 +1341,11 @@ def live_progress_view(request):
# Calculate crawl progress
crawl_progress = int((completed_snapshots / total_snapshots) * 100) if total_snapshots > 0 else 0
crawl_setup_plugins = list(process_records_by_crawl.get(str(crawl.id), []))
crawl_setup_total = len(crawl_setup_plugins)
crawl_setup_completed = sum(1 for item in crawl_setup_plugins if item.get("status") == "succeeded")
crawl_setup_failed = sum(1 for item in crawl_setup_plugins if item.get("status") == "failed")
crawl_setup_pending = sum(1 for item in crawl_setup_plugins if item.get("status") == "queued")
# Get active snapshots for this crawl (already prefetched)
active_snapshots_for_crawl = []
@@ -1241,28 +1353,21 @@ def live_progress_view(request):
# Get archive results for this snapshot (already prefetched)
snapshot_results = snapshot.archiveresult_set.all()
# Count in memory instead of DB queries
total_plugins = len(snapshot_results)
completed_plugins = sum(1 for ar in snapshot_results if ar.status == ArchiveResult.StatusChoices.SUCCEEDED)
failed_plugins = sum(1 for ar in snapshot_results if ar.status == ArchiveResult.StatusChoices.FAILED)
pending_plugins = sum(1 for ar in snapshot_results if ar.status == ArchiveResult.StatusChoices.QUEUED)
# Calculate snapshot progress using per-plugin progress
now = timezone.now()
plugin_progress_values: list[int] = []
all_plugins: list[dict[str, object]] = []
seen_plugin_keys: set[str] = set()
# Get all extractor plugins for this snapshot (already prefetched, sort in Python)
# Order: started first, then queued, then completed
def plugin_sort_key(ar):
status_order = {
ArchiveResult.StatusChoices.STARTED: 0,
ArchiveResult.StatusChoices.QUEUED: 1,
ArchiveResult.StatusChoices.SUCCEEDED: 2,
ArchiveResult.StatusChoices.FAILED: 3,
ArchiveResult.StatusChoices.NORESULTS: 3,
ArchiveResult.StatusChoices.FAILED: 4,
}
return (status_order.get(ar.status, 4), ar.plugin)
return (status_order.get(ar.status, 5), ar.plugin, ar.hook_name or "")
all_plugins = []
for ar in sorted(snapshot_results, key=plugin_sort_key):
status = ar.status
progress_value = 0
@@ -1270,6 +1375,7 @@ def live_progress_view(request):
ArchiveResult.StatusChoices.SUCCEEDED,
ArchiveResult.StatusChoices.FAILED,
ArchiveResult.StatusChoices.SKIPPED,
ArchiveResult.StatusChoices.NORESULTS,
):
progress_value = 100
elif status == ArchiveResult.StatusChoices.STARTED:
@@ -1284,20 +1390,49 @@ def live_progress_view(request):
progress_value = 0
plugin_progress_values.append(progress_value)
plugin, label, phase, hook_name = hook_details(ar.hook_name or ar.plugin, plugin=ar.plugin)
plugin_payload = {
'id': str(ar.id),
'plugin': ar.plugin,
'label': label,
'hook_name': hook_name,
'phase': phase,
'status': status,
'process_id': str(ar.process_id) if ar.process_id else None,
}
if status == ArchiveResult.StatusChoices.STARTED and ar.process_id and ar.process:
plugin_payload['pid'] = ar.process.pid
if status == ArchiveResult.StatusChoices.STARTED:
plugin_payload['progress'] = progress_value
plugin_payload['timeout'] = ar.timeout or 120
plugin_payload['source'] = 'archiveresult'
all_plugins.append(plugin_payload)
seen_plugin_keys.add(
str(ar.process_id) if ar.process_id else f"{ar.plugin}:{hook_name}"
)
snapshot_progress = int(sum(plugin_progress_values) / total_plugins) if total_plugins > 0 else 0
for proc_payload in process_records_by_snapshot.get(str(snapshot.id), []):
proc_key = str(proc_payload.get("process_id") or f"{proc_payload.get('plugin')}:{proc_payload.get('hook_name')}")
if proc_key in seen_plugin_keys:
continue
seen_plugin_keys.add(proc_key)
all_plugins.append(proc_payload)
proc_status = proc_payload.get("status")
if proc_status in ("succeeded", "failed", "skipped"):
plugin_progress_values.append(100)
elif proc_status == "started":
plugin_progress_values.append(1)
else:
plugin_progress_values.append(0)
total_plugins = len(all_plugins)
completed_plugins = sum(1 for item in all_plugins if item.get("status") == "succeeded")
failed_plugins = sum(1 for item in all_plugins if item.get("status") == "failed")
pending_plugins = sum(1 for item in all_plugins if item.get("status") == "queued")
snapshot_progress = int(sum(plugin_progress_values) / len(plugin_progress_values)) if plugin_progress_values else 0
active_snapshots_for_crawl.append({
'id': str(snapshot.id),
@@ -1334,6 +1469,11 @@ def live_progress_view(request):
'started_snapshots': started_snapshots,
'failed_snapshots': 0,
'pending_snapshots': pending_snapshots,
'setup_plugins': crawl_setup_plugins,
'setup_total_plugins': crawl_setup_total,
'setup_completed_plugins': crawl_setup_completed,
'setup_failed_plugins': crawl_setup_failed,
'setup_pending_plugins': crawl_setup_pending,
'active_snapshots': active_snapshots_for_crawl,
'can_start': can_start,
'urls_preview': urls_preview,
@@ -1461,7 +1601,11 @@ def find_config_source(key: str, merged_config: dict) -> str:
"""Determine where a config value comes from."""
from archivebox.machine.models import Machine
# Check if it's from archivebox.machine.config
# Environment variables override all persistent config sources.
if key in os.environ:
return 'Environment'
# Machine.config overrides ArchiveBox.conf.
try:
machine = Machine.current()
if machine.config and key in machine.config:
@@ -1469,10 +1613,6 @@ def find_config_source(key: str, merged_config: dict) -> str:
except Exception:
pass
# Check if it's from environment variable
if key in os.environ:
return 'Environment'
# Check if it's from archivebox.config.file
from archivebox.config.configset import BaseConfigSet
file_config = BaseConfigSet.load_from_file(CONSTANTS.CONFIG_FILE)
@@ -1483,6 +1623,43 @@ def find_config_source(key: str, merged_config: dict) -> str:
return 'Default'
def find_plugin_for_config_key(key: str) -> str | None:
    """Return the name of the plugin whose config schema declares *key*, or None."""
    for plugin_name, schema in discover_plugin_configs().items():
        properties = schema.get('properties') or {}
        if key in properties:
            return plugin_name
    return None
def get_config_definition_link(key: str) -> tuple[str, str]:
    """
    Return (url, label) pointing at where a config key is defined.

    Falls back to a GitHub code search over archivebox/config when no plugin
    declares the key; otherwise links to the owning plugin's config.json
    (GitHub for builtin plugins, the live plugin browser for user plugins).
    """
    plugin_name = find_plugin_for_config_key(key)
    if not plugin_name:
        return (
            f'https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{quote(key)}&type=code',
            'archivebox/config',
        )
    # First plugin dir whose basename matches (None when not found on disk).
    plugin_dir = next((path.resolve() for path in iter_plugin_dirs() if path.name == plugin_name), None)
    if plugin_dir:
        builtin_root = BUILTIN_PLUGINS_DIR.resolve()
        if plugin_dir.is_relative_to(builtin_root):
            return (
                f'{ABX_PLUGINS_GITHUB_BASE_URL}{quote(plugin_name)}/config.json',
                f'abx_plugins/plugins/{plugin_name}/config.json',
            )
        user_root = USER_PLUGINS_DIR.resolve()
        if plugin_dir.is_relative_to(user_root):
            return (
                f'{LIVE_PLUGIN_BASE_URL}user.{quote(plugin_name)}/',
                f'data/custom_plugins/{plugin_name}/config.json',
            )
    # Plugin is known but its directory is missing or outside both roots:
    # assume builtin and link via the live plugin browser.
    return (
        f'{LIVE_PLUGIN_BASE_URL}builtin.{quote(plugin_name)}/',
        f'abx_plugins/plugins/{plugin_name}/config.json',
    )
@render_with_table_view
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
CONFIGS = get_all_configs()
@@ -1566,17 +1743,6 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
# Determine all sources for this config value
sources_info = []
# Default value
default_val = find_config_default(key)
if default_val:
sources_info.append(('Default', default_val, 'gray'))
# Config file value
if CONSTANTS.CONFIG_FILE.exists():
file_config = BaseConfigSet.load_from_file(CONSTANTS.CONFIG_FILE)
if key in file_config:
sources_info.append(('Config File', file_config[key], 'green'))
# Environment variable
if key in os.environ:
sources_info.append(('Environment', os.environ[key] if key_is_safe(key) else '********', 'blue'))
@@ -1592,6 +1758,17 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
except Exception:
pass
# Config file value
if CONSTANTS.CONFIG_FILE.exists():
file_config = BaseConfigSet.load_from_file(CONSTANTS.CONFIG_FILE)
if key in file_config:
sources_info.append(('Config File', file_config[key], 'green'))
# Default value
default_val = find_config_default(key)
if default_val:
sources_info.append(('Default', default_val, 'gray'))
# Final computed value
final_value = merged_config.get(key, FLAT_CONFIG.get(key, CONFIGS.get(key, None)))
if not key_is_safe(key):
@@ -1614,6 +1791,8 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
section_header = mark_safe(f'[DYNAMIC CONFIG] &nbsp; <b><code style="color: lightgray">{key}</code></b> &nbsp; <small>(read-only, calculated at runtime)</small>')
definition_url, definition_label = get_config_definition_link(key)
section_data = cast(SectionData, {
"name": section_header,
"description": None,
@@ -1621,7 +1800,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
'Key': key,
'Type': find_config_type(key),
'Value': final_value,
'Source': find_config_source(key, merged_config),
'Currently read from': find_config_source(key, merged_config),
},
"help_texts": {
'Key': mark_safe(f'''
@@ -1631,14 +1810,14 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
</span>
'''),
'Type': mark_safe(f'''
<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code">
See full definition in <code>archivebox/config</code>...
<a href="{definition_url}" target="_blank" rel="noopener noreferrer">
See full definition in <code>{definition_label}</code>...
</a>
'''),
'Value': mark_safe(f'''
{'<b style="color: red">Value is redacted for your security. (Passwords, secrets, API tokens, etc. cannot be viewed in the Web UI)</b><br/><br/>' if not key_is_safe(key) else ''}
<br/><hr/><br/>
<b>Configuration Sources (in priority order):</b><br/><br/>
<b>Configuration Sources (highest priority first):</b><br/><br/>
{sources_html}
<br/><br/>
<p style="display: {"block" if key in FLAT_CONFIG and key not in CONSTANTS_CONFIG else "none"}">
@@ -1651,15 +1830,15 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
}"</code>
</p>
'''),
'Source': mark_safe(f'''
'Currently read from': mark_safe(f'''
The value shown in the "Value" field comes from the <b>{find_config_source(key, merged_config)}</b> source.
<br/><br/>
Priority order (highest to lowest):
<ol>
<li><b style="color: blue">Environment</b> - Environment variables</li>
<li><b style="color: purple">Machine</b> - Machine-specific overrides (e.g., resolved binary paths)
{f'<br/><a href="{machine_admin_url}">→ Edit <code>{key}</code> in Machine.config for this server</a>' if machine_admin_url else ''}
</li>
<li><b style="color: blue">Environment</b> - Environment variables</li>
<li><b style="color: green">Config File</b> - data/ArchiveBox.conf</li>
<li><b style="color: gray">Default</b> - Default value from code</li>
</ol>

View File

@@ -131,7 +131,46 @@ class TagEditorWidget(forms.Widget):
}};
window.updateHiddenInput_{widget_id} = function() {{
document.getElementById('{widget_id}').value = currentTags_{widget_id}.join(',');
var hiddenInput = document.getElementById('{widget_id}');
if (!hiddenInput) {{
return;
}}
hiddenInput.value = currentTags_{widget_id}.join(',');
hiddenInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
hiddenInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
}};
function normalizeTags_{widget_id}(value) {{
var rawTags = Array.isArray(value) ? value : String(value || '').split(',');
var seen = {{}};
return rawTags
.map(function(tag) {{ return String(tag || '').trim(); }})
.filter(function(tag) {{
if (!tag) return false;
var normalized = tag.toLowerCase();
if (seen[normalized]) return false;
seen[normalized] = true;
return true;
}})
.sort(function(a, b) {{
return a.toLowerCase().localeCompare(b.toLowerCase());
}});
}}
window.setTags_{widget_id} = function(value, options) {{
currentTags_{widget_id} = normalizeTags_{widget_id}(value);
rebuildPills_{widget_id}();
if (!(options && options.skipHiddenUpdate)) {{
updateHiddenInput_{widget_id}();
}}
}};
window.syncTagEditorFromHidden_{widget_id} = function() {{
var hiddenInput = document.getElementById('{widget_id}');
if (!hiddenInput) {{
return;
}}
setTags_{widget_id}(hiddenInput.value, {{ skipHiddenUpdate: true }});
}};
function computeTagStyle_{widget_id}(tagName) {{
@@ -190,9 +229,7 @@ class TagEditorWidget(forms.Widget):
// Add to current tags
currentTags_{widget_id}.push(tagName);
currentTags_{widget_id}.sort(function(a, b) {{
return a.toLowerCase().localeCompare(b.toLowerCase());
}});
currentTags_{widget_id} = normalizeTags_{widget_id}(currentTags_{widget_id});
// Rebuild pills
rebuildPills_{widget_id}();
@@ -252,6 +289,14 @@ class TagEditorWidget(forms.Widget):
}}
}});
document.getElementById('{widget_id}').addEventListener('change', function() {{
syncTagEditorFromHidden_{widget_id}();
}});
document.getElementById('{widget_id}').addEventListener('archivebox:sync-tags', function() {{
syncTagEditorFromHidden_{widget_id}();
}});
window.handleTagKeydown_{widget_id} = function(event) {{
var input = event.target;
var value = input.value.trim();
@@ -320,6 +365,8 @@ class TagEditorWidget(forms.Widget):
var input = document.querySelector('input[name="csrfmiddlewaretoken"]');
return input ? input.value : '';
}}
syncTagEditorFromHidden_{widget_id}();
}})();
</script>
'''
@@ -327,15 +374,232 @@ class TagEditorWidget(forms.Widget):
return mark_safe(html)
class URLFiltersWidget(forms.Widget):
"""Render URL allowlist / denylist controls with same-domain autofill."""
template_name = ""
def __init__(self, attrs=None, *, source_selector='textarea[name="url"]'):
self.source_selector = source_selector
super().__init__(attrs)
def render(self, name, value, attrs=None, renderer=None):
value = value if isinstance(value, dict) else {}
widget_id_raw = attrs.get('id', name) if attrs else name
widget_id = re.sub(r'[^A-Za-z0-9_]', '_', str(widget_id_raw)) or name
allowlist = escape(value.get('allowlist', '') or '')
denylist = escape(value.get('denylist', '') or '')
return mark_safe(f'''
<div id="{widget_id}_container" class="url-filters-widget">
<input type="hidden" name="{name}" value="">
<div class="url-filters-grid">
<div class="url-filters-column">
<div class="url-filter-label-row">
<label for="{widget_id}_allowlist" class="url-filter-label"><span class="url-filter-label-main">🟢 URL_ALLOWLIST</span></label>
<span class="url-filter-label-note">Regex patterns or domains to exclude, one pattern per line.</span>
</div>
<textarea id="{widget_id}_allowlist"
name="{name}_allowlist"
rows="2"
placeholder="^https?://([^/]+\\.)?(example\\.com|example\\.org)([:/]|$)">{allowlist}</textarea>
</div>
<div class="url-filters-column">
<div class="url-filter-label-row">
<label for="{widget_id}_denylist" class="url-filter-label"><span class="url-filter-label-main">⛔ URL_DENYLIST</span></label>
<span class="url-filter-label-note">Regex patterns or domains to exclude, one pattern per line.</span>
</div>
<textarea id="{widget_id}_denylist"
name="{name}_denylist"
rows="2"
placeholder="^https?://([^/]+\\.)?(cdn\\.example\\.com|analytics\\.example\\.org)([:/]|$)">{denylist}</textarea>
</div>
</div>
<label class="url-filters-toggle" for="{widget_id}_same_domain_only">
<input type="checkbox" id="{widget_id}_same_domain_only" name="{name}_same_domain_only" value="1">
<span>Same domain only</span>
</label>
<div class="help-text">These values can be one regex pattern or domain per line. URL_DENYLIST takes precedence over URL_ALLOWLIST.</div>
<script>
(function() {{
var allowlistField = document.getElementById('{widget_id}_allowlist');
var denylistField = document.getElementById('{widget_id}_denylist');
var sameDomainOnly = document.getElementById('{widget_id}_same_domain_only');
var sourceField = document.querySelector({json.dumps(self.source_selector)});
var lastAutoGeneratedAllowlist = '';
if (!allowlistField || !sameDomainOnly || !sourceField) {{
return;
}}
function extractUrl(line) {{
var trimmed = String(line || '').trim();
if (!trimmed || trimmed.charAt(0) === '#') {{
return '';
}}
if (trimmed.charAt(0) === '{{') {{
try {{
var record = JSON.parse(trimmed);
return String(record.url || '').trim();
}} catch (error) {{
return '';
}}
}}
return trimmed;
}}
function escapeRegex(text) {{
return String(text || '').replace(/[.*+?^${{}}()|[\\]\\\\]/g, '\\\\$&');
}}
function buildHostRegex(domains) {{
if (!domains.length) {{
return '';
}}
return '^https?://(' + domains.map(escapeRegex).join('|') + ')([:/]|$)';
}}
function getConfigEditorRows() {{
return document.getElementById('id_config_rows');
}}
function getConfigUpdater() {{
return window.updateHiddenField_id_config || null;
}}
function findConfigRow(key) {{
var rows = getConfigEditorRows();
if (!rows) {{
return null;
}}
var matches = Array.prototype.filter.call(rows.querySelectorAll('.key-value-row'), function(row) {{
var keyInput = row.querySelector('.kv-key');
return keyInput && keyInput.value.trim() === key;
}});
return matches.length ? matches[0] : null;
}}
function addConfigRow() {{
if (typeof window.addKeyValueRow_id_config === 'function') {{
window.addKeyValueRow_id_config();
var rows = getConfigEditorRows();
return rows ? rows.lastElementChild : null;
}}
return null;
}}
function setConfigRow(key, value) {{
var rows = getConfigEditorRows();
var updater = getConfigUpdater();
if (!rows || !updater) {{
return;
}}
var row = findConfigRow(key);
if (!value) {{
if (row) {{
row.remove();
updater();
}}
return;
}}
if (!row) {{
row = addConfigRow();
}}
if (!row) {{
return;
}}
var keyInput = row.querySelector('.kv-key');
var valueInput = row.querySelector('.kv-value');
if (!keyInput || !valueInput) {{
return;
}}
keyInput.value = key;
valueInput.value = value;
keyInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
valueInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
updater();
}}
function syncConfigEditor() {{
setConfigRow('URL_ALLOWLIST', allowlistField.value.trim());
setConfigRow('URL_DENYLIST', denylistField ? denylistField.value.trim() : '');
}}
function syncAllowlistFromUrls() {{
if (!sameDomainOnly.checked) {{
if (allowlistField.value.trim() === lastAutoGeneratedAllowlist) {{
allowlistField.value = '';
syncConfigEditor();
}}
lastAutoGeneratedAllowlist = '';
return;
}}
var seen = Object.create(null);
var domains = [];
sourceField.value.split(/\\n+/).forEach(function(line) {{
var url = extractUrl(line);
if (!url) {{
return;
}}
try {{
var parsed = new URL(url);
var domain = String(parsed.hostname || '').toLowerCase();
if (!domain || seen[domain]) {{
return;
}}
seen[domain] = true;
domains.push(domain);
}} catch (error) {{
return;
}}
}});
lastAutoGeneratedAllowlist = buildHostRegex(domains);
allowlistField.value = lastAutoGeneratedAllowlist;
syncConfigEditor();
}}
sameDomainOnly.addEventListener('change', syncAllowlistFromUrls);
sourceField.addEventListener('input', syncAllowlistFromUrls);
sourceField.addEventListener('change', syncAllowlistFromUrls);
allowlistField.addEventListener('input', syncConfigEditor);
allowlistField.addEventListener('change', syncConfigEditor);
if (denylistField) {{
denylistField.addEventListener('input', syncConfigEditor);
denylistField.addEventListener('change', syncConfigEditor);
}}
if (document.readyState === 'loading') {{
document.addEventListener('DOMContentLoaded', syncConfigEditor, {{ once: true }});
}} else {{
syncConfigEditor();
}}
}})();
</script>
</div>
''')
def value_from_datadict(self, data, files, name):
return {
'allowlist': data.get(f'{name}_allowlist', ''),
'denylist': data.get(f'{name}_denylist', ''),
'same_domain_only': data.get(f'{name}_same_domain_only') in ('1', 'on', 'true'),
}
class InlineTagEditorWidget(TagEditorWidget):
"""
Inline version of TagEditorWidget for use in list views.
Includes AJAX save functionality for immediate persistence.
"""
def __init__(self, attrs=None, snapshot_id=None):
def __init__(self, attrs=None, snapshot_id=None, editable=True):
super().__init__(attrs, snapshot_id)
self.snapshot_id = snapshot_id
self.editable = editable
def render(self, name, value, attrs=None, renderer=None, snapshot_id=None):
"""Render inline tag editor with AJAX save."""
@@ -361,20 +625,24 @@ class InlineTagEditorWidget(TagEditorWidget):
# Build pills HTML with filter links
pills_html = ''
for td in tag_data:
remove_button = ''
if self.editable:
remove_button = (
f'<button type="button" class="tag-remove-btn" '
f'data-tag-id="{td["id"]}" data-tag-name="{self._escape(td["name"])}">&times;</button>'
)
pills_html += f'''
<span class="tag-pill" data-tag="{self._escape(td['name'])}" data-tag-id="{td['id']}" style="{self._tag_style(td['name'])}">
<a href="/admin/core/snapshot/?tags__id__exact={td['id']}" class="tag-link">{self._escape(td['name'])}</a>
<button type="button" class="tag-remove-btn" data-tag-id="{td['id']}" data-tag-name="{self._escape(td['name'])}">&times;</button>
{remove_button}
</span>
'''
tags_json = escape(json.dumps(tag_data))
html = f'''
<span id="{widget_id}_container" class="tag-editor-inline" data-snapshot-id="{snapshot_id}" data-tags="{tags_json}">
<span id="{widget_id}_pills" class="tag-pills-inline">
{pills_html}
</span>
input_html = ''
readonly_class = ' readonly' if not self.editable else ''
if self.editable:
input_html = f'''
<input type="text"
id="{widget_id}_input"
class="tag-inline-input-sm"
@@ -384,6 +652,14 @@ class InlineTagEditorWidget(TagEditorWidget):
data-inline-tag-input="1"
>
<datalist id="{widget_id}_datalist"></datalist>
'''
html = f'''
<span id="{widget_id}_container" class="tag-editor-inline{readonly_class}" data-snapshot-id="{snapshot_id}" data-tags="{tags_json}" data-readonly="{int(not self.editable)}">
<span id="{widget_id}_pills" class="tag-pills-inline">
{pills_html}
</span>
{input_html}
</span>
'''