remove huey

This commit is contained in:
Nick Sweeting
2025-12-24 23:40:18 -08:00
parent 6c769d831c
commit d95f0dc186
105 changed files with 3635 additions and 1402 deletions

View File

@@ -42,6 +42,7 @@ def register_urls(api: NinjaAPI) -> NinjaAPI:
api.add_router('/crawls/', 'api.v1_crawls.router')
api.add_router('/cli/', 'api.v1_cli.router')
api.add_router('/workers/', 'api.v1_workers.router')
api.add_router('/machine/', 'api.v1_machine.router')
return api

View File

@@ -107,7 +107,7 @@ class RemoveCommandSchema(Schema):
@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
def cli_add(request, args: AddCommandSchema):
from archivebox.cli.archivebox_add import add
result = add(
urls=args.urls,
tag=args.tag,
@@ -115,8 +115,9 @@ def cli_add(request, args: AddCommandSchema):
update=args.update,
index_only=args.index_only,
overwrite=args.overwrite,
extract=args.extract,
plugins=args.extract, # extract in API maps to plugins param
parser=args.parser,
bg=True, # Always run in background for API calls
)
return {

View File

@@ -0,0 +1,206 @@
__package__ = 'archivebox.api'
from uuid import UUID
from typing import List, Optional
from datetime import datetime
from ninja import Router, Schema, FilterSchema, Field, Query
from ninja.pagination import paginate
from api.v1_core import CustomPagination
router = Router(tags=['Machine and Dependencies'])
# ============================================================================
# Machine Schemas
# ============================================================================
class MachineSchema(Schema):
    """Serialized representation of a machine.Machine record."""
    # Static type tag included in every serialized payload.
    TYPE: str = 'machine.Machine'
    id: UUID
    created_at: datetime
    modified_at: datetime
    # Host identity
    guid: str
    hostname: str
    # Hardware / virtualization info
    hw_in_docker: bool
    hw_in_vm: bool
    hw_manufacturer: str
    hw_product: str
    hw_uuid: str
    # Operating system info
    os_arch: str
    os_family: str
    os_platform: str
    os_release: str
    os_kernel: str
    # Aggregated stats blob (shape defined by the Machine model — not visible here)
    stats: dict
    # Usage counters maintained by the model
    num_uses_succeeded: int
    num_uses_failed: int
class MachineFilterSchema(FilterSchema):
    """Query-string filters for the machine list endpoint.

    Each `q` maps the query parameter to the ORM lookup it applies.
    """
    id: Optional[str] = Field(None, q='id__startswith')
    hostname: Optional[str] = Field(None, q='hostname__icontains')
    os_platform: Optional[str] = Field(None, q='os_platform__icontains')
    os_arch: Optional[str] = Field(None, q='os_arch')
    hw_in_docker: Optional[bool] = Field(None, q='hw_in_docker')
    hw_in_vm: Optional[bool] = Field(None, q='hw_in_vm')
# ============================================================================
# Dependency Schemas
# ============================================================================
class DependencySchema(Schema):
    """Serialized representation of a machine.Dependency record."""
    # Static type tag included in every serialized payload.
    TYPE: str = 'machine.Dependency'
    id: UUID
    created_at: datetime
    modified_at: datetime
    bin_name: str
    bin_providers: str
    custom_cmds: dict
    config: dict
    # Computed fields — populated by the resolve_* staticmethods below.
    is_installed: bool
    installed_count: int

    @staticmethod
    def resolve_is_installed(obj) -> bool:
        # Delegates to the model's own is_installed attribute/property.
        return obj.is_installed

    @staticmethod
    def resolve_installed_count(obj) -> int:
        # Count of InstalledBinary rows via the reverse relation.
        return obj.installed_binaries.count()
class DependencyFilterSchema(FilterSchema):
    """Query-string filters for the dependency list endpoint."""
    id: Optional[str] = Field(None, q='id__startswith')
    bin_name: Optional[str] = Field(None, q='bin_name__icontains')
    bin_providers: Optional[str] = Field(None, q='bin_providers__icontains')
# ============================================================================
# InstalledBinary Schemas
# ============================================================================
class InstalledBinarySchema(Schema):
    """Serialized representation of a machine.InstalledBinary record."""
    # Static type tag included in every serialized payload.
    TYPE: str = 'machine.InstalledBinary'
    id: UUID
    created_at: datetime
    modified_at: datetime
    # Owning machine (FK) plus a denormalized hostname for convenience.
    machine_id: UUID
    machine_hostname: str
    # Optional link to the Dependency this binary satisfies.
    dependency_id: Optional[UUID]
    dependency_bin_name: Optional[str]
    name: str
    binprovider: str
    abspath: str
    version: str
    sha256: str
    # Computed field — resolved below from the model.
    is_valid: bool
    num_uses_succeeded: int
    num_uses_failed: int

    @staticmethod
    def resolve_machine_hostname(obj) -> str:
        # Follows the machine FK; callers should select_related('machine').
        return obj.machine.hostname

    @staticmethod
    def resolve_dependency_id(obj) -> Optional[UUID]:
        # Raw FK value — avoids a query when the dependency row isn't needed.
        return obj.dependency_id

    @staticmethod
    def resolve_dependency_bin_name(obj) -> Optional[str]:
        # None when this binary isn't linked to any Dependency.
        return obj.dependency.bin_name if obj.dependency else None

    @staticmethod
    def resolve_is_valid(obj) -> bool:
        # Delegates to the model's own is_valid attribute/property.
        return obj.is_valid
class InstalledBinaryFilterSchema(FilterSchema):
    """Query-string filters for the installed-binaries list endpoint."""
    id: Optional[str] = Field(None, q='id__startswith')
    name: Optional[str] = Field(None, q='name__icontains')
    binprovider: Optional[str] = Field(None, q='binprovider')
    machine_id: Optional[str] = Field(None, q='machine_id__startswith')
    dependency_id: Optional[str] = Field(None, q='dependency_id__startswith')
    version: Optional[str] = Field(None, q='version__icontains')
# ============================================================================
# Machine Endpoints
# ============================================================================
@router.get("/machines", response=List[MachineSchema], url_name="get_machines")
@paginate(CustomPagination)
def get_machines(request, filters: MachineFilterSchema = Query(...)):
    """Return the paginated set of machines matching the query filters."""
    from machine.models import Machine

    queryset = Machine.objects.all()
    matching = filters.filter(queryset)
    return matching.distinct()
@router.get("/machine/{machine_id}", response=MachineSchema, url_name="get_machine")
def get_machine(request, machine_id: str):
    """
    Get a specific machine by ID prefix or (case-insensitive) hostname.

    Because this parameterized route is registered before the dedicated
    `/machine/current` route below, a request for `/machine/current` is
    matched here with machine_id='current'; we resolve that alias
    explicitly instead of failing the DB lookup.

    Raises:
        HttpError(404): when no machine matches machine_id.
    """
    from machine.models import Machine
    from django.db.models import Q

    # Alias captured from /machine/current due to route registration order.
    if machine_id == 'current':
        return Machine.current()

    try:
        return Machine.objects.get(Q(id__startswith=machine_id) | Q(hostname__iexact=machine_id))
    except Machine.DoesNotExist:
        from ninja.errors import HttpError
        # 404 instead of letting DoesNotExist bubble up as an HTTP 500.
        raise HttpError(404, f'No machine found matching: {machine_id}')
@router.get("/machine/current", response=MachineSchema, url_name="get_current_machine")
def get_current_machine(request):
    """Get the machine record for the host currently running ArchiveBox.

    NOTE(review): the parameterized `/machine/{machine_id}` route above is
    registered first, so `/machine/current` is likely captured there with
    machine_id='current' and never reaches this handler — confirm the
    router's match order or register this route before the parameterized one.
    """
    from machine.models import Machine
    return Machine.current()
# ============================================================================
# Dependency Endpoints
# ============================================================================
@router.get("/dependencies", response=List[DependencySchema], url_name="get_dependencies")
@paginate(CustomPagination)
def get_dependencies(request, filters: DependencyFilterSchema = Query(...)):
    """Return the paginated set of dependencies matching the query filters."""
    from machine.models import Dependency

    matching = filters.filter(Dependency.objects.all())
    return matching.distinct()
@router.get("/dependency/{dependency_id}", response=DependencySchema, url_name="get_dependency")
def get_dependency(request, dependency_id: str):
    """Get a specific dependency by ID or bin_name.

    Tries an id-prefix match first, then falls back to a case-insensitive
    bin_name match. If neither lookup finds a row, Dependency.DoesNotExist
    propagates out of the second .get() call.
    """
    from machine.models import Dependency
    from django.db.models import Q
    try:
        # Primary lookup: treat the path param as a (possibly truncated) id.
        return Dependency.objects.get(Q(id__startswith=dependency_id))
    except Dependency.DoesNotExist:
        # Fallback: treat the path param as a binary name.
        return Dependency.objects.get(bin_name__iexact=dependency_id)
# ============================================================================
# InstalledBinary Endpoints
# ============================================================================
@router.get("/binaries", response=List[InstalledBinarySchema], url_name="get_binaries")
@paginate(CustomPagination)
def get_binaries(request, filters: InstalledBinaryFilterSchema = Query(...)):
    """Return the paginated set of installed binaries matching the filters."""
    from machine.models import InstalledBinary

    # Join the FK targets used by the schema resolvers in a single query.
    base_qs = InstalledBinary.objects.all().select_related('machine', 'dependency')
    return filters.filter(base_qs).distinct()
@router.get("/binary/{binary_id}", response=InstalledBinarySchema, url_name="get_binary")
def get_binary(request, binary_id: str):
    """Look up a single installed binary by (a prefix of) its ID."""
    from machine.models import InstalledBinary

    qs = InstalledBinary.objects.select_related('machine', 'dependency')
    return qs.get(id__startswith=binary_id)
@router.get("/binary/by-name/{name}", response=List[InstalledBinarySchema], url_name="get_binaries_by_name")
def get_binaries_by_name(request, name: str):
    """Return every installed binary whose name matches (case-insensitive)."""
    from machine.models import InstalledBinary

    matches = InstalledBinary.objects.filter(name__iexact=name)
    return list(matches.select_related('machine', 'dependency'))

View File

@@ -4,125 +4,157 @@ from uuid import UUID
from typing import List, Any
from datetime import datetime
from ninja import Router, Schema
router = Router(tags=['Workers and Tasks'])
class TaskSchema(Schema):
class QueueItemSchema(Schema):
"""Schema for a single item in a worker's queue."""
TYPE: str
id: UUID
description: str
status: str
retry_at: datetime | None
created_at: datetime
modified_at: datetime
created_by_id: int
description: str
@staticmethod
def resolve_TYPE(obj) -> str:
return f'{obj._meta.app_label}.{obj._meta.model_name}'
@staticmethod
def resolve_description(obj) -> str:
return str(obj)
class ActorSchema(Schema):
# TYPE: str = 'workers.actor.ActorType'
# name: str
#pid: int | None
idle_count: int
launch_kwargs: dict[str, Any]
mode: str
class WorkerSchema(Schema):
"""Schema for a Worker type."""
name: str
model: str
statemachine: str
ACTIVE_STATE: str
EVENT_NAME: str
CLAIM_ORDER: list[str]
CLAIM_FROM_TOP_N: int
CLAIM_ATOMIC: bool
MAX_TICK_TIME: int
MAX_CONCURRENT_ACTORS: int
future: list[TaskSchema]
pending: list[TaskSchema]
stalled: list[TaskSchema]
active: list[TaskSchema]
past: list[TaskSchema]
max_tick_time: int
max_concurrent_tasks: int
poll_interval: float
idle_timeout: int
running_count: int
running_workers: List[dict[str, Any]]
queue_count: int
queue: List[QueueItemSchema]
@staticmethod
def resolve_model(obj) -> str:
return obj.Model.__name__
@staticmethod
def resolve_statemachine(obj) -> str:
return obj.StateMachineClass.__name__
@staticmethod
def resolve_name(obj) -> str:
return str(obj)
Model = obj.get_model()
return f'{Model._meta.app_label}.{Model._meta.model_name}'
@staticmethod
def resolve_ACTIVE_STATE(obj) -> str:
return str(obj.ACTIVE_STATE)
@staticmethod
def resolve_FINAL_STATES(obj) -> list[str]:
return [str(state) for state in obj.FINAL_STATES]
@staticmethod
def resolve_future(obj) -> list[TaskSchema]:
return [obj for obj in obj.qs.filter(obj.future_q).order_by('-retry_at')]
@staticmethod
def resolve_pending(obj) -> list[TaskSchema]:
return [obj for obj in obj.qs.filter(obj.pending_q).order_by('-retry_at')]
@staticmethod
def resolve_stalled(obj) -> list[TaskSchema]:
return [obj for obj in obj.qs.filter(obj.stalled_q).order_by('-retry_at')]
@staticmethod
def resolve_active(obj) -> list[TaskSchema]:
return [obj for obj in obj.qs.filter(obj.active_q).order_by('-retry_at')]
def resolve_max_tick_time(obj) -> int:
return obj.MAX_TICK_TIME
@staticmethod
def resolve_past(obj) -> list[TaskSchema]:
return [obj for obj in obj.qs.filter(obj.final_q).order_by('-modified_at')]
def resolve_max_concurrent_tasks(obj) -> int:
return obj.MAX_CONCURRENT_TASKS
@staticmethod
def resolve_poll_interval(obj) -> float:
return obj.POLL_INTERVAL
@staticmethod
def resolve_idle_timeout(obj) -> int:
return obj.IDLE_TIMEOUT
@staticmethod
def resolve_running_count(obj) -> int:
return len(obj.get_running_workers())
@staticmethod
def resolve_running_workers(obj) -> List[dict[str, Any]]:
return obj.get_running_workers()
@staticmethod
def resolve_queue_count(obj) -> int:
return obj.get_queue().count()
@staticmethod
def resolve_queue(obj) -> List[QueueItemSchema]:
return list(obj.get_queue()[:50]) # Limit to 50 items
class OrchestratorSchema(Schema):
# TYPE: str = 'workers.orchestrator.Orchestrator'
#pid: int | None
exit_on_idle: bool
mode: str
actors: list[ActorSchema]
@staticmethod
def resolve_actors(obj) -> list[ActorSchema]:
return [actor() for actor in obj.actor_types.values()]
"""Schema for the Orchestrator."""
is_running: bool
poll_interval: float
idle_timeout: int
max_workers_per_type: int
max_total_workers: int
total_worker_count: int
workers: List[WorkerSchema]
@router.get("/orchestrators", response=List[OrchestratorSchema], url_name="get_orchestrators")
def get_orchestrators(request):
"""List all the task orchestrators (aka Orchestrators) that are currently running"""
@router.get("/orchestrator", response=OrchestratorSchema, url_name="get_orchestrator")
def get_orchestrator(request):
"""Get the orchestrator status and all worker queues."""
from workers.orchestrator import Orchestrator
from workers.worker import CrawlWorker, SnapshotWorker, ArchiveResultWorker
orchestrator = Orchestrator()
return [orchestrator]
# Create temporary worker instances to query their queues
workers = [
CrawlWorker(worker_id=-1),
SnapshotWorker(worker_id=-1),
ArchiveResultWorker(worker_id=-1),
]
return {
'is_running': orchestrator.is_running(),
'poll_interval': orchestrator.POLL_INTERVAL,
'idle_timeout': orchestrator.IDLE_TIMEOUT,
'max_workers_per_type': orchestrator.MAX_WORKERS_PER_TYPE,
'max_total_workers': orchestrator.MAX_TOTAL_WORKERS,
'total_worker_count': orchestrator.get_total_worker_count(),
'workers': workers,
}
@router.get("/actors", response=List[ActorSchema], url_name="get_actors")
def get_actors(request):
"""List all the task consumer workers (aka Actors) that are currently running"""
@router.get("/workers", response=List[WorkerSchema], url_name="get_workers")
def get_workers(request):
"""List all worker types and their current status."""
from workers.worker import CrawlWorker, SnapshotWorker, ArchiveResultWorker
from workers.orchestrator import Orchestrator
orchestrator = Orchestrator()
return orchestrator.actor_types.values()
# Create temporary instances to query their queues
return [
CrawlWorker(worker_id=-1),
SnapshotWorker(worker_id=-1),
ArchiveResultWorker(worker_id=-1),
]
@router.get("/worker/{worker_name}", response=WorkerSchema, url_name="get_worker")
def get_worker(request, worker_name: str):
"""Get status and queue for a specific worker type."""
from workers.worker import WORKER_TYPES
if worker_name not in WORKER_TYPES:
from ninja.errors import HttpError
raise HttpError(404, f"Unknown worker type: {worker_name}. Valid types: {list(WORKER_TYPES.keys())}")
WorkerClass = WORKER_TYPES[worker_name]
return WorkerClass(worker_id=-1)
@router.get("/worker/{worker_name}/queue", response=List[QueueItemSchema], url_name="get_worker_queue")
def get_worker_queue(request, worker_name: str, limit: int = 100):
"""Get the current queue for a specific worker type."""
from workers.worker import WORKER_TYPES
if worker_name not in WORKER_TYPES:
from ninja.errors import HttpError
raise HttpError(404, f"Unknown worker type: {worker_name}. Valid types: {list(WORKER_TYPES.keys())}")
WorkerClass = WORKER_TYPES[worker_name]
worker = WorkerClass(worker_id=-1)
return list(worker.get_queue()[:limit])
# Progress endpoint moved to core.views.live_progress_view for simplicity

View File

@@ -2,76 +2,226 @@
__package__ = 'archivebox.base_models'
import json
from django import forms
from django.contrib import admin
from django.utils.html import format_html, mark_safe
from django_object_actions import DjangoObjectActions
class KeyValueWidget(forms.Widget):
"""
A widget that renders JSON dict as editable key-value input fields
with + and - buttons to add/remove rows.
Includes autocomplete for available config keys from the plugin system.
"""
template_name = None # We render manually
class Media:
css = {
'all': []
}
js = []
def _get_config_options(self):
"""Get available config options from plugins."""
try:
from archivebox.hooks import discover_plugin_configs
plugin_configs = discover_plugin_configs()
options = {}
for plugin_name, schema in plugin_configs.items():
for key, prop in schema.get('properties', {}).items():
options[key] = {
'plugin': plugin_name,
'type': prop.get('type', 'string'),
'default': prop.get('default', ''),
'description': prop.get('description', ''),
}
return options
except Exception:
return {}
def render(self, name, value, attrs=None, renderer=None):
# Parse JSON value to dict
if value is None:
data = {}
elif isinstance(value, str):
try:
data = json.loads(value) if value else {}
except json.JSONDecodeError:
data = {}
elif isinstance(value, dict):
data = value
else:
data = {}
widget_id = attrs.get('id', name) if attrs else name
config_options = self._get_config_options()
# Build datalist options
datalist_options = '\n'.join(
f'<option value="{self._escape(key)}">{self._escape(opt["description"][:60] or opt["type"])}</option>'
for key, opt in sorted(config_options.items())
)
# Build config metadata as JSON for JS
config_meta_json = json.dumps(config_options)
html = f'''
<div id="{widget_id}_container" class="key-value-editor" style="max-width: 700px;">
<datalist id="{widget_id}_keys">
{datalist_options}
</datalist>
<div id="{widget_id}_rows" class="key-value-rows">
'''
# Render existing key-value pairs
row_idx = 0
for key, val in data.items():
val_str = json.dumps(val) if not isinstance(val, str) else val
html += self._render_row(widget_id, row_idx, key, val_str)
row_idx += 1
# Always add one empty row for new entries
html += self._render_row(widget_id, row_idx, '', '')
html += f'''
</div>
<div style="display: flex; gap: 8px; align-items: center; margin-top: 8px;">
<button type="button" onclick="addKeyValueRow_{widget_id}()"
style="padding: 4px 12px; cursor: pointer; background: #417690; color: white; border: none; border-radius: 4px;">
+ Add Row
</button>
<span id="{widget_id}_hint" style="font-size: 11px; color: #666; font-style: italic;"></span>
</div>
<input type="hidden" name="{name}" id="{widget_id}" value="">
<script>
(function() {{
var configMeta_{widget_id} = {config_meta_json};
function showKeyHint_{widget_id}(key) {{
var hint = document.getElementById('{widget_id}_hint');
var meta = configMeta_{widget_id}[key];
if (meta) {{
hint.innerHTML = '<b>' + key + '</b>: ' + (meta.description || meta.type) +
(meta.default !== '' ? ' <span style="color:#888">(default: ' + meta.default + ')</span>' : '');
}} else {{
hint.textContent = key ? 'Custom key: ' + key : '';
}}
}}
function updateHiddenField_{widget_id}() {{
var container = document.getElementById('{widget_id}_rows');
var rows = container.querySelectorAll('.key-value-row');
var result = {{}};
rows.forEach(function(row) {{
var keyInput = row.querySelector('.kv-key');
var valInput = row.querySelector('.kv-value');
if (keyInput && valInput && keyInput.value.trim()) {{
var key = keyInput.value.trim();
var val = valInput.value.trim();
// Try to parse as JSON (for booleans, numbers, etc)
try {{
if (val === 'true') result[key] = true;
else if (val === 'false') result[key] = false;
else if (val === 'null') result[key] = null;
else if (!isNaN(val) && val !== '') result[key] = Number(val);
else if ((val.startsWith('{{') && val.endsWith('}}')) ||
(val.startsWith('[') && val.endsWith(']')) ||
(val.startsWith('"') && val.endsWith('"')))
result[key] = JSON.parse(val);
else result[key] = val;
}} catch(e) {{
result[key] = val;
}}
}}
}});
document.getElementById('{widget_id}').value = JSON.stringify(result);
}}
window.addKeyValueRow_{widget_id} = function() {{
var container = document.getElementById('{widget_id}_rows');
var rows = container.querySelectorAll('.key-value-row');
var newIdx = rows.length;
var newRow = document.createElement('div');
newRow.className = 'key-value-row';
newRow.style.cssText = 'display: flex; gap: 8px; margin-bottom: 6px; align-items: center;';
newRow.innerHTML = '<input type="text" class="kv-key" placeholder="KEY" list="{widget_id}_keys" ' +
'style="flex: 1; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;" ' +
'onchange="updateHiddenField_{widget_id}()" oninput="updateHiddenField_{widget_id}(); showKeyHint_{widget_id}(this.value)" onfocus="showKeyHint_{widget_id}(this.value)">' +
'<input type="text" class="kv-value" placeholder="value" ' +
'style="flex: 2; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;" ' +
'onchange="updateHiddenField_{widget_id}()" oninput="updateHiddenField_{widget_id}()">' +
'<button type="button" onclick="removeKeyValueRow_{widget_id}(this)" ' +
'style="padding: 4px 10px; cursor: pointer; background: #ba2121; color: white; border: none; border-radius: 4px; font-weight: bold;"></button>';
container.appendChild(newRow);
newRow.querySelector('.kv-key').focus();
}};
window.removeKeyValueRow_{widget_id} = function(btn) {{
var row = btn.parentElement;
row.remove();
updateHiddenField_{widget_id}();
}};
window.showKeyHint_{widget_id} = showKeyHint_{widget_id};
window.updateHiddenField_{widget_id} = updateHiddenField_{widget_id};
// Initialize on load
document.addEventListener('DOMContentLoaded', function() {{
updateHiddenField_{widget_id}();
}});
// Also run immediately in case DOM is already ready
if (document.readyState !== 'loading') {{
updateHiddenField_{widget_id}();
}}
// Update on any input change
document.getElementById('{widget_id}_rows').addEventListener('input', updateHiddenField_{widget_id});
}})();
</script>
</div>
'''
return mark_safe(html)
def _render_row(self, widget_id, idx, key, value):
return f'''
<div class="key-value-row" style="display: flex; gap: 8px; margin-bottom: 6px; align-items: center;">
<input type="text" class="kv-key" value="{self._escape(key)}" placeholder="KEY" list="{widget_id}_keys"
style="flex: 1; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;"
onchange="updateHiddenField_{widget_id}()" oninput="updateHiddenField_{widget_id}(); showKeyHint_{widget_id}(this.value)" onfocus="showKeyHint_{widget_id}(this.value)">
<input type="text" class="kv-value" value="{self._escape(value)}" placeholder="value"
style="flex: 2; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-family: monospace; font-size: 12px;"
onchange="updateHiddenField_{widget_id}()" oninput="updateHiddenField_{widget_id}()">
<button type="button" onclick="removeKeyValueRow_{widget_id}(this)"
style="padding: 4px 10px; cursor: pointer; background: #ba2121; color: white; border: none; border-radius: 4px; font-weight: bold;"></button>
</div>
'''
def _escape(self, s):
"""Escape HTML special chars in attribute values."""
if not s:
return ''
return str(s).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
def value_from_datadict(self, data, files, name):
value = data.get(name, '{}')
return value
class ConfigEditorMixin:
"""
Mixin for admin classes with a config JSON field.
Provides a readonly field that shows available config options
from all discovered plugin schemas.
Provides a key-value editor widget with autocomplete for available config keys.
"""
@admin.display(description='Available Config Options')
def available_config_options(self, obj):
"""Show documentation for available config keys."""
try:
from archivebox.hooks import discover_plugin_configs
plugin_configs = discover_plugin_configs()
except ImportError:
return format_html('<i>Plugin config system not available</i>')
html_parts = [
'<details>',
'<summary style="cursor: pointer; font-weight: bold; padding: 4px;">',
'Click to see available config keys ({})</summary>'.format(
sum(len(s.get('properties', {})) for s in plugin_configs.values())
),
'<div style="max-height: 400px; overflow-y: auto; padding: 8px; background: #f8f8f8; border-radius: 4px; font-family: monospace; font-size: 11px;">',
]
for plugin_name, schema in sorted(plugin_configs.items()):
properties = schema.get('properties', {})
if not properties:
continue
html_parts.append(f'<div style="margin: 8px 0;"><strong style="color: #333;">{plugin_name}</strong></div>')
html_parts.append('<table style="width: 100%; border-collapse: collapse; margin-bottom: 12px;">')
html_parts.append('<tr style="background: #eee;"><th style="text-align: left; padding: 4px;">Key</th><th style="text-align: left; padding: 4px;">Type</th><th style="text-align: left; padding: 4px;">Default</th><th style="text-align: left; padding: 4px;">Description</th></tr>')
for key, prop in sorted(properties.items()):
prop_type = prop.get('type', 'string')
default = prop.get('default', '')
description = prop.get('description', '')
# Truncate long defaults
default_str = str(default)
if len(default_str) > 30:
default_str = default_str[:27] + '...'
html_parts.append(
f'<tr style="border-bottom: 1px solid #ddd;">'
f'<td style="padding: 4px; font-weight: bold;">{key}</td>'
f'<td style="padding: 4px; color: #666;">{prop_type}</td>'
f'<td style="padding: 4px; color: #666;">{default_str}</td>'
f'<td style="padding: 4px;">{description}</td>'
f'</tr>'
)
html_parts.append('</table>')
html_parts.append('</div></details>')
html_parts.append(
'<p style="margin-top: 8px; color: #666; font-size: 11px;">'
'<strong>Usage:</strong> Add key-value pairs in JSON format, e.g., '
'<code>{"SAVE_WGET": false, "WGET_TIMEOUT": 120}</code>'
'</p>'
)
return mark_safe(''.join(html_parts))
def formfield_for_dbfield(self, db_field, request, **kwargs):
"""Use KeyValueWidget for the config JSON field."""
if db_field.name == 'config':
kwargs['widget'] = KeyValueWidget()
return super().formfield_for_dbfield(db_field, request, **kwargs)
class BaseModelAdmin(DjangoObjectActions, admin.ModelAdmin):

View File

@@ -72,9 +72,10 @@ def add(urls: str | list[str],
cli_args[0] = 'archivebox'
cmd_str = ' '.join(cli_args)
timestamp = timezone.now().strftime("%Y-%m-%d__%H-%M-%S")
seed = Seed.from_file(
sources_file,
label=f'{USER}@{HOSTNAME} $ {cmd_str}',
label=f'{USER}@{HOSTNAME} $ {cmd_str} [{timestamp}]',
parser=parser,
tag=tag,
created_by=created_by_id,

View File

@@ -11,21 +11,53 @@ __package__ = "archivebox.config"
import os
import json
from pathlib import Path
from typing import Any, Dict, Optional, List, Type, TYPE_CHECKING, cast
from typing import Any, Dict, Optional, List, Type, Tuple, TYPE_CHECKING, cast
from configparser import ConfigParser
from pydantic import Field
from pydantic_settings import BaseSettings
from pydantic_settings import BaseSettings, PydanticBaseSettingsSource
class IniConfigSettingsSource(PydanticBaseSettingsSource):
    """
    Custom settings source that reads from ArchiveBox.conf (INI format).
    Flattens all sections into a single namespace.
    """

    def get_field_value(self, field: Any, field_name: str) -> Tuple[Any, str, bool]:
        """Return (value, key, is_complex) for one field, per the source API.

        NOTE(review): this re-reads and re-parses the config file on every
        field — consider caching _load_config_file() if it becomes hot.
        """
        config_vals = self._load_config_file()
        # Keys are normalized to upper-case at load time, so match on upper.
        field_value = config_vals.get(field_name.upper())
        return field_value, field_name, False

    def __call__(self) -> Dict[str, Any]:
        # pydantic-settings invokes the source itself to get all values at once.
        return self._load_config_file()

    def _load_config_file(self) -> Dict[str, Any]:
        """Parse ArchiveBox.conf into a flat {UPPER_KEY: value} dict.

        Returns {} when constants can't be imported yet or the file is absent,
        so config loading degrades gracefully during early bootstrap.
        """
        try:
            from archivebox.config.constants import CONSTANTS
            config_path = CONSTANTS.CONFIG_FILE
        except ImportError:
            return {}
        if not config_path.exists():
            return {}
        parser = ConfigParser()
        parser.optionxform = lambda x: x  # preserve case
        parser.read(config_path)
        # Flatten all sections into single namespace (ignore section headers)
        return {key.upper(): value for section in parser.sections() for key, value in parser.items(section)}
class BaseConfigSet(BaseSettings):
"""
Base class for config sections.
Automatically loads values from:
1. Environment variables (highest priority)
2. ArchiveBox.conf file (if exists)
3. Default values (lowest priority)
Automatically loads values from (highest to lowest priority):
1. Environment variables
2. ArchiveBox.conf file (INI format, flattened)
3. Default values
Subclasses define fields with defaults and types:
@@ -35,11 +67,30 @@ class BaseConfigSet(BaseSettings):
"""
class Config:
# Use env vars with ARCHIVEBOX_ prefix or raw name
env_prefix = ""
extra = "ignore"
validate_default = True
    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: Type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        """
        Define the order of settings sources (first = highest priority).

        dotenv and secrets-file sources are deliberately omitted — ArchiveBox
        only layers init kwargs over env vars over ArchiveBox.conf.
        """
        return (
            init_settings,                           # 1. Passed to __init__
            env_settings,                            # 2. Environment variables
            IniConfigSettingsSource(settings_cls),   # 3. ArchiveBox.conf file
            # dotenv_settings,        # Skip .env files
            # file_secret_settings,   # Skip secrets files
        )
@classmethod
def load_from_file(cls, config_path: Path) -> Dict[str, str]:
"""Load config values from INI file."""
@@ -47,7 +98,7 @@ class BaseConfigSet(BaseSettings):
return {}
parser = ConfigParser()
parser.optionxform = lambda x: x # type: ignore # preserve case
parser.optionxform = lambda x: x # preserve case
parser.read(config_path)
# Flatten all sections into single namespace

View File

@@ -256,7 +256,7 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
# Show a helpful message when no plugins found
rows['Name'].append('(no plugins found)')
rows['Source'].append('-')
rows['Path'].append(format_html('<code>archivebox/plugins/</code> or <code>data/plugins/</code>'))
rows['Path'].append(mark_safe('<code>archivebox/plugins/</code> or <code>data/plugins/</code>'))
rows['Hooks'].append('-')
return TableContext(

View File

@@ -9,25 +9,17 @@ from django.core.exceptions import ValidationError
from django.urls import reverse, resolve
from django.utils import timezone
from huey_monitor.admin import TaskModel
from archivebox.config import DATA_DIR
from archivebox.config.common import SERVER_CONFIG
from archivebox.misc.paginators import AccelleratedPaginator
from archivebox.base_models.admin import BaseModelAdmin
from archivebox.hooks import get_extractor_icon
from core.models import ArchiveResult, Snapshot
def result_url(result: TaskModel) -> str:
url = reverse("admin:huey_monitor_taskmodel_change", args=[str(result.id)])
return format_html('<a href="{url}" class="fade-in-progress-url">See progress...</a>'.format(url=url))
class ArchiveResultInline(admin.TabularInline):
name = 'Archive Results Log'
model = ArchiveResult
@@ -101,9 +93,9 @@ class ArchiveResultInline(admin.TabularInline):
class ArchiveResultAdmin(BaseModelAdmin):
list_display = ('id', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
list_display = ('id', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor_with_icon', 'cmd_str', 'output_str')
sort_fields = ('id', 'created_by', 'created_at', 'extractor', 'status')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'extractor_with_icon')
search_fields = ('id', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
autocomplete_fields = ['snapshot']
@@ -144,17 +136,29 @@ class ArchiveResultAdmin(BaseModelAdmin):
def tags_str(self, result):
return result.snapshot.tags_str()
@admin.display(description='Extractor', ordering='extractor')
def extractor_with_icon(self, result):
icon = get_extractor_icon(result.extractor)
return format_html(
'<span title="{}">{}</span> {}',
result.extractor,
icon,
result.extractor,
)
def cmd_str(self, result):
return format_html(
'<pre>{}</pre>',
' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
)
def output_str(self, result):
# Determine output link path - use output if file exists, otherwise link to index
output_path = result.output if (result.status == 'succeeded' and result.output) else 'index.html'
return format_html(
'<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
result.snapshot.timestamp,
result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
output_path,
result.output,
)
@@ -185,7 +189,7 @@ class ArchiveResultAdmin(BaseModelAdmin):
is_hidden = filename.startswith('.')
output_str += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
return output_str + format_html('</code></pre>')
return output_str + mark_safe('</code></pre>')

View File

@@ -35,8 +35,19 @@ def register_admin_site():
admin.site = archivebox_admin
sites.site = archivebox_admin
# Plugin admin registration is now handled by individual app admins
# No longer using archivebox.pm.hook.register_admin()
# Register admin views for each app
# (Previously handled by ABX plugin system, now called directly)
from core.admin import register_admin as register_core_admin
from crawls.admin import register_admin as register_crawls_admin
from api.admin import register_admin as register_api_admin
from machine.admin import register_admin as register_machine_admin
from workers.admin import register_admin as register_workers_admin
register_core_admin(archivebox_admin)
register_crawls_admin(archivebox_admin)
register_api_admin(archivebox_admin)
register_machine_admin(archivebox_admin)
register_workers_admin(archivebox_admin)
return archivebox_admin

View File

@@ -25,7 +25,7 @@ from archivebox.workers.tasks import bg_archive_snapshots, bg_add
from core.models import Tag
from core.admin_tags import TagInline
from core.admin_archiveresults import ArchiveResultInline, result_url
from core.admin_archiveresults import ArchiveResultInline
# GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}
@@ -54,10 +54,10 @@ class SnapshotActionForm(ActionForm):
class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
list_display = ('created_at', 'title_str', 'status', 'files', 'size', 'url_str')
sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'link_dir', 'available_config_options')
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'output_dir')
search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', 'config', 'available_config_options', *readonly_fields[:-1])
fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', 'config', *readonly_fields)
ordering = ['-created_at']
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
inlines = [TagInline, ArchiveResultInline]
@@ -93,12 +93,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
# self.request = request
# return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult'))
@admin.action(
description="Imported Timestamp"
)
@admin.display(description="Imported Timestamp")
def imported_timestamp(self, obj):
context = RequestContext(self.request, {
'bookmarked_date': obj.bookmarked,
'bookmarked_date': obj.bookmarked_at,
'timestamp': obj.timestamp,
})
@@ -145,22 +143,15 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
def status_info(self, obj):
return format_html(
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
'''
Archived: {} ({} files {}) &nbsp; &nbsp;
Favicon: <img src="{}" style="height: 20px"/> &nbsp; &nbsp;
Status code: {} &nbsp; &nbsp;<br/>
Server: {} &nbsp; &nbsp;
Content type: {} &nbsp; &nbsp;
Extension: {} &nbsp; &nbsp;
''',
'' if obj.is_archived else '',
obj.num_outputs,
self.size(obj) or '0kb',
f'/archive/{obj.timestamp}/favicon.ico',
obj.status_code or '-',
obj.headers and obj.headers.get('Server') or '-',
obj.headers and obj.headers.get('Content-Type') or '-',
obj.extension or '-',
)
@@ -184,8 +175,8 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
obj.archive_path,
obj.archive_path,
obj.archive_path,
'fetched' if obj.latest_title or obj.title else 'pending',
urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
'fetched' if obj.title else 'pending',
urldecode(htmldecode(obj.title or ''))[:128] or 'Pending...'
) + mark_safe(f' <span class="tags">{tags}</span>')
@admin.display(
@@ -259,14 +250,13 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
description=" Get Title"
)
def update_titles(self, request, queryset):
from core.models import Snapshot
count = queryset.count()
# Queue snapshots for archiving via the state machine system
result = bg_archive_snapshots(queryset, kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR})
queued = bg_archive_snapshots(queryset, kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR})
messages.success(
request,
mark_safe(f"Title and favicon are updating in the background for {count} URLs. {result_url(result)}"),
f"Queued {queued} snapshots for title/favicon update. The orchestrator will process them in the background.",
)
@admin.action(
@@ -275,11 +265,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
def update_snapshots(self, request, queryset):
count = queryset.count()
result = bg_archive_snapshots(queryset, kwargs={"overwrite": False, "out_dir": DATA_DIR})
queued = bg_archive_snapshots(queryset, kwargs={"overwrite": False, "out_dir": DATA_DIR})
messages.success(
request,
mark_safe(f"Re-trying any previously failed methods for {count} URLs in the background. {result_url(result)}"),
f"Queued {queued} snapshots for re-archiving. The orchestrator will process them in the background.",
)
@@ -291,11 +281,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
timestamp = timezone.now().isoformat('T', 'seconds')
new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
result = bg_add({'urls': new_url, 'tag': snapshot.tags_str()})
bg_add({'urls': new_url, 'tag': snapshot.tags_str()})
messages.success(
request,
mark_safe(f"Creating new fresh snapshots for {queryset.count()} URLs in the background. {result_url(result)}"),
f"Creating {queryset.count()} new fresh snapshots. The orchestrator will process them in the background.",
)
@admin.action(
@@ -304,11 +294,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
def overwrite_snapshots(self, request, queryset):
count = queryset.count()
result = bg_archive_snapshots(queryset, kwargs={"overwrite": True, "out_dir": DATA_DIR})
queued = bg_archive_snapshots(queryset, kwargs={"overwrite": True, "out_dir": DATA_DIR})
messages.success(
request,
mark_safe(f"Clearing all previous results and re-downloading {count} URLs in the background. {result_url(result)}"),
f"Queued {queued} snapshots for full re-archive (overwriting existing). The orchestrator will process them in the background.",
)
@admin.action(

View File

@@ -1,5 +1,7 @@
__package__ = 'archivebox.core'
import sys
from django.apps import AppConfig
@@ -10,6 +12,41 @@ class CoreConfig(AppConfig):
"""Register the archivebox.core.admin_site as the main django admin site"""
from core.admin_site import register_admin_site
register_admin_site()
# Auto-start the orchestrator when running the web server
self._maybe_start_orchestrator()
def _maybe_start_orchestrator(self):
"""Start the background orchestrator, but only when this process is a web server.

Skips silently (returns None) when not a server command, when explicitly
disabled via RUN_ORCHESTRATOR, or in the runserver autoreload supervisor.
Any startup failure is logged as a warning rather than raised, so the
server never crashes because of the orchestrator.
"""
import os
# Don't start orchestrator during migrations, shell, tests, etc.
# Only start when running: runserver, daphne, gunicorn, uwsgi
if not self._is_web_server():
return
# Don't start if RUN_ORCHESTRATOR env var is explicitly set to false
if os.environ.get('RUN_ORCHESTRATOR', '').lower() in ('false', '0', 'no'):
return
# Under `runserver` autoreload, RUN_MAIN == 'true' only in the reloaded worker
# process; skip in the supervising parent so we don't start the orchestrator twice.
if os.environ.get('RUN_MAIN') != 'true' and 'runserver' in sys.argv:
return
try:
# Imported lazily: the workers app may not be ready at module import time.
from workers.orchestrator import Orchestrator
if not Orchestrator.is_running():
# Start orchestrator as daemon (won't exit on idle when started by server)
orchestrator = Orchestrator(exit_on_idle=False)
orchestrator.start()
except Exception as e:
# Don't crash the server if orchestrator fails to start
import logging
logging.getLogger('archivebox').warning(f'Failed to auto-start orchestrator: {e}')
def _is_web_server(self) -> bool:
    """Return True when the current process command line looks like a web server.

    Matches by substring against the joined, lowercased ``sys.argv``, so any
    of runserver/daphne/gunicorn/uwsgi/server appearing anywhere counts.
    """
    cmdline = ' '.join(sys.argv).lower()
    markers = ('runserver', 'daphne', 'gunicorn', 'uwsgi', 'server')
    return any(marker in cmdline for marker in markers)

View File

@@ -23,7 +23,11 @@ from archivebox.config import CONSTANTS
from archivebox.misc.system import get_dir_size, atomic_write
from archivebox.misc.util import parse_date, base_url, domain as url_domain, to_json, ts_to_date_str, urlencode, htmlencode, urldecode
from archivebox.misc.hashing import get_dir_info
from archivebox.hooks import ARCHIVE_METHODS_INDEXING_PRECEDENCE
from archivebox.hooks import (
ARCHIVE_METHODS_INDEXING_PRECEDENCE,
get_extractors, get_extractor_name, get_extractor_icon,
DEFAULT_EXTRACTOR_ICONS,
)
from archivebox.base_models.models import (
ModelWithUUID, ModelWithSerializers, ModelWithOutputDir,
ModelWithConfig, ModelWithNotes, ModelWithHealthStats,
@@ -343,45 +347,37 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
def icons(self) -> str:
"""Generate HTML icons showing which extractors have succeeded for this snapshot"""
from django.utils.html import format_html, mark_safe
from collections import defaultdict
cache_key = f'result_icons:{self.pk}:{(self.downloaded_at or self.modified_at or self.created_at or self.bookmarked_at).timestamp()}'
def calc_icons():
if hasattr(self, '_prefetched_objects_cache') and 'archiveresult_set' in self._prefetched_objects_cache:
archive_results = [r for r in self.archiveresult_set.all() if r.status == "succeeded" and r.output]
archive_results = {r.extractor: r for r in self.archiveresult_set.all() if r.status == "succeeded" and r.output}
else:
archive_results = self.archiveresult_set.filter(status="succeeded", output__isnull=False)
archive_results = {r.extractor: r for r in self.archiveresult_set.filter(status="succeeded", output__isnull=False)}
path = self.archive_path
canon = self.canonical_outputs()
output = ""
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a> &nbsp;'
icons = {
"singlefile": "", "wget": "🆆", "dom": "🅷", "pdf": "📄",
"screenshot": "💻", "media": "📼", "git": "🅶", "archive_org": "🏛",
"readability": "🆁", "mercury": "🅼", "warc": "📦"
}
exclude = ["favicon", "title", "headers", "htmltotext", "archive_org"]
extractor_outputs = defaultdict(lambda: None)
for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
for result in archive_results:
if result.extractor == extractor:
extractor_outputs[extractor] = result
# Get all extractors from hooks system (sorted by numeric prefix)
all_extractors = [get_extractor_name(e) for e in get_extractors()]
for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
if extractor not in exclude:
existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
output += format_html(output_template, path, canon.get(extractor, ''), str(bool(existing)), extractor, icons.get(extractor, "?"))
if extractor == "wget":
exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
output += format_html(output_template, path, canon.get("warc", "warc/"), str(bool(exists)), "warc", icons.get("warc", "?"))
if extractor == "archive_org":
exists = extractor in extractor_outputs and extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon.get("archive_org", ""), str(exists), "archive_org", icons.get("archive_org", "?"))
for extractor in all_extractors:
result = archive_results.get(extractor)
existing = result and result.status == 'succeeded' and result.output
icon = get_extractor_icon(extractor)
output += format_html(
output_template,
path,
canon.get(extractor, extractor + '/'),
str(bool(existing)),
extractor,
icon
)
return format_html('<span class="files-icons" style="font-size: 1.1em; opacity: 0.8; min-width: 240px; display: inline-block">{}<span>', mark_safe(output))
return format_html('<span class="files-icons" style="font-size: 1.1em; opacity: 0.8; min-width: 240px; display: inline-block">{}</span>', mark_safe(output))
cache_result = cache.get(cache_key)
if cache_result:
@@ -767,12 +763,11 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
FAILED = 'failed', 'Failed'
SKIPPED = 'skipped', 'Skipped'
EXTRACTOR_CHOICES = (
('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'),
('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'),
('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'),
('dom', 'dom'), ('title', 'title'), ('wget', 'wget'),
)
@classmethod
def get_extractor_choices(cls):
    """Build (value, label) choice pairs from hook-discovered extractors (for forms/admin)."""
    names = [get_extractor_name(entry) for entry in get_extractors()]
    return tuple((name, name) for name in names)
# Keep AutoField for backward compatibility with 0.7.x databases
# UUID field is added separately by migration for new records
@@ -783,7 +778,8 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
modified_at = models.DateTimeField(auto_now=True)
snapshot: Snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE) # type: ignore
extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32, blank=False, null=False, db_index=True)
# No choices= constraint - extractor names come from plugin system and can be any string
extractor = models.CharField(max_length=32, blank=False, null=False, db_index=True)
pwd = models.CharField(max_length=256, default=None, null=True, blank=True)
cmd = models.JSONField(default=None, null=True, blank=True)
cmd_version = models.CharField(max_length=128, default=None, null=True, blank=True)
@@ -835,6 +831,25 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
def output_exists(self) -> bool:
return os.path.exists(Path(self.snapshot_dir) / self.extractor)
def embed_path(self) -> Optional[str]:
    """
    Return the relative path of the embeddable output file for this result.

    Prefers the explicit ``output`` field when set; otherwise falls back to
    the snapshot's canonical output path for this extractor, and finally to
    the extractor's own output directory.
    """
    if self.output:
        return self.output
    # No explicit output recorded: consult the snapshot's canonical output paths.
    canonical_paths = self.snapshot.canonical_outputs()
    lookup_key = f'{self.extractor}_path'
    if lookup_key in canonical_paths:
        return canonical_paths[lookup_key]
    # Last resort: the directory this extractor writes into.
    return f'{self.extractor}/'
def create_output_dir(self):
output_dir = Path(self.snapshot_dir) / self.extractor
output_dir.mkdir(parents=True, exist_ok=True)
@@ -891,6 +906,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
output_dir=extractor_dir,
config_objects=config_objects,
url=self.snapshot.url,
snapshot_id=str(self.snapshot.id),
)
end_ts = timezone.now()
@@ -1000,6 +1016,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
hook,
output_dir=self.output_dir,
config_objects=config_objects,
url=self.snapshot.url,
snapshot_id=str(self.snapshot.id),
extractor=self.extractor,
)

View File

@@ -68,9 +68,6 @@ INSTALLED_APPS = [
# 3rd-party apps from PyPI that need to be loaded last
"admin_data_views", # handles rendering some convenient automatic read-only views of data in Django admin
"django_extensions", # provides Django Debug Toolbar (and other non-debug helpers)
"django_huey", # provides multi-queue support for django huey https://github.com/gaiacoop/django-huey
"bx_django_utils", # needed for huey_monitor https://github.com/boxine/bx_django_utils
"huey_monitor", # adds an admin UI for monitoring background huey tasks https://github.com/boxine/django-huey-monitor
]
@@ -215,70 +212,6 @@ MIGRATION_MODULES = {"signal_webhooks": None}
# as much as I'd love this to be a UUID or ULID field, it's not supported yet as of Django 5.0
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
HUEY = {
"huey_class": "huey.SqliteHuey",
"filename": CONSTANTS.QUEUE_DATABASE_FILENAME,
"name": "commands",
"results": True,
"store_none": True,
"immediate": False,
"utc": True,
"consumer": {
"workers": 1,
"worker_type": "thread",
"initial_delay": 0.1, # Smallest polling interval, same as -d.
"backoff": 1.15, # Exponential backoff using this rate, -b.
"max_delay": 10.0, # Max possible polling interval, -m.
"scheduler_interval": 1, # Check schedule every second, -s.
"periodic": True, # Enable crontab feature.
"check_worker_health": True, # Enable worker health checks.
"health_check_interval": 1, # Check worker health every second.
},
}
# https://huey.readthedocs.io/en/latest/contrib.html#setting-things-up
# https://github.com/gaiacoop/django-huey
DJANGO_HUEY = {
"default": "commands",
"queues": {
HUEY["name"]: HUEY.copy(),
# more registered here at plugin import-time by BaseQueue.register()
# Additional huey queues configured via settings
},
}
class HueyDBRouter:
"""
A router to store all the Huey result k:v / Huey Monitor models in the queue.sqlite3 database.
We keep the databases separate because the queue database receives many more reads/writes per second
and we want to avoid single-write lock contention with the main database. Also all the in-progress task
data is ephemeral/not-important-long-term. This makes it easier to for the user to clear non-critical
temp data by just deleting queue.sqlite3 and leaving index.sqlite3.
"""
route_app_labels = {"huey_monitor", "django_huey", "djhuey"}
db_name = "queue"
def db_for_read(self, model, **hints):
if model._meta.app_label in self.route_app_labels:
return self.db_name
return "default"
def db_for_write(self, model, **hints):
if model._meta.app_label in self.route_app_labels:
return self.db_name
return "default"
def allow_relation(self, obj1, obj2, **hints):
if obj1._meta.app_label in self.route_app_labels or obj2._meta.app_label in self.route_app_labels:
return obj1._meta.app_label == obj2._meta.app_label
return None
def allow_migrate(self, db, app_label, model_name=None, **hints):
if app_label in self.route_app_labels:
return db == self.db_name
return db == "default"
# class FilestoreDBRouter:
@@ -311,7 +244,7 @@ class HueyDBRouter:
# return db == self.db_name
# return db == "default"
DATABASE_ROUTERS = ["core.settings.HueyDBRouter"]
DATABASE_ROUTERS = []
CACHES = {
"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"},

View File

@@ -1,9 +1,13 @@
from django import template
from django.contrib.admin.templatetags.base import InclusionAdminNode
from django.utils.safestring import mark_safe
from typing import Union
from archivebox.hooks import (
get_extractor_icon, get_extractor_template, get_extractor_name,
)
register = template.Library()
@@ -44,3 +48,115 @@ def url_replace(context, **kwargs):
dict_ = context['request'].GET.copy()
dict_.update(**kwargs)
return dict_.urlencode()
@register.simple_tag
def extractor_icon(extractor: str) -> str:
    """
    Template tag: return the icon markup for *extractor*, marked safe for HTML.

    Usage: {% extractor_icon "screenshot" %}
    """
    icon_html = get_extractor_icon(extractor)
    return mark_safe(icon_html)
@register.simple_tag(takes_context=True)
def extractor_thumbnail(context, result) -> str:
    """
    Template tag: render the thumbnail snippet for an archive result.

    Usage: {% extractor_thumbnail result %}

    The snippet template receives: result (the ArchiveResult), snapshot
    (its parent), output_path (from result.embed_path() when available,
    else result.output), and extractor (the base extractor name). Returns
    '' when no thumbnail template is registered or rendering fails.
    """
    base_name = get_extractor_name(result.extractor)
    snippet = get_extractor_template(base_name, 'thumbnail')
    if not snippet:
        return ''
    if hasattr(result, 'embed_path'):
        output_path = result.embed_path()
    else:
        output_path = result.output or ''
    render_vars = {
        'result': result,
        'snapshot': result.snapshot,
        'output_path': output_path,
        'extractor': base_name,
    }
    try:
        rendered = template.Template(snippet).render(template.Context(render_vars))
    except Exception:
        # Best-effort: a broken plugin template must never break the page.
        return ''
    return mark_safe(rendered)
@register.simple_tag(takes_context=True)
def extractor_embed(context, result) -> str:
    """
    Template tag: render the embed-iframe snippet for an archive result.

    Usage: {% extractor_embed result %}

    The snippet template receives result, snapshot, output_path, and
    extractor. Returns '' when no embed template is registered or
    rendering fails.
    """
    base_name = get_extractor_name(result.extractor)
    snippet = get_extractor_template(base_name, 'embed')
    if not snippet:
        return ''
    if hasattr(result, 'embed_path'):
        output_path = result.embed_path()
    else:
        output_path = result.output or ''
    render_vars = {
        'result': result,
        'snapshot': result.snapshot,
        'output_path': output_path,
        'extractor': base_name,
    }
    try:
        rendered = template.Template(snippet).render(template.Context(render_vars))
    except Exception:
        # Best-effort: a broken plugin template must never break the page.
        return ''
    return mark_safe(rendered)
@register.simple_tag(takes_context=True)
def extractor_fullscreen(context, result) -> str:
    """
    Template tag: render the fullscreen snippet for an archive result.

    Usage: {% extractor_fullscreen result %}

    The snippet template receives result, snapshot, output_path, and
    extractor. Returns '' when no fullscreen template is registered or
    rendering fails.
    """
    base_name = get_extractor_name(result.extractor)
    snippet = get_extractor_template(base_name, 'fullscreen')
    if not snippet:
        return ''
    if hasattr(result, 'embed_path'):
        output_path = result.embed_path()
    else:
        output_path = result.output or ''
    render_vars = {
        'result': result,
        'snapshot': result.snapshot,
        'output_path': output_path,
        'extractor': base_name,
    }
    try:
        rendered = template.Template(snippet).render(template.Context(render_vars))
    except Exception:
        # Best-effort: a broken plugin template must never break the page.
        return ''
    return mark_safe(rendered)
@register.filter
def extractor_name(value: str) -> str:
    """
    Filter: return an extractor's base name with any numeric prefix stripped.

    Usage: {{ result.extractor|extractor_name }}
    """
    base_name = get_extractor_name(value)
    return base_name

View File

@@ -8,7 +8,7 @@ from django.views.generic.base import RedirectView
from archivebox.misc.serve_static import serve_static
from core.admin_site import archivebox_admin
from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView, live_progress_view
from workers.views import JobsDashboardView
@@ -43,8 +43,10 @@ urlpatterns = [
path('accounts/', include('django.contrib.auth.urls')),
path('admin/live-progress/', live_progress_view, name='live_progress'),
path('admin/', archivebox_admin.urls),
path("api/", include('api.urls'), name='api'),
path('health/', HealthCheckView.as_view(), name='healthcheck'),

View File

@@ -34,6 +34,7 @@ from archivebox.search import query_search_index
from core.models import Snapshot
from core.forms import AddLinkForm
from crawls.models import Seed, Crawl
from archivebox.hooks import get_extractors, get_extractor_name
@@ -54,8 +55,10 @@ class SnapshotView(View):
@staticmethod
def render_live_index(request, snapshot):
TITLE_LOADING_MSG = 'Not yet archived...'
HIDDEN_RESULTS = ('favicon', 'headers', 'title', 'htmltotext', 'warc', 'archive_org')
# Dict of extractor -> ArchiveResult object
archiveresult_objects = {}
# Dict of extractor -> result info dict (for template compatibility)
archiveresults = {}
results = snapshot.archiveresult_set.all()
@@ -65,18 +68,21 @@ class SnapshotView(View):
abs_path = result.snapshot_dir / (embed_path or 'None')
if (result.status == 'succeeded'
and (result.extractor not in HIDDEN_RESULTS)
and embed_path
and os.access(abs_path, os.R_OK)
and abs_path.exists()):
if os.path.isdir(abs_path) and not any(abs_path.glob('*.*')):
continue
# Store the full ArchiveResult object for template tags
archiveresult_objects[result.extractor] = result
result_info = {
'name': result.extractor,
'path': embed_path,
'ts': ts_to_date_str(result.end_ts),
'size': abs_path.stat().st_size or '?',
'result': result, # Include the full object for template tags
}
archiveresults[result.extractor] = result_info
@@ -101,11 +107,11 @@ class SnapshotView(View):
}
# iterate through all the files in the snapshot dir and add the biggest ones to1 the result list
# iterate through all the files in the snapshot dir and add the biggest ones to the result list
snap_dir = Path(snapshot.output_dir)
if not os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK):
return {}
for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
extension = result_file.suffix.lstrip('.').lower()
if result_file.is_dir() or result_file.name.startswith('.') or extension not in allowed_extensions:
@@ -121,12 +127,16 @@ class SnapshotView(View):
'path': result_file.relative_to(snap_dir),
'ts': ts_to_date_str(result_file.stat().st_mtime or 0),
'size': file_size,
'result': None, # No ArchiveResult object for filesystem-discovered files
}
preferred_types = ('singlefile', 'screenshot', 'wget', 'dom', 'media', 'pdf', 'readability', 'mercury')
# Get available extractors from hooks (sorted by numeric prefix for ordering)
# Convert to base names for display ordering
all_extractors = [get_extractor_name(e) for e in get_extractors()]
preferred_types = tuple(all_extractors)
all_types = preferred_types + tuple(result_type for result_type in archiveresults.keys() if result_type not in preferred_types)
best_result = {'path': 'None'}
best_result = {'path': 'None', 'result': None}
for result_type in preferred_types:
if result_type in archiveresults:
best_result = archiveresults[result_type]
@@ -157,6 +167,7 @@ class SnapshotView(View):
'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
'best_result': best_result,
'snapshot': snapshot, # Pass the snapshot object for template tags
}
return render(template_name='core/snapshot_live.html', request=request, context=context)
@@ -436,7 +447,7 @@ class AddView(UserPassesTestMixin, FormView):
def form_valid(self, form):
urls = form.cleaned_data["url"]
print(f'[+] Adding URL: {urls}')
parser = form.cleaned_data["parser"]
parser = form.cleaned_data.get("parser", "auto") # default to auto-detect parser
tag = form.cleaned_data["tag"]
depth = 0 if form.cleaned_data["depth"] == "0" else 1
extractors = ','.join(form.cleaned_data["archive_methods"])
@@ -452,18 +463,19 @@ class AddView(UserPassesTestMixin, FormView):
if extractors:
input_kwargs.update({"extractors": extractors})
from archivebox.config.permissions import HOSTNAME
# 1. save the provided urls to sources/2024-11-05__23-59-59__web_ui_add_by_user_<user_pk>.txt
sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__web_ui_add_by_user_{self.request.user.pk}.txt'
sources_file.write_text(urls if isinstance(urls, str) else '\n'.join(urls))
# 2. create a new Seed pointing to the sources/2024-11-05__23-59-59__web_ui_add_by_user_<user_pk>.txt
timestamp = timezone.now().strftime("%Y-%m-%d__%H-%M-%S")
seed = Seed.from_file(
sources_file,
label=f'{self.request.user.username}@{HOSTNAME}{self.request.path}',
label=f'{self.request.user.username}@{HOSTNAME}{self.request.path} {timestamp}',
parser=parser,
tag=tag,
created_by=self.request.user.pk,
@@ -472,7 +484,7 @@ class AddView(UserPassesTestMixin, FormView):
# 'INDEX_ONLY': index_only,
# 'OVERWRITE': False,
'DEPTH': depth,
'EXTRACTORS': parser,
'EXTRACTORS': extractors or '',
# 'DEFAULT_PERSONA': persona or 'Default',
})
# 3. create a new Crawl pointing to the Seed
@@ -490,10 +502,15 @@ class AddView(UserPassesTestMixin, FormView):
self.request,
mark_safe(f"Adding {rough_url_count} URLs in the background. (refresh in a minute start seeing results) {crawl.admin_change_url}"),
)
# if not bg:
# from workers.orchestrator import Orchestrator
# orchestrator = Orchestrator(exit_on_idle=True, max_concurrent_actors=4)
# orchestrator.start()
# Start orchestrator in background to process the queued crawl
try:
from archivebox.workers.tasks import ensure_orchestrator_running
ensure_orchestrator_running()
except Exception as e:
# Orchestrator may already be running via supervisord, or fail to start
# This is not fatal - the crawl will be processed when orchestrator runs
print(f'[!] Failed to start orchestrator: {e}')
return redirect(crawl.admin_change_url)
@@ -513,6 +530,141 @@ class HealthCheckView(View):
)
import json
from django.http import JsonResponse
def live_progress_view(request):
"""JSON status endpoint polled by the admin live-progress monitor.

Returns orchestrator liveness, queue counts for Crawls/Snapshots/
ArchiveResults, and a hierarchical sample of active crawls (up to 10,
each with up to 5 active snapshots, each with up to 5 running
extractors). On any failure it degrades to an HTTP 500 JSON payload
with zeroed counts plus the error and traceback, so the polling UI
never breaks.
"""
try:
from workers.orchestrator import Orchestrator
from crawls.models import Crawl
from core.models import Snapshot, ArchiveResult
# Get orchestrator status
orchestrator_running = Orchestrator.is_running()
# NOTE(review): instantiates a fresh Orchestrator() just to count workers -
# assumed cheap and side-effect free; confirm against Orchestrator.__init__.
total_workers = Orchestrator().get_total_worker_count() if orchestrator_running else 0
# Get model counts by status
crawls_pending = Crawl.objects.filter(status=Crawl.StatusChoices.QUEUED).count()
crawls_started = Crawl.objects.filter(status=Crawl.StatusChoices.STARTED).count()
# Get recent crawls (last 24 hours)
from datetime import timedelta
one_day_ago = timezone.now() - timedelta(days=1)
crawls_recent = Crawl.objects.filter(created_at__gte=one_day_ago).count()
snapshots_pending = Snapshot.objects.filter(status=Snapshot.StatusChoices.QUEUED).count()
snapshots_started = Snapshot.objects.filter(status=Snapshot.StatusChoices.STARTED).count()
archiveresults_pending = ArchiveResult.objects.filter(status=ArchiveResult.StatusChoices.QUEUED).count()
archiveresults_started = ArchiveResult.objects.filter(status=ArchiveResult.StatusChoices.STARTED).count()
archiveresults_succeeded = ArchiveResult.objects.filter(status=ArchiveResult.StatusChoices.SUCCEEDED).count()
archiveresults_failed = ArchiveResult.objects.filter(status=ArchiveResult.StatusChoices.FAILED).count()
# Build hierarchical active crawls with nested snapshots and archive results
active_crawls = []
for crawl in Crawl.objects.filter(
status__in=[Crawl.StatusChoices.QUEUED, Crawl.StatusChoices.STARTED]
).order_by('-modified_at')[:10]:
# Get snapshots for this crawl
crawl_snapshots = Snapshot.objects.filter(crawl=crawl)
total_snapshots = crawl_snapshots.count()
completed_snapshots = crawl_snapshots.filter(status=Snapshot.StatusChoices.SEALED).count()
pending_snapshots = crawl_snapshots.filter(status=Snapshot.StatusChoices.QUEUED).count()
# Calculate crawl progress
crawl_progress = int((completed_snapshots / total_snapshots) * 100) if total_snapshots > 0 else 0
# Get active snapshots for this crawl
active_snapshots_for_crawl = []
for snapshot in crawl_snapshots.filter(
status__in=[Snapshot.StatusChoices.QUEUED, Snapshot.StatusChoices.STARTED]
).order_by('-modified_at')[:5]:
# Get archive results for this snapshot
snapshot_results = ArchiveResult.objects.filter(snapshot=snapshot)
total_extractors = snapshot_results.count()
completed_extractors = snapshot_results.filter(status=ArchiveResult.StatusChoices.SUCCEEDED).count()
failed_extractors = snapshot_results.filter(status=ArchiveResult.StatusChoices.FAILED).count()
pending_extractors = snapshot_results.filter(status=ArchiveResult.StatusChoices.QUEUED).count()
# Calculate snapshot progress (failed results count as "done" for the percentage)
snapshot_progress = int(((completed_extractors + failed_extractors) / total_extractors) * 100) if total_extractors > 0 else 0
# Get active extractors for this snapshot
active_extractors = [
{
'id': str(ar.id),
'extractor': ar.extractor,
'status': ar.status,
'started': ar.start_ts.isoformat() if ar.start_ts else None,
# NOTE(review): hard-coded 50% placeholder - no per-extractor progress metric is tracked yet
'progress': 50,
}
for ar in snapshot_results.filter(status=ArchiveResult.StatusChoices.STARTED).order_by('-start_ts')[:5]
]
active_snapshots_for_crawl.append({
'id': str(snapshot.id),
'url': snapshot.url[:80],
'status': snapshot.status,
'started': snapshot.modified_at.isoformat() if snapshot.modified_at else None,
'progress': snapshot_progress,
'total_extractors': total_extractors,
'completed_extractors': completed_extractors,
'failed_extractors': failed_extractors,
'pending_extractors': pending_extractors,
'active_extractors': active_extractors,
})
active_crawls.append({
'id': str(crawl.id),
'label': str(crawl)[:60],
'status': crawl.status,
'started': crawl.modified_at.isoformat() if crawl.modified_at else None,
'progress': crawl_progress,
'max_depth': crawl.max_depth,
'total_snapshots': total_snapshots,
'completed_snapshots': completed_snapshots,
# placeholder - failed-snapshot count is not computed here yet
'failed_snapshots': 0,
'pending_snapshots': pending_snapshots,
'active_snapshots': active_snapshots_for_crawl,
})
return JsonResponse({
'orchestrator_running': orchestrator_running,
'total_workers': total_workers,
'crawls_pending': crawls_pending,
'crawls_started': crawls_started,
'crawls_recent': crawls_recent,
'snapshots_pending': snapshots_pending,
'snapshots_started': snapshots_started,
'archiveresults_pending': archiveresults_pending,
'archiveresults_started': archiveresults_started,
'archiveresults_succeeded': archiveresults_succeeded,
'archiveresults_failed': archiveresults_failed,
'active_crawls': active_crawls,
'server_time': timezone.now().isoformat(),
})
# Degrade to a 500 JSON payload with zeroed counts so the polling UI never breaks.
except Exception as e:
import traceback
return JsonResponse({
'error': str(e),
'traceback': traceback.format_exc(),
'orchestrator_running': False,
'total_workers': 0,
'crawls_pending': 0,
'crawls_started': 0,
'crawls_recent': 0,
'snapshots_pending': 0,
'snapshots_started': 0,
'archiveresults_pending': 0,
'archiveresults_started': 0,
'archiveresults_succeeded': 0,
'archiveresults_failed': 0,
'active_crawls': [],
'server_time': timezone.now().isoformat(),
}, status=500)
def find_config_section(key: str) -> str:
CONFIGS = get_all_configs()

View File

@@ -1,10 +1,18 @@
__package__ = 'archivebox.crawls'
from django.utils.html import format_html, format_html_join
from django.contrib import admin
import json
from pathlib import Path
from django.utils.html import format_html, format_html_join, mark_safe
from django.contrib import admin, messages
from django.urls import path
from django.http import JsonResponse
from django.views.decorators.http import require_POST
from archivebox import DATA_DIR
from django_object_actions import action
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
from core.models import Snapshot
@@ -16,8 +24,8 @@ class SeedAdmin(ConfigEditorMixin, BaseModelAdmin):
sort_fields = ('id', 'created_at', 'created_by', 'label', 'notes', 'uri', 'extractor', 'tags_str')
search_fields = ('id', 'created_by__username', 'label', 'notes', 'uri', 'extractor', 'tags_str')
readonly_fields = ('created_at', 'modified_at', 'scheduled_crawls', 'crawls', 'snapshots', 'contents', 'available_config_options')
fields = ('label', 'notes', 'uri', 'extractor', 'tags_str', 'config', 'available_config_options', 'created_by', *readonly_fields[:-1])
readonly_fields = ('created_at', 'modified_at', 'scheduled_crawls', 'crawls', 'snapshots', 'contents')
fields = ('label', 'notes', 'uri', 'extractor', 'tags_str', 'config', 'created_by', *readonly_fields)
list_filter = ('extractor', 'created_by')
ordering = ['-created_at']
@@ -34,19 +42,19 @@ class SeedAdmin(ConfigEditorMixin, BaseModelAdmin):
return format_html_join('<br/>', ' - <a href="{}">{}</a>', (
(scheduledcrawl.admin_change_url, scheduledcrawl)
for scheduledcrawl in obj.scheduled_crawl_set.all().order_by('-created_at')[:20]
)) or format_html('<i>No Scheduled Crawls yet...</i>')
)) or mark_safe('<i>No Scheduled Crawls yet...</i>')
def crawls(self, obj):
return format_html_join('<br/>', ' - <a href="{}">{}</a>', (
(crawl.admin_change_url, crawl)
for crawl in obj.crawl_set.all().order_by('-created_at')[:20]
)) or format_html('<i>No Crawls yet...</i>')
)) or mark_safe('<i>No Crawls yet...</i>')
def snapshots(self, obj):
return format_html_join('<br/>', ' - <a href="{}">{}</a>', (
(snapshot.admin_change_url, snapshot)
for snapshot in obj.snapshot_set.all().order_by('-created_at')[:20]
)) or format_html('<i>No Snapshots yet...</i>')
)) or mark_safe('<i>No Snapshots yet...</i>')
def contents(self, obj):
if obj.uri.startswith('file:///data/'):
@@ -69,14 +77,81 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
sort_fields = ('id', 'created_at', 'created_by', 'max_depth', 'label', 'notes', 'seed_str', 'schedule_str', 'status', 'retry_at')
search_fields = ('id', 'created_by__username', 'max_depth', 'label', 'notes', 'seed_id', 'schedule_id', 'status', 'seed__uri')
readonly_fields = ('created_at', 'modified_at', 'snapshots', 'seed_contents', 'available_config_options')
fields = ('label', 'notes', 'urls', 'config', 'available_config_options', 'status', 'retry_at', 'max_depth', 'seed', 'schedule', 'created_by', *readonly_fields[:-1])
readonly_fields = ('created_at', 'modified_at', 'snapshots', 'seed_urls_editor')
fields = ('label', 'notes', 'seed_urls_editor', 'config', 'status', 'retry_at', 'max_depth', 'seed', 'schedule', 'created_by', 'created_at', 'modified_at', 'snapshots')
list_filter = ('max_depth', 'seed', 'schedule', 'created_by', 'status', 'retry_at')
ordering = ['-created_at', '-retry_at']
list_per_page = 100
actions = ["delete_selected"]
change_actions = ['recrawl']
@action(label='Recrawl', description='Create a new crawl with the same settings')
def recrawl(self, request, obj):
    """Clone this crawl into a fresh QUEUED crawl reusing its seed and settings."""
    from django.utils import timezone
    from django.shortcuts import redirect
    duplicate = Crawl.objects.create(
        created_by=request.user,
        seed=obj.seed,
        urls=obj.urls,
        max_depth=obj.max_depth,
        config=obj.config,
        schedule=obj.schedule,
        notes=obj.notes,
        label=f"{obj.label} (recrawl)" if obj.label else "",
        status=Crawl.StatusChoices.QUEUED,
        retry_at=timezone.now(),  # eligible for pickup immediately
    )
    messages.success(
        request,
        f'Created new crawl {duplicate.id} with the same settings. '
        f'It will start processing shortly.'
    )
    # Send the admin straight to the newly created crawl's change page.
    return redirect('admin:crawls_crawl_change', duplicate.id)
def get_urls(self):
    """Prepend the AJAX endpoint for saving seed file contents to the default admin URLs."""
    extra = [
        path(
            '<path:object_id>/save_seed_contents/',
            self.admin_site.admin_view(self.save_seed_contents_view),
            name='crawls_crawl_save_seed_contents',
        ),
    ]
    # Custom routes must come first so they are matched before the catch-all admin patterns.
    return extra + super().get_urls()
def save_seed_contents_view(self, request, object_id):
    """Handle saving seed file contents via AJAX.

    Expects a POST with JSON body {"contents": "..."} and writes the text to
    the local file referenced by the crawl's seed URI (file:///data/...).
    Returns JsonResponse({'success': bool, ...}) with an appropriate status.
    """
    if request.method != 'POST':
        return JsonResponse({'success': False, 'error': 'POST required'}, status=405)
    try:
        crawl = Crawl.objects.get(pk=object_id)
    except Crawl.DoesNotExist:
        return JsonResponse({'success': False, 'error': 'Crawl not found'}, status=404)
    if not (crawl.seed and crawl.seed.uri and crawl.seed.uri.startswith('file:///data/')):
        return JsonResponse({'success': False, 'error': 'Seed is not a local file'}, status=400)
    try:
        data = json.loads(request.body)
        contents = data.get('contents', '')
    except json.JSONDecodeError:
        return JsonResponse({'success': False, 'error': 'Invalid JSON'}, status=400)
    source_file = DATA_DIR / crawl.seed.uri.replace('file:///data/', '', 1)
    # Security: the 'file:///data/' prefix alone does not prevent '..' path
    # components from escaping DATA_DIR; reject any resolved path outside it.
    try:
        source_file.resolve().relative_to(Path(DATA_DIR).resolve())
    except ValueError:
        return JsonResponse({'success': False, 'error': 'Seed path is outside the data directory'}, status=400)
    try:
        # Ensure parent directory exists
        source_file.parent.mkdir(parents=True, exist_ok=True)
        source_file.write_text(contents)
        return JsonResponse({'success': True, 'message': f'Saved {len(contents)} bytes to {source_file.name}'})
    except Exception as e:
        return JsonResponse({'success': False, 'error': str(e)}, status=500)
def num_snapshots(self, obj):
    # Snapshot count for this crawl; rendered as a column in the admin list view.
    return obj.snapshot_set.count()
@@ -84,35 +159,175 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
return format_html_join('<br/>', '<a href="{}">{}</a>', (
(snapshot.admin_change_url, snapshot)
for snapshot in obj.snapshot_set.all().order_by('-created_at')[:20]
)) or format_html('<i>No Snapshots yet...</i>')
)) or mark_safe('<i>No Snapshots yet...</i>')
@admin.display(description='Schedule', ordering='schedule')
def schedule_str(self, obj):
if not obj.schedule:
return format_html('<i>None</i>')
return mark_safe('<i>None</i>')
return format_html('<a href="{}">{}</a>', obj.schedule.admin_change_url, obj.schedule)
@admin.display(description='Seed', ordering='seed')
def seed_str(self, obj):
if not obj.seed:
return format_html('<i>None</i>')
return mark_safe('<i>None</i>')
return format_html('<a href="{}">{}</a>', obj.seed.admin_change_url, obj.seed)
def seed_contents(self, obj):
if not (obj.seed and obj.seed.uri):
return format_html('<i>None</i>')
if obj.seed.uri.startswith('file:///data/'):
source_file = DATA_DIR / obj.seed.uri.replace('file:///data/', '', 1)
contents = ""
@admin.display(description='URLs')
def seed_urls_editor(self, obj):
"""Combined editor showing seed URL and file contents."""
widget_id = f'seed_urls_{obj.pk}'
# Get the seed URI (or use urls field if no seed)
seed_uri = ''
if obj.seed and obj.seed.uri:
seed_uri = obj.seed.uri
elif obj.urls:
seed_uri = obj.urls
# Check if it's a local file we can edit
is_file = seed_uri.startswith('file:///data/')
contents = ""
error = None
source_file = None
if is_file:
source_file = DATA_DIR / seed_uri.replace('file:///data/', '', 1)
try:
contents = source_file.read_text().strip()[:14_000]
contents = source_file.read_text().strip()
except Exception as e:
contents = f'Error reading {source_file}: {e}'
return format_html('<b><code>{}</code>:</b><br/><pre>{}</pre>', source_file, contents)
return format_html('See URLs here: <a href="{}">{}</a>', obj.seed.uri, obj.seed.uri)
error = f'Error reading {source_file}: {e}'
# Escape for safe HTML embedding
escaped_uri = seed_uri.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
escaped_contents = (contents or '').replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
# Count lines for auto-expand logic
line_count = len(contents.split('\n')) if contents else 0
uri_rows = min(max(1, seed_uri.count('\n') + 1), 3)
html = f'''
<div id="{widget_id}_container" style="max-width: 900px;">
<!-- Seed URL input (auto-expands) -->
<div style="margin-bottom: 12px;">
<label style="font-weight: bold; display: block; margin-bottom: 4px;">Seed URL:</label>
<textarea id="{widget_id}_uri"
style="width: 100%; font-family: monospace; font-size: 13px;
padding: 8px; border: 1px solid #ccc; border-radius: 4px;
resize: vertical; min-height: 32px; overflow: hidden;"
rows="{uri_rows}"
placeholder="file:///data/sources/... or https://..."
{"readonly" if not obj.pk else ""}>{escaped_uri}</textarea>
</div>
{"" if not is_file else f'''
<!-- File contents editor -->
<div style="margin-bottom: 8px;">
<label style="font-weight: bold; display: block; margin-bottom: 4px;">
File Contents: <code style="font-weight: normal; color: #666;">{source_file}</code>
</label>
{"<div style='color: #dc3545; margin-bottom: 8px;'>" + error + "</div>" if error else ""}
<textarea id="{widget_id}_contents"
style="width: 100%; height: {min(400, max(150, line_count * 18))}px; font-family: monospace; font-size: 12px;
padding: 8px; border: 1px solid #ccc; border-radius: 4px; resize: vertical;"
placeholder="Enter URLs, one per line...">{escaped_contents}</textarea>
</div>
<div style="display: flex; gap: 12px; align-items: center; flex-wrap: wrap;">
<button type="button" id="{widget_id}_save_btn"
onclick="saveSeedUrls_{widget_id}()"
style="padding: 8px 20px; background: #417690; color: white; border: none;
border-radius: 4px; cursor: pointer; font-weight: bold;">
Save URLs
</button>
<span id="{widget_id}_line_count" style="color: #666; font-size: 12px;"></span>
<span id="{widget_id}_status" style="color: #666; font-size: 12px;"></span>
</div>
'''}
{"" if is_file else f'''
<div style="margin-top: 8px; color: #666;">
<a href="{seed_uri}" target="_blank">{seed_uri}</a>
</div>
'''}
<script>
(function() {{
var uriInput = document.getElementById('{widget_id}_uri');
var contentsInput = document.getElementById('{widget_id}_contents');
var status = document.getElementById('{widget_id}_status');
var lineCount = document.getElementById('{widget_id}_line_count');
var saveBtn = document.getElementById('{widget_id}_save_btn');
// Auto-resize URI input
function autoResizeUri() {{
uriInput.style.height = 'auto';
uriInput.style.height = Math.min(100, uriInput.scrollHeight) + 'px';
}}
uriInput.addEventListener('input', autoResizeUri);
autoResizeUri();
if (contentsInput) {{
function updateLineCount() {{
var lines = contentsInput.value.split('\\n').filter(function(l) {{ return l.trim(); }});
lineCount.textContent = lines.length + ' URLs';
}}
contentsInput.addEventListener('input', function() {{
updateLineCount();
if (status) {{
status.textContent = '(unsaved changes)';
status.style.color = '#c4820e';
}}
}});
updateLineCount();
}}
window.saveSeedUrls_{widget_id} = function() {{
if (!saveBtn) return;
saveBtn.disabled = true;
saveBtn.textContent = 'Saving...';
if (status) status.textContent = '';
fetch(window.location.pathname + 'save_seed_contents/', {{
method: 'POST',
headers: {{
'Content-Type': 'application/json',
'X-CSRFToken': document.querySelector('[name=csrfmiddlewaretoken]').value
}},
body: JSON.stringify({{ contents: contentsInput ? contentsInput.value : '' }})
}})
.then(function(response) {{ return response.json(); }})
.then(function(data) {{
if (data.success) {{
if (status) {{
status.textContent = '' + data.message;
status.style.color = '#28a745';
}}
}} else {{
if (status) {{
status.textContent = '' + data.error;
status.style.color = '#dc3545';
}}
}}
}})
.catch(function(err) {{
if (status) {{
status.textContent = '✗ Error: ' + err;
status.style.color = '#dc3545';
}}
}})
.finally(function() {{
saveBtn.disabled = false;
saveBtn.textContent = 'Save URLs';
}});
}};
}})();
</script>
</div>
'''
return mark_safe(html)
@@ -143,14 +358,14 @@ class CrawlScheduleAdmin(BaseModelAdmin):
return format_html_join('<br/>', ' - <a href="{}">{}</a>', (
(crawl.admin_change_url, crawl)
for crawl in obj.crawl_set.all().order_by('-created_at')[:20]
)) or format_html('<i>No Crawls yet...</i>')
)) or mark_safe('<i>No Crawls yet...</i>')
def snapshots(self, obj):
crawl_ids = obj.crawl_set.values_list('pk', flat=True)
return format_html_join('<br/>', ' - <a href="{}">{}</a>', (
(snapshot.admin_change_url, snapshot)
for snapshot in Snapshot.objects.filter(crawl_id__in=crawl_ids).order_by('-created_at')[:20]
)) or format_html('<i>No Snapshots yet...</i>')
)) or mark_safe('<i>No Snapshots yet...</i>')
def register_admin(admin_site):

View File

@@ -865,3 +865,189 @@ def export_plugin_config_to_env(
return env
# =============================================================================
# Plugin Template Discovery
# =============================================================================
#
# Plugins can provide custom templates for rendering their output in the UI.
# Templates are discovered by filename convention inside each plugin's templates/ dir:
#
# archivebox/plugins/<plugin_name>/
# templates/
# icon.html # Icon for admin table view (small inline HTML)
# thumbnail.html # Preview thumbnail for snapshot cards
# embed.html # Iframe embed content for main preview
# fullscreen.html # Fullscreen view template
#
# Template context variables available:
# {{ result }} - ArchiveResult object
# {{ snapshot }} - Parent Snapshot object
# {{ output_path }} - Path to output file/dir relative to snapshot dir
# {{ extractor }} - Extractor name (e.g., 'screenshot', 'singlefile')
#
# Default templates used when plugin doesn't provide one
# Built-in fallback templates, used when a plugin ships no template of its own.
# Placeholders ({{ result }}, {{ snapshot }}, {{ output_path }}, {{ extractor }})
# are documented in the block comment above.
DEFAULT_TEMPLATES = {
    'icon': '''<span title="{{ extractor }}">{{ icon }}</span>''',
    'thumbnail': '''
<img src="{{ output_path }}"
alt="{{ extractor }} output"
style="max-width: 100%; max-height: 100px; object-fit: cover;"
onerror="this.style.display='none'">
''',
    'embed': '''
<iframe src="{{ output_path }}"
style="width: 100%; height: 100%; border: none;"
sandbox="allow-same-origin allow-scripts">
</iframe>
''',
    'fullscreen': '''
<iframe src="{{ output_path }}"
style="width: 100%; height: 100vh; border: none;"
sandbox="allow-same-origin allow-scripts allow-forms">
</iframe>
''',
}
# Default icons for known extractors (emoji or short HTML)
DEFAULT_EXTRACTOR_ICONS = {
    'screenshot': '📷',
    'pdf': '📄',
    'singlefile': '📦',
    'dom': '🌐',
    'wget': '📥',
    'media': '🎬',
    'git': '📂',
    'readability': '📖',
    'mercury': '☿️',
    'favicon': '',  # NOTE(review): value appears empty — possibly a lost emoji; confirm
    'title': '📝',
    'headers': '📋',
    'archive_org': '🏛️',
    'htmltotext': '📃',
    'warc': '🗄️',
}
def get_plugin_template(extractor: str, template_name: str) -> Optional[str]:
    """
    Look up a plugin-provided template for the given extractor.

    Args:
        extractor: Extractor name (e.g., 'screenshot', '15_singlefile')
        template_name: One of 'icon', 'thumbnail', 'embed', 'fullscreen'

    Returns:
        Template content as string, or None if no plugin provides one.
    """
    base_name = get_extractor_name(extractor)
    for root in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
        if not root.exists():
            continue
        # Scan plugin directories whose name matches the extractor
        # (either exactly or with a numeric ordering prefix like '15_<name>').
        for candidate in (entry for entry in root.iterdir() if entry.is_dir()):
            name_matches = candidate.name == base_name or candidate.name.endswith(f'_{base_name}')
            if not name_matches:
                continue
            template_file = candidate / 'templates' / f'{template_name}.html'
            if template_file.exists():
                return template_file.read_text()
    return None
def get_extractor_template(extractor: str, template_name: str) -> str:
    """
    Return the template for an extractor, preferring a plugin-provided one.

    Args:
        extractor: Extractor name (e.g., 'screenshot', '15_singlefile')
        template_name: One of 'icon', 'thumbnail', 'embed', 'fullscreen'

    Returns:
        The plugin's template if one exists, otherwise the built-in default
        (empty string for unknown template names).
    """
    # Falsy plugin result (None/empty) falls through to the default.
    return get_plugin_template(extractor, template_name) or DEFAULT_TEMPLATES.get(template_name, '')
def get_extractor_icon(extractor: str) -> str:
    """
    Return the display icon for an extractor.

    A plugin-provided templates/icon.html takes precedence; otherwise the
    entry from DEFAULT_EXTRACTOR_ICONS is used, with a generic folder icon
    as the last resort.

    Args:
        extractor: Extractor name (e.g., 'screenshot', '15_singlefile')

    Returns:
        Icon HTML/emoji string.
    """
    plugin_icon = get_plugin_template(extractor, 'icon')
    if plugin_icon:
        return plugin_icon.strip()
    return DEFAULT_EXTRACTOR_ICONS.get(get_extractor_name(extractor), '📁')
def get_all_extractor_icons() -> Dict[str, str]:
    """
    Return icons for all discovered extractors.

    Returns:
        Dict mapping extractor base names to their icons; extractors that
        share a base name collapse to a single entry (last one wins).
    """
    return {
        get_extractor_name(extractor): get_extractor_icon(extractor)
        for extractor in get_extractors()
    }
def discover_plugin_templates() -> Dict[str, Dict[str, str]]:
    """
    Discover all plugin templates organized by plugin directory name.

    Returns:
        Dict mapping plugin dir names to dicts of template_name -> template_path,
        e.g. {'screenshot': {'icon': '/path/to/icon.html', 'thumbnail': '...'}}.
        Plugins without a templates/ dir (or with no *.html files) are omitted.
    """
    discovered: Dict[str, Dict[str, str]] = {}
    for root in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
        if not root.exists():
            continue
        for plugin_dir in (entry for entry in root.iterdir() if entry.is_dir()):
            templates_dir = plugin_dir / 'templates'
            if not templates_dir.exists():
                continue
            # Template type is the file stem: icon, thumbnail, embed, fullscreen.
            found = {tpl.stem: str(tpl) for tpl in templates_dir.glob('*.html')}
            if found:
                discovered[plugin_dir.name] = found
    return discovered

View File

@@ -3,16 +3,16 @@ __package__ = 'archivebox.machine'
from django.contrib import admin
from django.utils.html import format_html
from archivebox.base_models.admin import BaseModelAdmin
from machine.models import Machine, NetworkInterface, InstalledBinary
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
from machine.models import Machine, NetworkInterface, InstalledBinary, Dependency
class MachineAdmin(BaseModelAdmin):
class MachineAdmin(ConfigEditorMixin, BaseModelAdmin):
list_display = ('id', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid', 'health')
sort_fields = ('id', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid')
readonly_fields = ('guid', 'created_at', 'modified_at', 'ips')
fields = (*readonly_fields, 'hostname', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release', 'stats', 'num_uses_succeeded', 'num_uses_failed')
fields = (*readonly_fields, 'hostname', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release', 'stats', 'config', 'num_uses_succeeded', 'num_uses_failed')
list_filter = ('hw_in_docker', 'hw_in_vm', 'os_arch', 'os_family', 'os_platform')
ordering = ['-created_at']
@@ -48,15 +48,43 @@ class NetworkInterfaceAdmin(BaseModelAdmin):
)
class DependencyAdmin(ConfigEditorMixin, BaseModelAdmin):
list_display = ('id', 'created_at', 'bin_name', 'bin_providers', 'is_installed', 'installed_count')
sort_fields = ('id', 'created_at', 'bin_name', 'bin_providers')
search_fields = ('id', 'bin_name', 'bin_providers')
readonly_fields = ('id', 'created_at', 'modified_at', 'is_installed', 'installed_count')
fields = ('bin_name', 'bin_providers', 'custom_cmds', 'config', *readonly_fields)
list_filter = ('bin_providers', 'created_at')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
@admin.display(description='Installed', boolean=True)
def is_installed(self, dependency):
return dependency.is_installed
@admin.display(description='# Binaries')
def installed_count(self, dependency):
count = dependency.installed_binaries.count()
if count:
return format_html(
'<a href="/admin/machine/installedbinary/?dependency__id__exact={}">{}</a>',
dependency.id, count,
)
return '0'
class InstalledBinaryAdmin(BaseModelAdmin):
list_display = ('id', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'health')
list_display = ('id', 'created_at', 'machine_info', 'name', 'dependency_link', 'binprovider', 'version', 'abspath', 'sha256', 'health')
sort_fields = ('id', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256')
search_fields = ('id', 'machine__id', 'name', 'binprovider', 'version', 'abspath', 'sha256')
search_fields = ('id', 'machine__id', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'dependency__bin_name')
readonly_fields = ('created_at', 'modified_at')
fields = ('machine', 'name', 'binprovider', 'abspath', 'version', 'sha256', *readonly_fields, 'num_uses_succeeded', 'num_uses_failed')
fields = ('machine', 'dependency', 'name', 'binprovider', 'abspath', 'version', 'sha256', *readonly_fields, 'num_uses_succeeded', 'num_uses_failed')
list_filter = ('name', 'binprovider', 'machine_id')
list_filter = ('name', 'binprovider', 'machine_id', 'dependency')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
@@ -68,8 +96,18 @@ class InstalledBinaryAdmin(BaseModelAdmin):
installed_binary.machine.id, str(installed_binary.machine.id)[:8], installed_binary.machine.hostname,
)
@admin.display(description='Dependency', ordering='dependency__bin_name')
def dependency_link(self, installed_binary):
if installed_binary.dependency:
return format_html(
'<a href="/admin/machine/dependency/{}/change">{}</a>',
installed_binary.dependency.id, installed_binary.dependency.bin_name,
)
return '-'
def register_admin(admin_site):
admin_site.register(Machine, MachineAdmin)
admin_site.register(NetworkInterface, NetworkInterfaceAdmin)
admin_site.register(Dependency, DependencyAdmin)
admin_site.register(InstalledBinary, InstalledBinaryAdmin)

View File

@@ -37,15 +37,13 @@ def apply_migrations(out_dir: Path = DATA_DIR) -> List[str]:
"""Apply pending Django migrations"""
from django.core.management import call_command
out1, out2 = StringIO(), StringIO()
out1 = StringIO()
call_command("migrate", interactive=False, database='default', stdout=out1)
out1.seek(0)
call_command("migrate", "huey_monitor", interactive=False, database='queue', stdout=out2)
out2.seek(0)
return [
line.strip() for line in out1.readlines() + out2.readlines() if line.strip()
line.strip() for line in out1.readlines() if line.strip()
]

View File

@@ -480,6 +480,138 @@ def printable_filesize(num_bytes: Union[int, float]) -> str:
return '%3.1f %s' % (num_bytes, 'TB')
@enforce_types
def format_duration(seconds: float) -> str:
    """Render a duration in seconds as a short human-readable string."""
    if seconds < 1:
        # Sub-second durations are shown as whole milliseconds.
        return format(seconds * 1000, '.0f') + 'ms'
    if seconds < 60:
        return format(seconds, '.1f') + 's'
    if seconds < 3600:
        minutes, leftover_secs = divmod(int(seconds), 60)
        return f'{minutes}min {leftover_secs}s' if leftover_secs else f'{minutes}min'
    hours, leftover = divmod(int(seconds), 3600)
    minutes = leftover // 60
    return f'{hours}hr {minutes}min' if minutes else f'{hours}hr'
@enforce_types
def truncate_url(url: str, max_length: int = 60) -> str:
    """Truncate a URL to at most max_length chars, keeping the domain.

    Tries to preserve 'protocol://domain/' and as much of the path as fits,
    appending '...' to signal truncation; otherwise falls back to a plain
    right-truncation. Returns the URL unchanged when it already fits.
    """
    if len(url) <= max_length:
        return url
    # Try to keep the domain and beginning of path
    if '://' in url:
        protocol, rest = url.split('://', 1)
        if '/' in rest:
            domain, path = rest.split('/', 1)
            # Fixed overhead: '://' (3) + '/' (1) + '...' (3) = 7 chars.
            # (Subtracting only 6 here made results one char over max_length.)
            available = max_length - len(protocol) - len(domain) - 7
            if available > 10:
                return f'{protocol}://{domain}/{path[:available]}...'
    # Fallback: just truncate
    return url[:max_length - 3] + '...'
@enforce_types
def log_worker_event(
    worker_type: str,
    event: str,
    indent_level: int = 0,
    pid: Optional[int] = None,
    worker_id: Optional[str] = None,
    url: Optional[str] = None,
    extractor: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    error: Optional[Exception] = None,
) -> None:
    """
    Log a worker event with structured metadata and indentation.

    Args:
        worker_type: Type of worker (Orchestrator, CrawlWorker, SnapshotWorker, etc.)
        event: Event name (Starting, Completed, Failed, etc.)
        indent_level: Indentation level (0=Orchestrator, 1=CrawlWorker, 2=SnapshotWorker, 3=ArchiveResultWorker)
        pid: Process ID
        worker_id: Worker ID (UUID for CrawlWorker, url for SnapshotWorker, extractor for ArchiveResultWorker)
        url: URL being processed (for SnapshotWorker/ArchiveResultWorker)
        extractor: Extractor name (for ArchiveResultWorker)
        metadata: Dict of metadata to show in curly braces
        error: Exception if event is an error
    """
    # NOTE(review): indent unit width was garbled in transit — confirm against git history.
    indent = '  ' * indent_level
    # Build worker identifier, e.g. 'CrawlWorker[pid=123, id=abcd]'.
    # Which optional fields are shown depends on worker_type, so kwargs
    # irrelevant to this worker type are silently ignored.
    worker_parts = [worker_type]
    if pid:
        worker_parts.append(f'pid={pid}')
    if worker_id and worker_type in ('CrawlWorker', 'Orchestrator'):
        worker_parts.append(f'id={worker_id}')
    if url and worker_type == 'SnapshotWorker':
        worker_parts.append(f'url={truncate_url(url)}')
    if extractor and worker_type == 'ArchiveResultWorker':
        worker_parts.append(f'extractor={extractor}')
    worker_label = f'{worker_parts[0]}[{", ".join(worker_parts[1:])}]'
    # Build metadata string
    metadata_str = ''
    if metadata:
        # Format metadata nicely: keys containing 'duration' get duration
        # formatting, keys containing 'size'/'bytes' get filesize formatting,
        # lists/tuples are rendered as their lengths.
        meta_parts = []
        for k, v in metadata.items():
            if isinstance(v, float):
                # Format floats nicely (durations, sizes)
                if 'duration' in k.lower():
                    meta_parts.append(f'{k}: {format_duration(v)}')
                elif 'size' in k.lower():
                    meta_parts.append(f'{k}: {printable_filesize(int(v))}')
                else:
                    meta_parts.append(f'{k}: {v:.2f}')
            elif isinstance(v, int):
                # Format integers - check if it's a size
                if 'size' in k.lower() or 'bytes' in k.lower():
                    meta_parts.append(f'{k}: {printable_filesize(v)}')
                else:
                    meta_parts.append(f'{k}: {v}')
            elif isinstance(v, (list, tuple)):
                meta_parts.append(f'{k}: {len(v)}')
            else:
                meta_parts.append(f'{k}: {v}')
        metadata_str = ' {' + ', '.join(meta_parts) + '}'
    # Determine color based on event
    # NOTE(review): matching is on exact event strings; any new event name
    # falls back to 'white' unless added to one of these tuples.
    color = 'white'
    if event in ('Starting...', 'Started', 'STARTED', 'Started in background'):
        color = 'green'
    elif event in ('Processing...', 'PROCESSING'):
        color = 'blue'
    elif event in ('Completed', 'COMPLETED', 'All work complete'):
        color = 'blue'
    elif event in ('Failed', 'ERROR', 'Failed to spawn worker'):
        color = 'red'
    elif event in ('Shutting down', 'SHUTDOWN'):
        color = 'grey53'
    # Build final message
    error_str = f' {type(error).__name__}: {error}' if error else ''
    # Build colored message - worker_label needs to be inside color tags
    # But first we need to format the color tags separately from the worker label
    from archivebox.misc.logging import CONSOLE
    from rich.text import Text
    # Create a Rich Text object for proper formatting
    text = Text()
    text.append(indent)  # Indentation
    # Append worker label and event with color
    text.append(f'{worker_label} {event}{error_str}', style=color)
    # Append metadata without color
    text.append(metadata_str)
    CONSOLE.print(text)
@enforce_types
def printable_folders(folders: Dict[str, Optional["Snapshot"]], with_headers: bool=False) -> str:
return '\n'.join(

View File

@@ -0,0 +1 @@
🏛️

View File

@@ -7,7 +7,7 @@ new plugin-based output structure to the legacy canonical output paths that
ArchiveBox has historically used. This maintains backward compatibility with
existing tools and scripts that expect outputs at specific locations.
Canonical output paths (from Snapshot.canonical_outputs()):
Canonical output paths:
- favicon.ico → favicon/favicon.ico
- singlefile.html → singlefile/singlefile.html
- readability/content.html → readability/content.html
@@ -27,27 +27,20 @@ New plugin outputs:
- redirects.json → redirects/redirects.json
- console.jsonl → consolelog/console.jsonl
Usage: on_Snapshot__91_canonical_outputs.py --url=<url> --snapshot-id=<uuid>
Usage: on_Snapshot__92_canonical_outputs.py --url=<url> --snapshot-id=<uuid>
Environment variables:
SAVE_CANONICAL_SYMLINKS: Enable canonical symlinks (default: true)
DATA_DIR: ArchiveBox data directory
ARCHIVE_DIR: Archive output directory
"""
__package__ = 'archivebox.plugins.canonical_outputs'
import os
import sys
import json
from pathlib import Path
from typing import Dict, Optional
# Configure Django if running standalone
if __name__ == '__main__':
parent_dir = str(Path(__file__).resolve().parent.parent.parent)
if parent_dir not in sys.path:
sys.path.insert(0, parent_dir)
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.core.settings')
import django
django.setup()
from datetime import datetime, timezone
from typing import Dict
import rich_click as click
@@ -150,10 +143,7 @@ def create_canonical_symlinks(snapshot_dir: Path) -> Dict[str, bool]:
@click.option('--snapshot-id', required=True, help='Snapshot UUID')
def main(url: str, snapshot_id: str):
"""Create symlinks from plugin outputs to canonical legacy locations."""
from datetime import datetime
from archivebox.core.models import Snapshot
start_ts = datetime.now()
start_ts = datetime.now(timezone.utc)
status = 'failed'
output = None
error = ''
@@ -161,31 +151,20 @@ def main(url: str, snapshot_id: str):
try:
# Check if enabled
from archivebox.config import CONSTANTS
save_canonical = os.getenv('SAVE_CANONICAL_SYMLINKS', 'true').lower() in ('true', '1', 'yes', 'on')
if not save_canonical:
click.echo('Skipping canonical symlinks (SAVE_CANONICAL_SYMLINKS=False)')
status = 'skipped'
end_ts = datetime.now()
click.echo(f'START_TS={start_ts.isoformat()}')
click.echo(f'END_TS={end_ts.isoformat()}')
click.echo(f'STATUS={status}')
click.echo(f'RESULT_JSON={{"extractor": "canonical_outputs", "status": "{status}", "url": "{url}", "snapshot_id": "{snapshot_id}"}}')
click.echo(json.dumps({'status': status, 'output': 'SAVE_CANONICAL_SYMLINKS=false'}))
sys.exit(0)
# Get snapshot
try:
snapshot = Snapshot.objects.get(id=snapshot_id)
except Snapshot.DoesNotExist:
error = f'Snapshot {snapshot_id} not found'
raise ValueError(error)
# Working directory is the extractor output dir (e.g., <snapshot>/canonical_outputs/)
# Parent is the snapshot directory
output_dir = Path.cwd()
snapshot_dir = output_dir.parent
# Get snapshot directory
snapshot_dir = Path(snapshot.output_dir)
if not snapshot_dir.exists():
error = f'Snapshot directory not found: {snapshot_dir}'
raise FileNotFoundError(error)
raise FileNotFoundError(f'Snapshot directory not found: {snapshot_dir}')
# Create canonical symlinks
results = create_canonical_symlinks(snapshot_dir)
@@ -203,37 +182,18 @@ def main(url: str, snapshot_id: str):
status = 'failed'
click.echo(f'Error: {error}', err=True)
end_ts = datetime.now()
duration = (end_ts - start_ts).total_seconds()
end_ts = datetime.now(timezone.utc)
# Print results
click.echo(f'START_TS={start_ts.isoformat()}')
click.echo(f'END_TS={end_ts.isoformat()}')
click.echo(f'DURATION={duration:.2f}')
if output:
click.echo(f'OUTPUT={output}')
click.echo(f'STATUS={status}')
if error:
click.echo(f'ERROR={error}', err=True)
# Print JSON result
import json
result_json = {
'extractor': 'canonical_outputs',
'url': url,
'snapshot_id': snapshot_id,
# Print JSON result for hook runner
result = {
'status': status,
'start_ts': start_ts.isoformat(),
'end_ts': end_ts.isoformat(),
'duration': round(duration, 2),
'output': output,
'symlinks_created': symlinks_created,
'error': error or None,
'symlinks_created': symlinks_created,
}
click.echo(f'RESULT_JSON={json.dumps(result_json)}')
click.echo(json.dumps(result))
sys.exit(0 if status == 'succeeded' else 1)
sys.exit(0 if status in ('succeeded', 'skipped') else 1)
if __name__ == '__main__':

View File

@@ -1,149 +0,0 @@
#!/usr/bin/env python3
"""
Install Chrome/Chromium if not already available.
Runs at crawl start to ensure Chrome is installed.
Uses playwright to install chromium if no system Chrome found.
Outputs JSONL for InstalledBinary.
"""
import json
import sys
import os
import shutil
from pathlib import Path
def find_chrome():
    """Locate a Chrome/Chromium executable, or return None if absent.

    Search order: the CHROME_BINARY env var (must point at an existing
    file), `shutil.which()` over known executable names (Chrome variants
    first, then Chromium variants), then common absolute install paths
    (macOS app bundles, then Linux locations).
    """
    # Explicit override via environment variable wins.
    override = os.environ.get('CHROME_BINARY', '')
    if override and Path(override).is_file():
        return override

    # Known executable names, Chrome variants before Chromium variants.
    candidate_names = [
        'google-chrome',
        'google-chrome-stable',
        'google-chrome-beta',
        'google-chrome-canary',
        'google-chrome-unstable',
        'google-chrome-dev',
        'chrome',
        'chromium',
        'chromium-browser',
        'chromium-browser-beta',
        'chromium-browser-unstable',
        'chromium-browser-canary',
        'chromium-browser-dev',
    ]
    for candidate in candidate_names:
        resolved = shutil.which(candidate)
        if resolved:
            return resolved

    # Common absolute install locations: macOS app bundles, then Linux paths.
    candidate_paths = [
        '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
        '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
        '/Applications/Chromium.app/Contents/MacOS/Chromium',
        '/usr/bin/google-chrome',
        '/usr/bin/google-chrome-stable',
        '/usr/bin/chromium',
        '/usr/bin/chromium-browser',
        '/snap/bin/chromium',
        '/opt/google/chrome/chrome',
    ]
    for candidate in candidate_paths:
        if Path(candidate).is_file():
            return candidate

    return None
def main():
    """Find or install Chrome/Chromium and report it as JSONL.

    Exits 0 with an `InstalledBinary` record when a browser is available
    (system-installed, or freshly installed via apt/brew); exits 1 with a
    `Dependency` record when every attempt fails.
    """
    try:
        # First try to find system Chrome
        system_chrome = find_chrome()
        if system_chrome:
            # System browser found -- version/sha256 are deliberately not probed here.
            print(json.dumps({
                'type': 'InstalledBinary',
                'name': 'chrome',
                'abspath': str(system_chrome),
                'version': None,
                'sha256': None,
                'binprovider': 'env',
            }))
            sys.exit(0)

        # If not found in system, try to install chromium via apt/brew
        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
        AptProvider.model_rebuild()
        BrewProvider.model_rebuild()
        EnvProvider.model_rebuild()

        # Try chromium-browser or chromium via system package managers
        for binary_name in ['chromium', 'chromium-browser', 'google-chrome']:
            try:
                chrome_binary = Binary(
                    name=binary_name,
                    binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
                )
                # Try to load, install if not found
                try:
                    loaded = chrome_binary.load()
                    if not loaded or not loaded.abspath:
                        raise Exception("Not loaded")
                except Exception:
                    # Install via system package manager
                    loaded = chrome_binary.install()
                if loaded and loaded.abspath:
                    # Output InstalledBinary JSONL
                    # (sys.exit raises SystemExit, which the except clauses below do not catch)
                    print(json.dumps({
                        'type': 'InstalledBinary',
                        'name': 'chrome',
                        'abspath': str(loaded.abspath),
                        'version': str(loaded.version) if loaded.version else None,
                        'sha256': loaded.sha256,
                        'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
                    }))
                    sys.exit(0)
            except Exception:
                # This candidate package name failed -- try the next one
                continue

        # If all attempts failed
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'chrome',
            'bin_providers': 'apt,brew,env',
        }))
        print("Failed to install Chrome/Chromium", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Unexpected failure (e.g. abx_pkg not importable) -- still emit a Dependency record
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'chrome',
            'bin_providers': 'apt,brew,env',
        }))
        print(f"Error installing Chrome: {e}", file=sys.stderr)
        sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -2,7 +2,7 @@
Integration tests for chrome_session plugin
Tests verify:
1. Install hook finds system Chrome or installs chromium
1. Validate hook checks for Chrome/Chromium binary
2. Verify deps with abx-pkg
3. Chrome session script exists
"""
@@ -14,7 +14,7 @@ from pathlib import Path
import pytest
PLUGIN_DIR = Path(__file__).parent.parent
CHROME_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_chrome.py'
CHROME_VALIDATE_HOOK = PLUGIN_DIR / 'on_Crawl__00_validate_chrome.py'
CHROME_SESSION_HOOK = PLUGIN_DIR / 'on_Snapshot__20_chrome_session.js'
@@ -23,37 +23,50 @@ def test_hook_script_exists():
assert CHROME_SESSION_HOOK.exists(), f"Hook not found: {CHROME_SESSION_HOOK}"
def test_chrome_install_hook():
"""Test chrome install hook to find or install Chrome/Chromium."""
def test_chrome_validate_hook():
"""Test chrome validate hook checks for Chrome/Chromium binary."""
result = subprocess.run(
[sys.executable, str(CHROME_INSTALL_HOOK)],
[sys.executable, str(CHROME_VALIDATE_HOOK)],
capture_output=True,
text=True,
timeout=600
timeout=30
)
assert result.returncode == 0, f"Install hook failed: {result.stderr}"
# Verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'chrome'
assert record['abspath']
assert Path(record['abspath']).exists(), f"Chrome binary should exist at {record['abspath']}"
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record"
# Hook exits 0 if binary found, 1 if not found (with Dependency record)
if result.returncode == 0:
# Binary found - verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'chrome'
assert record['abspath']
assert Path(record['abspath']).exists(), f"Chrome binary should exist at {record['abspath']}"
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record when binary found"
else:
# Binary not found - verify Dependency JSONL output
found_dependency = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'Dependency':
assert record['bin_name'] == 'chrome'
found_dependency = True
break
except json.JSONDecodeError:
pass
assert found_dependency, "Should output Dependency record when binary not found"
def test_verify_deps_with_abx_pkg():
"""Verify chrome is available via abx-pkg after hook installation."""
"""Verify chrome is available via abx-pkg."""
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
AptProvider.model_rebuild()
@@ -75,10 +88,10 @@ def test_verify_deps_with_abx_pkg():
except Exception:
continue
# If we get here, chrome should still be available from system
# If we get here, chrome not available
import shutil
assert shutil.which('chromium') or shutil.which('chrome') or shutil.which('google-chrome'), \
"Chrome should be available after install hook"
if not (shutil.which('chromium') or shutil.which('chrome') or shutil.which('google-chrome')):
pytest.skip("Chrome/Chromium not available - Dependency record should have been emitted")
if __name__ == '__main__':

View File

@@ -0,0 +1,6 @@
<!-- DOM embed - full iframe of captured DOM HTML -->
<iframe src="{{ output_path }}"
class="extractor-embed dom-embed"
style="width: 100%; height: 100%; min-height: 500px; border: none;"
sandbox="allow-same-origin allow-scripts allow-forms">
</iframe>

View File

@@ -0,0 +1,6 @@
<!-- DOM fullscreen - full page iframe -->
<iframe src="{{ output_path }}"
class="extractor-fullscreen dom-fullscreen"
style="width: 100%; height: 100vh; border: none;"
sandbox="allow-same-origin allow-scripts allow-forms allow-top-navigation-by-user-activation">
</iframe>

View File

@@ -0,0 +1 @@
🌐

View File

@@ -0,0 +1,8 @@
<!-- DOM thumbnail - scaled down iframe preview of captured DOM HTML -->
<div class="extractor-thumbnail dom-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fff;">
<iframe src="{{ output_path }}"
style="width: 400%; height: 400px; transform: scale(0.25); transform-origin: top left; pointer-events: none; border: none;"
loading="lazy"
sandbox="allow-same-origin">
</iframe>
</div>

View File

@@ -0,0 +1 @@

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env python3
"""
Install git if not already available.
Runs at crawl start to ensure git is installed.
Outputs JSONL for InstalledBinary.
"""
import json
import sys
from pathlib import Path
def main():
    """Find or install git and report the result as JSONL.

    Exits 0 with an `InstalledBinary` record when git is available
    (pre-existing, or installed via apt/brew); exits 1 with a
    `Dependency` record otherwise.
    """
    try:
        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
        AptProvider.model_rebuild()
        BrewProvider.model_rebuild()
        EnvProvider.model_rebuild()

        # git binary and package have same name
        git_binary = Binary(
            name='git',
            binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
        )

        # Try to load, install if not found
        try:
            loaded = git_binary.load()
            if not loaded or not loaded.abspath:
                raise Exception("Not loaded")
        except Exception:
            # Install via system package manager
            loaded = git_binary.install()

        if loaded and loaded.abspath:
            # Output InstalledBinary JSONL
            print(json.dumps({
                'type': 'InstalledBinary',
                'name': 'git',
                'abspath': str(loaded.abspath),
                'version': str(loaded.version) if loaded.version else None,
                'sha256': loaded.sha256,
                'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
            }))
            sys.exit(0)
        else:
            print(json.dumps({
                'type': 'Dependency',
                'bin_name': 'git',
                'bin_providers': 'apt,brew,env',
            }))
            print("Failed to install git", file=sys.stderr)
            sys.exit(1)
    except Exception as e:
        # Unexpected failure (e.g. abx_pkg not importable) -- still emit a Dependency record
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'git',
            'bin_providers': 'apt,brew,env',
        }))
        print(f"Error installing git: {e}", file=sys.stderr)
        sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,6 @@
<!-- Git embed - directory listing of cloned repo -->
<iframe src="{{ output_path }}"
class="extractor-embed git-embed"
style="width: 100%; height: 100%; min-height: 400px; border: none; background: #fff;"
sandbox="allow-same-origin">
</iframe>

View File

@@ -0,0 +1,6 @@
<!-- Git fullscreen - full directory listing -->
<iframe src="{{ output_path }}"
class="extractor-fullscreen git-fullscreen"
style="width: 100%; height: 100vh; border: none; background: #fff;"
sandbox="allow-same-origin">
</iframe>

View File

@@ -0,0 +1 @@
📂

View File

@@ -0,0 +1,5 @@
<!-- Git thumbnail - shows git repository icon and info -->
<div class="extractor-thumbnail git-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #f6f8fa; display: flex; flex-direction: column; align-items: center; justify-content: center; padding: 10px;">
<span style="font-size: 32px;">📂</span>
<span style="font-size: 11px; color: #586069; margin-top: 4px;">Git Repository</span>
</div>

View File

@@ -2,7 +2,7 @@
Integration tests for git plugin
Tests verify:
1. Install hook installs git via abx-pkg
1. Validate hook checks for git binary
2. Verify deps with abx-pkg
3. Standalone git extractor execution
"""
@@ -17,50 +17,64 @@ import pytest
PLUGIN_DIR = Path(__file__).parent.parent
GIT_HOOK = PLUGIN_DIR / 'on_Snapshot__12_git.py'
GIT_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_git.py'
GIT_VALIDATE_HOOK = PLUGIN_DIR / 'on_Crawl__00_validate_git.py'
TEST_URL = 'https://github.com/example/repo.git'
def test_hook_script_exists():
assert GIT_HOOK.exists()
def test_git_install_hook():
"""Test git install hook to install git if needed."""
def test_git_validate_hook():
"""Test git validate hook checks for git binary."""
result = subprocess.run(
[sys.executable, str(GIT_INSTALL_HOOK)],
[sys.executable, str(GIT_VALIDATE_HOOK)],
capture_output=True,
text=True,
timeout=600
timeout=30
)
assert result.returncode == 0, f"Install hook failed: {result.stderr}"
# Verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'git'
assert record['abspath']
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record"
# Hook exits 0 if binary found, 1 if not found (with Dependency record)
if result.returncode == 0:
# Binary found - verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'git'
assert record['abspath']
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record when binary found"
else:
# Binary not found - verify Dependency JSONL output
found_dependency = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'Dependency':
assert record['bin_name'] == 'git'
assert 'env' in record['bin_providers']
found_dependency = True
break
except json.JSONDecodeError:
pass
assert found_dependency, "Should output Dependency record when binary not found"
def test_verify_deps_with_abx_pkg():
"""Verify git is available via abx-pkg after hook installation."""
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider
AptProvider.model_rebuild()
BrewProvider.model_rebuild()
EnvProvider.model_rebuild()
"""Verify git is available via abx-pkg."""
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
git_binary = Binary(name='git', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
git_loaded = git_binary.load()
assert git_loaded and git_loaded.abspath, "git should be available after install hook"
if git_loaded and git_loaded.abspath:
assert True, "git is available"
else:
pytest.skip("git not available - Dependency record should have been emitted")
def test_reports_missing_git():
with tempfile.TemporaryDirectory() as tmpdir:

View File

@@ -0,0 +1 @@
📋

View File

@@ -0,0 +1 @@
📃

View File

@@ -1,67 +0,0 @@
#!/usr/bin/env python3
"""
Install yt-dlp if not already available.
Runs at crawl start to ensure yt-dlp is installed.
Outputs JSONL for InstalledBinary.
"""
import json
import sys
from pathlib import Path
def main():
    """Find or install yt-dlp and report the result as JSONL.

    Exits 0 with an `InstalledBinary` record when yt-dlp is available
    (pre-existing, or installed via pip); exits 1 with a `Dependency`
    record otherwise.
    """
    try:
        from abx_pkg import Binary, PipProvider, EnvProvider
        PipProvider.model_rebuild()
        EnvProvider.model_rebuild()

        # yt-dlp binary and pip package have the same name
        ytdlp_binary = Binary(
            name='yt-dlp',
            binproviders=[PipProvider(), EnvProvider()]
        )

        # Try to load an existing install; fall back to installing via pip
        try:
            loaded = ytdlp_binary.load()
            if not loaded or not loaded.abspath:
                raise Exception("Not loaded")
        except Exception:
            loaded = ytdlp_binary.install()

        if loaded and loaded.abspath:
            # Output InstalledBinary JSONL
            print(json.dumps({
                'type': 'InstalledBinary',
                'name': 'yt-dlp',
                'abspath': str(loaded.abspath),
                'version': str(loaded.version) if loaded.version else None,
                'sha256': loaded.sha256,
                'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
            }))
            sys.exit(0)
        else:
            # Fixed: record previously claimed 'pip,brew,env' even though only
            # the pip and env providers are attempted above.
            print(json.dumps({
                'type': 'Dependency',
                'bin_name': 'yt-dlp',
                'bin_providers': 'pip,env',
            }))
            print("Failed to install yt-dlp", file=sys.stderr)
            sys.exit(1)
    except Exception as e:
        # Unexpected failure (e.g. abx_pkg not importable) -- still emit a Dependency record
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'yt-dlp',
            'bin_providers': 'pip,env',
        }))
        print(f"Error installing yt-dlp: {e}", file=sys.stderr)
        sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,278 @@
#!/usr/bin/env python3
"""
Validation hook for yt-dlp and its dependencies (node, ffmpeg).
Runs at crawl start to verify yt-dlp and required binaries are available.
Outputs JSONL for InstalledBinary and Machine config updates.
"""
import os
import sys
import json
import shutil
import hashlib
import subprocess
from pathlib import Path
def get_binary_version(abspath: str, version_flag: str = '--version') -> str | None:
"""Get version string from binary."""
try:
result = subprocess.run(
[abspath, version_flag],
capture_output=True,
text=True,
timeout=5,
)
if result.returncode == 0 and result.stdout:
first_line = result.stdout.strip().split('\n')[0]
return first_line[:64]
except Exception:
pass
return None
def get_binary_hash(abspath: str) -> str | None:
"""Get SHA256 hash of binary."""
try:
with open(abspath, 'rb') as f:
return hashlib.sha256(f.read()).hexdigest()
except Exception:
return None
def find_ytdlp() -> dict | None:
    """Locate the yt-dlp binary.

    Tries abx-pkg first (pip/env providers); on any failure falls back to
    `shutil.which('yt-dlp')` or the YTDLP_BINARY env var.

    Returns a dict with keys name/abspath/version/sha256/binprovider,
    or None if no binary was found.
    """
    try:
        from abx_pkg import Binary, PipProvider, EnvProvider

        class YtdlpBinary(Binary):
            name: str = 'yt-dlp'
            binproviders_supported = [PipProvider(), EnvProvider()]

        binary = YtdlpBinary()
        loaded = binary.load()
        if loaded and loaded.abspath:
            return {
                'name': 'yt-dlp',
                'abspath': str(loaded.abspath),
                'version': str(loaded.version) if loaded.version else None,
                'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
                # NOTE(review): sibling install hooks read `loaded_binprovider`;
                # confirm `binprovider` is the right attribute here -- an
                # AttributeError would be silently swallowed by the except below.
                'binprovider': loaded.binprovider.name if loaded.binprovider else 'env',
            }
    except ImportError:
        # abx_pkg not installed -- use the PATH/env fallback below
        pass
    except Exception:
        pass

    # Fallback to shutil.which
    abspath = shutil.which('yt-dlp') or os.environ.get('YTDLP_BINARY', '')
    if abspath and Path(abspath).is_file():
        return {
            'name': 'yt-dlp',
            'abspath': abspath,
            'version': get_binary_version(abspath),
            'sha256': get_binary_hash(abspath),
            'binprovider': 'env',
        }
    return None
def find_node() -> dict | None:
    """Locate the node binary (needed by yt-dlp for JS-based extraction).

    Tries abx-pkg first (apt/brew/env providers, apt package `nodejs`); on
    any failure falls back to `shutil.which('node')` or the NODE_BINARY
    env var.

    Returns a dict with keys name/abspath/version/sha256/binprovider,
    or None if node cannot be found.
    """
    try:
        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider

        class NodeBinary(Binary):
            name: str = 'node'
            binproviders_supported = [AptProvider(), BrewProvider(), EnvProvider()]
            # apt ships node as the `nodejs` package
            overrides: dict = {'apt': {'packages': ['nodejs']}}

        binary = NodeBinary()
        loaded = binary.load()
        if loaded and loaded.abspath:
            return {
                'name': 'node',
                'abspath': str(loaded.abspath),
                'version': str(loaded.version) if loaded.version else None,
                'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
                # NOTE(review): sibling install hooks read `loaded_binprovider`;
                # confirm `binprovider` is the right attribute here -- an
                # AttributeError would be silently swallowed by the except below.
                'binprovider': loaded.binprovider.name if loaded.binprovider else 'env',
            }
    except ImportError:
        # abx_pkg not installed -- use the PATH/env fallback below
        pass
    except Exception:
        pass

    # Fallback to shutil.which
    abspath = shutil.which('node') or os.environ.get('NODE_BINARY', '')
    if abspath and Path(abspath).is_file():
        return {
            'name': 'node',
            'abspath': abspath,
            'version': get_binary_version(abspath),
            'sha256': get_binary_hash(abspath),
            'binprovider': 'env',
        }
    return None
def find_ffmpeg() -> dict | None:
    """Locate the ffmpeg binary (needed by yt-dlp for media conversion).

    Tries abx-pkg first (apt/brew/env providers); on any failure falls back
    to `shutil.which('ffmpeg')` or the FFMPEG_BINARY env var.

    Returns a dict with keys name/abspath/version/sha256/binprovider,
    or None if ffmpeg cannot be found.
    """
    try:
        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider

        class FfmpegBinary(Binary):
            name: str = 'ffmpeg'
            binproviders_supported = [AptProvider(), BrewProvider(), EnvProvider()]

        binary = FfmpegBinary()
        loaded = binary.load()
        if loaded and loaded.abspath:
            return {
                'name': 'ffmpeg',
                'abspath': str(loaded.abspath),
                'version': str(loaded.version) if loaded.version else None,
                'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
                # NOTE(review): sibling install hooks read `loaded_binprovider`;
                # confirm `binprovider` is the right attribute here -- an
                # AttributeError would be silently swallowed by the except below.
                'binprovider': loaded.binprovider.name if loaded.binprovider else 'env',
            }
    except ImportError:
        # abx_pkg not installed -- use the PATH/env fallback below
        pass
    except Exception:
        pass

    # Fallback to shutil.which
    abspath = shutil.which('ffmpeg') or os.environ.get('FFMPEG_BINARY', '')
    if abspath and Path(abspath).is_file():
        return {
            'name': 'ffmpeg',
            'abspath': abspath,
            'version': get_binary_version(abspath),
            'sha256': get_binary_hash(abspath),
            'binprovider': 'env',
        }
    return None
def _report_binary(result, bin_name, config_prefix, bin_providers, missing_deps):
    """Emit the JSONL records for one dependency check.

    If `result` describes a found binary: print an `InstalledBinary` record
    plus `Machine` config updates (<config_prefix>_BINARY and, when known,
    <config_prefix>_VERSION). Otherwise print a `Dependency` record listing
    `bin_providers` and append `bin_name` to `missing_deps`.
    """
    if result and result.get('abspath'):
        print(json.dumps({
            'type': 'InstalledBinary',
            'name': result['name'],
            'abspath': result['abspath'],
            'version': result['version'],
            'sha256': result['sha256'],
            'binprovider': result['binprovider'],
        }))
        print(json.dumps({
            'type': 'Machine',
            '_method': 'update',
            'key': f'config/{config_prefix}_BINARY',
            'value': result['abspath'],
        }))
        if result['version']:
            print(json.dumps({
                'type': 'Machine',
                '_method': 'update',
                'key': f'config/{config_prefix}_VERSION',
                'value': result['version'],
            }))
    else:
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': bin_name,
            'bin_providers': bin_providers,
        }))
        missing_deps.append(bin_name)


def main():
    """Validate that yt-dlp and its runtime dependencies (node, ffmpeg) exist.

    Emits one JSONL stanza per binary (InstalledBinary + Machine config
    updates, or a Dependency record when missing). Exits 1 if any binary
    is missing, 0 otherwise.
    """
    missing_deps = []

    # yt-dlp itself (required)
    _report_binary(find_ytdlp(), 'yt-dlp', 'YTDLP', 'pip,env', missing_deps)
    # node (required for JS extraction)
    _report_binary(find_node(), 'node', 'NODE', 'apt,brew,env', missing_deps)
    # ffmpeg (required for video conversion)
    _report_binary(find_ffmpeg(), 'ffmpeg', 'FFMPEG', 'apt,brew,env', missing_deps)

    if missing_deps:
        print(f"Missing dependencies: {', '.join(missing_deps)}", file=sys.stderr)
        sys.exit(1)
    sys.exit(0)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,9 @@
<!-- Media embed - video/audio player -->
<div class="extractor-embed media-embed" style="width: 100%; height: 100%; min-height: 400px; background: #1a1a1a; display: flex; align-items: center; justify-content: center;">
<video src="{{ output_path }}"
style="max-width: 100%; max-height: 100%;"
controls
preload="metadata">
Your browser does not support the video tag.
</video>
</div>

View File

@@ -0,0 +1,10 @@
<!-- Media fullscreen - full video/audio player -->
<div class="extractor-fullscreen media-fullscreen" style="width: 100%; height: 100vh; background: #000; display: flex; align-items: center; justify-content: center;">
<video src="{{ output_path }}"
style="max-width: 100%; max-height: 100%;"
controls
autoplay
preload="auto">
Your browser does not support the video tag.
</video>
</div>

View File

@@ -0,0 +1 @@
🎬

View File

@@ -0,0 +1,14 @@
<!-- Media thumbnail - shows video/audio player or placeholder -->
<div class="extractor-thumbnail media-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #1a1a1a; display: flex; align-items: center; justify-content: center;">
<video src="{{ output_path }}"
style="width: 100%; height: 100px; object-fit: contain;"
poster=""
preload="metadata"
muted
onerror="this.style.display='none'; this.nextElementSibling.style.display='flex';">
</video>
<div style="display: none; flex-direction: column; align-items: center; color: #888; font-size: 12px;">
<span style="font-size: 32px;">🎬</span>
<span>Media</span>
</div>
</div>

View File

@@ -21,7 +21,7 @@ import pytest
PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
MEDIA_HOOK = PLUGIN_DIR / 'on_Snapshot__51_media.py'
MEDIA_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_ytdlp.py'
MEDIA_VALIDATE_HOOK = PLUGIN_DIR / 'on_Crawl__00_validate_ytdlp.py'
TEST_URL = 'https://example.com/video.mp4'
def test_hook_script_exists():
@@ -29,46 +29,72 @@ def test_hook_script_exists():
assert MEDIA_HOOK.exists(), f"Hook not found: {MEDIA_HOOK}"
def test_ytdlp_install_hook():
"""Test yt-dlp install hook to install yt-dlp if needed."""
# Run yt-dlp install hook
def test_ytdlp_validate_hook():
"""Test yt-dlp validate hook checks for yt-dlp and dependencies (node, ffmpeg)."""
# Run yt-dlp validate hook
result = subprocess.run(
[sys.executable, str(MEDIA_INSTALL_HOOK)],
[sys.executable, str(MEDIA_VALIDATE_HOOK)],
capture_output=True,
text=True,
timeout=600
timeout=30
)
assert result.returncode == 0, f"Install hook failed: {result.stderr}"
# Hook exits 0 if all binaries found, 1 if any not found
# Parse output for InstalledBinary and Dependency records
found_binaries = {'node': False, 'ffmpeg': False, 'yt-dlp': False}
found_dependencies = {'node': False, 'ffmpeg': False, 'yt-dlp': False}
# Verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'yt-dlp'
assert record['abspath']
found_binary = True
break
name = record['name']
if name in found_binaries:
assert record['abspath'], f"{name} should have abspath"
found_binaries[name] = True
elif record.get('type') == 'Dependency':
name = record['bin_name']
if name in found_dependencies:
found_dependencies[name] = True
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record"
# Each binary should either be found (InstalledBinary) or missing (Dependency)
for binary_name in ['yt-dlp', 'node', 'ffmpeg']:
assert found_binaries[binary_name] or found_dependencies[binary_name], \
f"{binary_name} should have either InstalledBinary or Dependency record"
def test_verify_deps_with_abx_pkg():
"""Verify yt-dlp is available via abx-pkg after hook installation."""
from abx_pkg import Binary, PipProvider, EnvProvider, BinProviderOverrides
"""Verify yt-dlp, node, and ffmpeg are available via abx-pkg."""
from abx_pkg import Binary, PipProvider, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
PipProvider.model_rebuild()
EnvProvider.model_rebuild()
missing_binaries = []
# Verify yt-dlp is available
ytdlp_binary = Binary(name='yt-dlp', binproviders=[PipProvider(), EnvProvider()])
ytdlp_loaded = ytdlp_binary.load()
assert ytdlp_loaded and ytdlp_loaded.abspath, "yt-dlp should be available after install hook"
if not (ytdlp_loaded and ytdlp_loaded.abspath):
missing_binaries.append('yt-dlp')
# Verify node is available (yt-dlp needs it for JS extraction)
node_binary = Binary(
name='node',
binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
)
node_loaded = node_binary.load()
if not (node_loaded and node_loaded.abspath):
missing_binaries.append('node')
# Verify ffmpeg is available (yt-dlp needs it for video conversion)
ffmpeg_binary = Binary(name='ffmpeg', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
ffmpeg_loaded = ffmpeg_binary.load()
if not (ffmpeg_loaded and ffmpeg_loaded.abspath):
missing_binaries.append('ffmpeg')
if missing_binaries:
pytest.skip(f"Binaries not available: {', '.join(missing_binaries)} - Dependency records should have been emitted")
def test_handles_non_media_url():
"""Test that media extractor handles non-media URLs gracefully via hook."""

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env python3
"""
Install mercury-parser if not already available.
Runs at crawl start to ensure mercury-parser is installed.
Outputs JSONL for InstalledBinary.
"""
import json
import sys
from pathlib import Path
def main():
    """Find or install mercury-parser and report the result as JSONL.

    Exits 0 with an `InstalledBinary` record when mercury-parser is
    available (pre-existing, or installed via npm); exits 1 with a
    `Dependency` record otherwise.
    """
    try:
        from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
        NpmProvider.model_rebuild()
        EnvProvider.model_rebuild()

        # Note: npm package is @postlight/mercury-parser, binary is mercury-parser
        mercury_binary = Binary(
            name='mercury-parser',
            binproviders=[NpmProvider(), EnvProvider()],
            overrides={'npm': {'packages': ['@postlight/mercury-parser']}}
        )

        # Try to load, install if not found
        try:
            loaded = mercury_binary.load()
            if not loaded or not loaded.abspath:
                raise Exception("Not loaded")
        except Exception:
            # Install via npm
            loaded = mercury_binary.install()

        if loaded and loaded.abspath:
            # Output InstalledBinary JSONL
            print(json.dumps({
                'type': 'InstalledBinary',
                'name': 'mercury-parser',
                'abspath': str(loaded.abspath),
                'version': str(loaded.version) if loaded.version else None,
                'sha256': loaded.sha256,
                'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
            }))
            sys.exit(0)
        else:
            print(json.dumps({
                'type': 'Dependency',
                'bin_name': 'mercury-parser',
                'bin_providers': 'npm,env',
            }))
            print("Failed to install mercury-parser", file=sys.stderr)
            sys.exit(1)
    except Exception as e:
        # Unexpected failure (e.g. abx_pkg not importable) -- still emit a Dependency record
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'mercury-parser',
            'bin_providers': 'npm,env',
        }))
        print(f"Error installing mercury-parser: {e}", file=sys.stderr)
        sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,123 @@
#!/usr/bin/env python3
"""
Validation hook for postlight-parser binary.
Runs at crawl start to verify postlight-parser is available.
Outputs JSONL for InstalledBinary and Machine config updates.
"""
import os
import sys
import json
import shutil
import hashlib
import subprocess
from pathlib import Path
def get_binary_version(abspath: str) -> str | None:
"""Get version string from binary."""
try:
result = subprocess.run(
[abspath, '--version'],
capture_output=True,
text=True,
timeout=5,
)
if result.returncode == 0 and result.stdout:
first_line = result.stdout.strip().split('\n')[0]
return first_line[:64]
except Exception:
pass
return None
def get_binary_hash(abspath: str) -> str | None:
"""Get SHA256 hash of binary."""
try:
with open(abspath, 'rb') as f:
return hashlib.sha256(f.read()).hexdigest()
except Exception:
return None
def find_mercury() -> dict | None:
    """Locate the postlight-parser binary.

    Tries abx-pkg first (npm/env providers, npm package `@postlight/parser`);
    on any failure falls back to `shutil.which('postlight-parser')` or the
    MERCURY_BINARY env var.

    Returns a dict with keys name/abspath/version/sha256/binprovider,
    or None if no binary was found.
    """
    try:
        from abx_pkg import Binary, NpmProvider, EnvProvider

        class MercuryBinary(Binary):
            name: str = 'postlight-parser'
            binproviders_supported = [NpmProvider(), EnvProvider()]
            # npm package name differs from the installed binary name
            overrides: dict = {'npm': {'packages': ['@postlight/parser']}}

        binary = MercuryBinary()
        loaded = binary.load()
        if loaded and loaded.abspath:
            return {
                'name': 'postlight-parser',
                'abspath': str(loaded.abspath),
                'version': str(loaded.version) if loaded.version else None,
                'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
                # NOTE(review): sibling install hooks read `loaded_binprovider`;
                # confirm `binprovider` is the right attribute here -- an
                # AttributeError would be silently swallowed by the except below.
                'binprovider': loaded.binprovider.name if loaded.binprovider else 'env',
            }
    except ImportError:
        # abx_pkg not installed -- use the PATH/env fallback below
        pass
    except Exception:
        pass

    # Fallback to shutil.which
    abspath = shutil.which('postlight-parser') or os.environ.get('MERCURY_BINARY', '')
    if abspath and Path(abspath).is_file():
        return {
            'name': 'postlight-parser',
            'abspath': abspath,
            'version': get_binary_version(abspath),
            'sha256': get_binary_hash(abspath),
            'binprovider': 'env',
        }
    return None
def main():
    """Check for the postlight-parser binary and emit JSONL status records.

    On success: prints an `InstalledBinary` record plus `Machine` config
    updates (MERCURY_BINARY, and MERCURY_VERSION when the version is known),
    then exits 0. On failure: prints a `Dependency` record and exits 1.
    """
    result = find_mercury()

    # Guard clause: binary missing -> emit Dependency record and bail out.
    if not (result and result.get('abspath')):
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'postlight-parser',
            'bin_providers': 'npm,env',
        }))
        # Fixed: was an f-string with no placeholders (F541)
        print("postlight-parser binary not found", file=sys.stderr)
        sys.exit(1)

    print(json.dumps({
        'type': 'InstalledBinary',
        'name': result['name'],
        'abspath': result['abspath'],
        'version': result['version'],
        'sha256': result['sha256'],
        'binprovider': result['binprovider'],
    }))
    print(json.dumps({
        'type': 'Machine',
        '_method': 'update',
        'key': 'config/MERCURY_BINARY',
        'value': result['abspath'],
    }))
    if result['version']:
        print(json.dumps({
            'type': 'Machine',
            '_method': 'update',
            'key': 'config/MERCURY_VERSION',
            'value': result['version'],
        }))
    sys.exit(0)
if __name__ == '__main__':
main()

View File

@@ -6,10 +6,10 @@ Usage: on_Snapshot__mercury.py --url=<url> --snapshot-id=<uuid>
Output: Creates mercury/ directory with content.html, content.txt, article.json
Environment variables:
MERCURY_BINARY: Path to mercury-parser binary
MERCURY_BINARY: Path to postlight-parser binary
TIMEOUT: Timeout in seconds (default: 60)
Note: Requires mercury-parser: npm install -g @postlight/mercury-parser
Note: Requires postlight-parser: npm install -g @postlight/parser
"""
import json
@@ -25,7 +25,7 @@ import rich_click as click
# Extractor metadata
EXTRACTOR_NAME = 'mercury'
BIN_NAME = 'mercury-parser'
BIN_NAME = 'postlight-parser'
BIN_PROVIDERS = 'npm,env'
OUTPUT_DIR = 'mercury'
@@ -42,12 +42,12 @@ def get_env_int(name: str, default: int = 0) -> int:
def find_mercury() -> str | None:
"""Find mercury-parser binary."""
"""Find postlight-parser binary."""
mercury = get_env('MERCURY_BINARY')
if mercury and os.path.isfile(mercury):
return mercury
for name in ['mercury-parser', 'mercury']:
for name in ['postlight-parser']:
binary = shutil.which(name)
if binary:
return binary
@@ -56,7 +56,7 @@ def find_mercury() -> str | None:
def get_version(binary: str) -> str:
"""Get mercury-parser version."""
"""Get postlight-parser version."""
try:
result = subprocess.run([binary, '--version'], capture_output=True, text=True, timeout=10)
return result.stdout.strip()[:64]
@@ -83,12 +83,12 @@ def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
if result_text.returncode != 0:
stderr = result_text.stderr.decode('utf-8', errors='replace')
return False, None, f'mercury-parser failed: {stderr[:200]}'
return False, None, f'postlight-parser failed: {stderr[:200]}'
try:
text_json = json.loads(result_text.stdout)
except json.JSONDecodeError:
return False, None, 'mercury-parser returned invalid JSON'
return False, None, 'postlight-parser returned invalid JSON'
if text_json.get('failed'):
return False, None, 'Mercury was not able to extract article'
@@ -139,7 +139,7 @@ def main(url: str, snapshot_id: str):
# Find binary
binary = find_mercury()
if not binary:
print(f'ERROR: mercury-parser binary not found', file=sys.stderr)
print(f'ERROR: postlight-parser binary not found', file=sys.stderr)
print(f'DEPENDENCY_NEEDED={BIN_NAME}', file=sys.stderr)
print(f'BIN_PROVIDERS={BIN_PROVIDERS}', file=sys.stderr)
sys.exit(1)

View File

@@ -0,0 +1,6 @@
<!-- Mercury embed - Mercury parser article view -->
<iframe src="{{ output_path }}"
class="extractor-embed mercury-embed"
style="width: 100%; height: 100%; min-height: 500px; border: none; background: #fefefe;"
sandbox="allow-same-origin">
</iframe>

View File

@@ -0,0 +1,6 @@
<!-- Mercury fullscreen - full Mercury parser article -->
<iframe src="{{ output_path }}"
class="extractor-fullscreen mercury-fullscreen"
style="width: 100%; height: 100vh; border: none; background: #fefefe;"
sandbox="allow-same-origin">
</iframe>

View File

@@ -0,0 +1 @@
☿️

View File

@@ -0,0 +1,8 @@
<!-- Mercury thumbnail - shows Mercury parser extracted article content -->
<div class="extractor-thumbnail mercury-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fefefe; padding: 8px; font-family: Georgia, serif; font-size: 11px; line-height: 1.4; color: #333;">
<iframe src="{{ output_path }}"
style="width: 100%; height: 300px; border: none; pointer-events: none;"
loading="lazy"
sandbox="allow-same-origin">
</iframe>
</div>

View File

@@ -21,7 +21,7 @@ import pytest
PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
MERCURY_HOOK = PLUGIN_DIR / 'on_Snapshot__53_mercury.py'
MERCURY_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_mercury.py'
MERCURY_VALIDATE_HOOK = PLUGIN_DIR / 'on_Crawl__00_validate_mercury.py'
TEST_URL = 'https://example.com'
def test_hook_script_exists():
@@ -29,53 +29,70 @@ def test_hook_script_exists():
assert MERCURY_HOOK.exists(), f"Hook not found: {MERCURY_HOOK}"
def test_mercury_install_hook():
"""Test mercury install hook to install mercury-parser if needed."""
# Run mercury install hook
def test_mercury_validate_hook():
"""Test mercury validate hook checks for postlight-parser."""
# Run mercury validate hook
result = subprocess.run(
[sys.executable, str(MERCURY_INSTALL_HOOK)],
[sys.executable, str(MERCURY_VALIDATE_HOOK)],
capture_output=True,
text=True,
timeout=600
timeout=30
)
assert result.returncode == 0, f"Install hook failed: {result.stderr}"
# Verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'mercury-parser'
assert record['abspath']
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record"
# Hook exits 0 if binary found, 1 if not found (with Dependency record)
if result.returncode == 0:
# Binary found - verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'postlight-parser'
assert record['abspath']
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record when binary found"
else:
# Binary not found - verify Dependency JSONL output
found_dependency = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'Dependency':
assert record['bin_name'] == 'postlight-parser'
assert 'npm' in record['bin_providers']
found_dependency = True
break
except json.JSONDecodeError:
pass
assert found_dependency, "Should output Dependency record when binary not found"
def test_verify_deps_with_abx_pkg():
"""Verify mercury-parser is available via abx-pkg after hook installation."""
"""Verify postlight-parser is available via abx-pkg."""
from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
NpmProvider.model_rebuild()
EnvProvider.model_rebuild()
# Verify mercury-parser is available
# Verify postlight-parser is available
mercury_binary = Binary(
name='mercury-parser',
name='postlight-parser',
binproviders=[NpmProvider(), EnvProvider()],
overrides={'npm': {'packages': ['@postlight/mercury-parser']}}
overrides={'npm': {'packages': ['@postlight/parser']}}
)
mercury_loaded = mercury_binary.load()
assert mercury_loaded and mercury_loaded.abspath, "mercury-parser should be available after install hook"
# If validate hook found it (exit 0), this should succeed
# If validate hook didn't find it (exit 1), this may fail unless binprovider installed it
if mercury_loaded and mercury_loaded.abspath:
assert True, "postlight-parser is available"
else:
pytest.skip("postlight-parser not available - Dependency record should have been emitted")
def test_extracts_with_mercury_parser():
"""Test full workflow: extract with mercury-parser from real HTML via hook."""
"""Test full workflow: extract with postlight-parser from real HTML via hook."""
# Prerequisites checked by earlier test
with tempfile.TemporaryDirectory() as tmpdir:

View File

@@ -2,46 +2,28 @@
"""
Create a Merkle tree of all archived outputs.
This plugin runs after all extractors and post-processing complete (priority 92)
and generates a cryptographic Merkle tree of all files in the snapshot directory.
This provides:
- Tamper detection: verify archive integrity
- Efficient updates: only re-hash changed files
- Compact proofs: prove file inclusion without sending all files
- Deduplication: identify identical content across snapshots
This plugin runs after all extractors complete (priority 93) and generates
a cryptographic Merkle tree of all files in the snapshot directory.
Output: merkletree/merkletree.json containing:
- root_hash: SHA256 hash of the Merkle root
- tree: Full tree structure with internal nodes
- files: List of all files with their hashes
- metadata: Timestamp, file count, total size
Output: merkletree.json containing root_hash, tree structure, file list, metadata
Usage: on_Snapshot__92_merkletree.py --url=<url> --snapshot-id=<uuid>
Usage: on_Snapshot__93_merkletree.py --url=<url> --snapshot-id=<uuid>
Environment variables:
SAVE_MERKLETREE: Enable merkle tree generation (default: true)
DATA_DIR: ArchiveBox data directory
ARCHIVE_DIR: Archive output directory
"""
__package__ = 'archivebox.plugins.merkletree'
import os
import sys
import json
import hashlib
from pathlib import Path
from datetime import datetime
from datetime import datetime, timezone
from typing import Dict, List, Optional, Tuple, Any
# Configure Django if running standalone
if __name__ == '__main__':
parent_dir = str(Path(__file__).resolve().parent.parent.parent)
if parent_dir not in sys.path:
sys.path.insert(0, parent_dir)
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.core.settings')
import django
django.setup()
import rich_click as click
import click
def sha256_file(filepath: Path) -> str:
@@ -49,12 +31,10 @@ def sha256_file(filepath: Path) -> str:
h = hashlib.sha256()
try:
with open(filepath, 'rb') as f:
# Read in 64kb chunks
while chunk := f.read(65536):
h.update(chunk)
return h.hexdigest()
except (OSError, PermissionError):
# If we can't read the file, return a null hash
return '0' * 64
@@ -64,74 +44,45 @@ def sha256_data(data: bytes) -> str:
def collect_files(snapshot_dir: Path, exclude_dirs: Optional[List[str]] = None) -> List[Tuple[Path, str, int]]:
"""
Recursively collect all files in snapshot directory.
Args:
snapshot_dir: Root directory to scan
exclude_dirs: Directory names to exclude (e.g., ['merkletree', '.git'])
Returns:
List of (relative_path, sha256_hash, file_size) tuples
"""
"""Recursively collect all files in snapshot directory."""
exclude_dirs = exclude_dirs or ['merkletree', '.git', '__pycache__']
files = []
for root, dirs, filenames in os.walk(snapshot_dir):
# Filter out excluded directories
dirs[:] = [d for d in dirs if d not in exclude_dirs]
for filename in filenames:
filepath = Path(root) / filename
rel_path = filepath.relative_to(snapshot_dir)
# Skip symlinks (we hash the target, not the link)
if filepath.is_symlink():
continue
# Compute hash and size
file_hash = sha256_file(filepath)
file_size = filepath.stat().st_size if filepath.exists() else 0
files.append((rel_path, file_hash, file_size))
# Sort by path for deterministic tree
files.sort(key=lambda x: str(x[0]))
return files
def build_merkle_tree(file_hashes: List[str]) -> Tuple[str, List[List[str]]]:
"""
Build a Merkle tree from a list of leaf hashes.
Args:
file_hashes: List of SHA256 hashes (leaves)
Returns:
(root_hash, tree_levels) where tree_levels is a list of hash lists per level
"""
"""Build a Merkle tree from a list of leaf hashes."""
if not file_hashes:
# Empty tree
return sha256_data(b''), [[]]
# Initialize with leaf level
tree_levels = [file_hashes.copy()]
# Build tree bottom-up
while len(tree_levels[-1]) > 1:
current_level = tree_levels[-1]
next_level = []
# Process pairs
for i in range(0, len(current_level), 2):
left = current_level[i]
if i + 1 < len(current_level):
# Combine left + right
right = current_level[i + 1]
combined = left + right
else:
# Odd number of nodes: duplicate the last one
combined = left + left
parent_hash = sha256_data(combined.encode('utf-8'))
@@ -139,67 +90,41 @@ def build_merkle_tree(file_hashes: List[str]) -> Tuple[str, List[List[str]]]:
tree_levels.append(next_level)
# Root is the single hash at the top level
root_hash = tree_levels[-1][0]
return root_hash, tree_levels
def create_merkle_tree(snapshot_dir: Path) -> Dict[str, Any]:
"""
Create a complete Merkle tree of all files in snapshot directory.
Args:
snapshot_dir: The snapshot directory to scan
Returns:
Dict containing root_hash, tree structure, file list, and metadata
"""
# Collect all files
"""Create a complete Merkle tree of all files in snapshot directory."""
files = collect_files(snapshot_dir)
# Extract just the hashes for tree building
file_hashes = [file_hash for _, file_hash, _ in files]
# Build Merkle tree
root_hash, tree_levels = build_merkle_tree(file_hashes)
# Calculate total size
total_size = sum(size for _, _, size in files)
# Prepare file list with metadata
file_list = [
{
'path': str(path),
'hash': file_hash,
'size': size,
}
{'path': str(path), 'hash': file_hash, 'size': size}
for path, file_hash, size in files
]
# Prepare result
result = {
return {
'root_hash': root_hash,
'tree_levels': tree_levels,
'files': file_list,
'metadata': {
'timestamp': datetime.now().isoformat(),
'timestamp': datetime.now(timezone.utc).isoformat(),
'file_count': len(files),
'total_size': total_size,
'tree_depth': len(tree_levels),
},
}
return result
@click.command()
@click.option('--url', required=True, help='URL being archived')
@click.option('--snapshot-id', required=True, help='Snapshot UUID')
def main(url: str, snapshot_id: str):
"""Generate Merkle tree of all archived outputs."""
from archivebox.core.models import Snapshot
start_ts = datetime.now()
start_ts = datetime.now(timezone.utc)
status = 'failed'
output = None
error = ''
@@ -211,30 +136,19 @@ def main(url: str, snapshot_id: str):
save_merkletree = os.getenv('SAVE_MERKLETREE', 'true').lower() in ('true', '1', 'yes', 'on')
if not save_merkletree:
click.echo('Skipping merkle tree (SAVE_MERKLETREE=False)')
status = 'skipped'
end_ts = datetime.now()
click.echo(f'START_TS={start_ts.isoformat()}')
click.echo(f'END_TS={end_ts.isoformat()}')
click.echo(f'STATUS={status}')
click.echo(f'RESULT_JSON={{"extractor": "merkletree", "status": "{status}", "url": "{url}", "snapshot_id": "{snapshot_id}"}}')
click.echo(json.dumps({'status': status, 'output': 'SAVE_MERKLETREE=false'}))
sys.exit(0)
# Get snapshot
try:
snapshot = Snapshot.objects.get(id=snapshot_id)
except Snapshot.DoesNotExist:
error = f'Snapshot {snapshot_id} not found'
raise ValueError(error)
# Working directory is the extractor output dir (e.g., <snapshot>/merkletree/)
# Parent is the snapshot directory
output_dir = Path.cwd()
snapshot_dir = output_dir.parent
# Get snapshot directory
snapshot_dir = Path(snapshot.output_dir)
if not snapshot_dir.exists():
error = f'Snapshot directory not found: {snapshot_dir}'
raise FileNotFoundError(error)
raise FileNotFoundError(f'Snapshot directory not found: {snapshot_dir}')
# Create output directory
output_dir = snapshot_dir / 'merkletree'
# Ensure output directory exists
output_dir.mkdir(exist_ok=True)
output_path = output_dir / 'merkletree.json'
@@ -246,49 +160,31 @@ def main(url: str, snapshot_id: str):
json.dump(merkle_data, f, indent=2)
status = 'succeeded'
output = str(output_path)
output = 'merkletree.json'
root_hash = merkle_data['root_hash']
file_count = merkle_data['metadata']['file_count']
total_size = merkle_data['metadata']['total_size']
click.echo(f'Merkle tree created: {file_count} files, root={root_hash[:16]}..., size={total_size:,} bytes')
click.echo(f'Merkle tree: {file_count} files, root={root_hash[:16]}..., size={total_size:,} bytes')
except Exception as e:
error = f'{type(e).__name__}: {e}'
status = 'failed'
click.echo(f'Error: {error}', err=True)
end_ts = datetime.now()
duration = (end_ts - start_ts).total_seconds()
end_ts = datetime.now(timezone.utc)
# Print results
click.echo(f'START_TS={start_ts.isoformat()}')
click.echo(f'END_TS={end_ts.isoformat()}')
click.echo(f'DURATION={duration:.2f}')
if output:
click.echo(f'OUTPUT={output}')
click.echo(f'STATUS={status}')
if error:
click.echo(f'ERROR={error}', err=True)
# Print JSON result
result_json = {
'extractor': 'merkletree',
'url': url,
'snapshot_id': snapshot_id,
# Print JSON result for hook runner
result = {
'status': status,
'start_ts': start_ts.isoformat(),
'end_ts': end_ts.isoformat(),
'duration': round(duration, 2),
'output': output,
'error': error or None,
'root_hash': root_hash,
'file_count': file_count,
'error': error or None,
}
click.echo(f'RESULT_JSON={json.dumps(result_json)}')
click.echo(json.dumps(result))
sys.exit(0 if status == 'succeeded' else 1)
sys.exit(0 if status in ('succeeded', 'skipped') else 1)
if __name__ == '__main__':

View File

@@ -0,0 +1 @@
🔗

View File

@@ -133,7 +133,8 @@ def fetch_content(url: str) -> str:
@click.command()
@click.option('--url', required=True, help='HTML URL to parse')
def main(url: str):
@click.option('--snapshot-id', required=False, help='Snapshot UUID (unused but required by hook runner)')
def main(url: str, snapshot_id: str = None):
"""Parse HTML and extract href URLs."""
# Skip only if parse_dom_outlinks already ran AND found URLs (it uses Chrome for better coverage)

View File

@@ -0,0 +1 @@
🔗

View File

@@ -127,7 +127,8 @@ def fetch_content(url: str) -> str:
@click.command()
@click.option('--url', required=True, help='JSONL file URL to parse')
def main(url: str):
@click.option('--snapshot-id', required=False, help='Snapshot UUID (unused but required by hook runner)')
def main(url: str, snapshot_id: str = None):
"""Parse JSONL bookmark file and extract URLs."""
try:

View File

@@ -0,0 +1 @@
📋

View File

@@ -52,7 +52,8 @@ def fetch_content(url: str) -> str:
@click.command()
@click.option('--url', required=True, help='Netscape bookmark file URL to parse')
def main(url: str):
@click.option('--snapshot-id', required=False, help='Snapshot UUID (unused but required by hook runner)')
def main(url: str, snapshot_id: str = None):
"""Parse Netscape bookmark HTML and extract URLs."""
try:

View File

@@ -0,0 +1 @@
🔖

View File

@@ -51,7 +51,8 @@ def fetch_content(url: str) -> str:
@click.command()
@click.option('--url', required=True, help='RSS/Atom feed URL to parse')
def main(url: str):
@click.option('--snapshot-id', required=False, help='Snapshot UUID (unused but required by hook runner)')
def main(url: str, snapshot_id: str = None):
"""Parse RSS/Atom feed and extract article URLs."""
if feedparser is None:

View File

@@ -0,0 +1 @@
📡

View File

@@ -100,7 +100,8 @@ def fetch_content(url: str) -> str:
@click.command()
@click.option('--url', required=True, help='URL to parse (file:// or https://)')
def main(url: str):
@click.option('--snapshot-id', required=False, help='Snapshot UUID (unused but required by hook runner)')
def main(url: str, snapshot_id: str = None):
"""Parse plain text and extract URLs."""
try:

View File

@@ -0,0 +1 @@
📃

View File

@@ -0,0 +1,5 @@
<!-- PDF embed - full PDF viewer -->
<embed src="{{ output_path }}#toolbar=1&navpanes=1"
type="application/pdf"
class="extractor-embed pdf-embed"
style="width: 100%; height: 100%; min-height: 500px;">

View File

@@ -0,0 +1,5 @@
<!-- PDF fullscreen - full PDF viewer -->
<embed src="{{ output_path }}#toolbar=1&navpanes=1&view=FitH"
type="application/pdf"
class="extractor-fullscreen pdf-fullscreen"
style="width: 100%; height: 100vh;">

View File

@@ -0,0 +1 @@
📄

View File

@@ -0,0 +1,6 @@
<!-- PDF thumbnail - shows first page preview -->
<div class="extractor-thumbnail pdf-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #f5f5f5;">
<embed src="{{ output_path }}#toolbar=0&navpanes=0&scrollbar=0&page=1&view=FitH"
type="application/pdf"
style="width: 100%; height: 200px; margin-top: -20px; pointer-events: none;">
</div>

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env python3
"""
Install readability-extractor if not already available.
Runs at crawl start to ensure readability-extractor is installed.
Outputs JSONL for InstalledBinary.
"""
import json
import sys
from pathlib import Path
def main():
    """Ensure readability-extractor is installed, emitting JSONL status records.

    Exits 0 with an InstalledBinary record on success, 1 with a Dependency
    record when the binary cannot be loaded or installed.
    """
    try:
        from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
        NpmProvider.model_rebuild()
        EnvProvider.model_rebuild()

        # The npm package lives at github:ArchiveBox/readability-extractor.
        binary_spec = Binary(
            name='readability-extractor',
            binproviders=[NpmProvider(), EnvProvider()],
            overrides={'npm': {'packages': ['github:ArchiveBox/readability-extractor']}}
        )

        # First try to resolve an existing install; fall back to installing it.
        resolved = None
        try:
            resolved = binary_spec.load()
        except Exception:
            resolved = None
        if not (resolved and resolved.abspath):
            resolved = binary_spec.install()

        if resolved and resolved.abspath:
            # Report the binary we ended up with as an InstalledBinary JSONL record.
            print(json.dumps({
                'type': 'InstalledBinary',
                'name': 'readability-extractor',
                'abspath': str(resolved.abspath),
                'version': str(resolved.version) if resolved.version else None,
                'sha256': resolved.sha256,
                'binprovider': resolved.loaded_binprovider.name if resolved.loaded_binprovider else 'unknown',
            }))
            sys.exit(0)

        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'readability-extractor',
            'bin_providers': 'npm,env',
        }))
        print("Failed to install readability-extractor", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # abx_pkg missing or the provider machinery blew up -- still emit a
        # Dependency record so the caller knows what was needed.
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'readability-extractor',
            'bin_providers': 'npm,env',
        }))
        print(f"Error installing readability-extractor: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,123 @@
#!/usr/bin/env python3
"""
Validation hook for readability-extractor binary.
Runs at crawl start to verify readability-extractor is available.
Outputs JSONL for InstalledBinary and Machine config updates.
"""
import os
import sys
import json
import shutil
import hashlib
import subprocess
from pathlib import Path
def get_binary_version(abspath: str) -> str | None:
"""Get version string from binary."""
try:
result = subprocess.run(
[abspath, '--version'],
capture_output=True,
text=True,
timeout=5,
)
if result.returncode == 0 and result.stdout:
first_line = result.stdout.strip().split('\n')[0]
return first_line[:64]
except Exception:
pass
return None
def get_binary_hash(abspath: str) -> str | None:
"""Get SHA256 hash of binary."""
try:
with open(abspath, 'rb') as f:
return hashlib.sha256(f.read()).hexdigest()
except Exception:
return None
def find_readability() -> dict | None:
"""Find readability-extractor binary."""
try:
from abx_pkg import Binary, NpmProvider, EnvProvider
class ReadabilityBinary(Binary):
name: str = 'readability-extractor'
binproviders_supported = [NpmProvider(), EnvProvider()]
overrides: dict = {'npm': {'packages': ['github:ArchiveBox/readability-extractor']}}
binary = ReadabilityBinary()
loaded = binary.load()
if loaded and loaded.abspath:
return {
'name': 'readability-extractor',
'abspath': str(loaded.abspath),
'version': str(loaded.version) if loaded.version else None,
'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None,
'binprovider': loaded.binprovider.name if loaded.binprovider else 'env',
}
except ImportError:
pass
except Exception:
pass
# Fallback to shutil.which
abspath = shutil.which('readability-extractor') or os.environ.get('READABILITY_BINARY', '')
if abspath and Path(abspath).is_file():
return {
'name': 'readability-extractor',
'abspath': abspath,
'version': get_binary_version(abspath),
'sha256': get_binary_hash(abspath),
'binprovider': 'env',
}
return None
def main():
    """Emit JSONL records describing the readability-extractor binary, or a Dependency request.

    Exits 0 when the binary is found (InstalledBinary + Machine config records),
    exits 1 when it is missing (Dependency record so a provider can install it).
    """
    result = find_readability()
    if result and result.get('abspath'):
        # Binary located: report it so the hook runner records an InstalledBinary.
        print(json.dumps({
            'type': 'InstalledBinary',
            'name': result['name'],
            'abspath': result['abspath'],
            'version': result['version'],
            'sha256': result['sha256'],
            'binprovider': result['binprovider'],
        }))
        # Persist the resolved path in per-machine config for the extractor hooks.
        print(json.dumps({
            'type': 'Machine',
            '_method': 'update',
            'key': 'config/READABILITY_BINARY',
            'value': result['abspath'],
        }))
        if result['version']:
            print(json.dumps({
                'type': 'Machine',
                '_method': 'update',
                'key': 'config/READABILITY_VERSION',
                'value': result['version'],
            }))
        sys.exit(0)
    else:
        # Binary missing: emit a Dependency record so npm/env providers can supply it.
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'readability-extractor',
            'bin_providers': 'npm,env',
        }))
        # was an f-string with no placeholders (lint F541) -- plain literal is correct
        print("readability-extractor binary not found", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,6 @@
<!-- Readability embed - reader-mode article view -->
<iframe src="{{ output_path }}"
class="extractor-embed readability-embed"
style="width: 100%; height: 100%; min-height: 500px; border: none; background: #fefefe;"
sandbox="allow-same-origin">
</iframe>

View File

@@ -0,0 +1,6 @@
<!-- Readability fullscreen - full reader-mode article -->
<iframe src="{{ output_path }}"
class="extractor-fullscreen readability-fullscreen"
style="width: 100%; height: 100vh; border: none; background: #fefefe;"
sandbox="allow-same-origin">
</iframe>

View File

@@ -0,0 +1 @@
📖

View File

@@ -0,0 +1,8 @@
<!-- Readability thumbnail - shows reader-mode extracted article content -->
<div class="extractor-thumbnail readability-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fefefe; padding: 8px; font-family: Georgia, serif; font-size: 11px; line-height: 1.4; color: #333;">
<iframe src="{{ output_path }}"
style="width: 100%; height: 300px; border: none; pointer-events: none;"
loading="lazy"
sandbox="allow-same-origin">
</iframe>
</div>

View File

@@ -2,7 +2,7 @@
Integration tests for readability plugin
Tests verify:
1. Install hook installs readability-extractor via abx-pkg
1. Validate hook checks for readability-extractor binary
2. Verify deps with abx-pkg
3. Plugin reports missing dependency correctly
4. Extraction works against real example.com content
@@ -21,7 +21,7 @@ import pytest
PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
READABILITY_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_readability.py'))
READABILITY_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_readability.py'
READABILITY_VALIDATE_HOOK = PLUGIN_DIR / 'on_Crawl__00_validate_readability.py'
TEST_URL = 'https://example.com'
@@ -101,48 +101,63 @@ def test_reports_missing_dependency_when_not_installed():
assert 'readability-extractor' in combined or 'BIN_NAME' in combined, "Should mention readability-extractor"
def test_readability_install_hook():
"""Test readability install hook to install readability-extractor if needed."""
def test_readability_validate_hook():
"""Test readability validate hook checks for readability-extractor binary."""
result = subprocess.run(
[sys.executable, str(READABILITY_INSTALL_HOOK)],
[sys.executable, str(READABILITY_VALIDATE_HOOK)],
capture_output=True,
text=True,
timeout=600
timeout=30
)
assert result.returncode == 0, f"Install hook failed: {result.stderr}"
# Verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'readability-extractor'
assert record['abspath']
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record"
# Hook exits 0 if binary found, 1 if not found (with Dependency record)
if result.returncode == 0:
# Binary found - verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'readability-extractor'
assert record['abspath']
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record when binary found"
else:
# Binary not found - verify Dependency JSONL output
found_dependency = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'Dependency':
assert record['bin_name'] == 'readability-extractor'
assert 'npm' in record['bin_providers']
found_dependency = True
break
except json.JSONDecodeError:
pass
assert found_dependency, "Should output Dependency record when binary not found"
def test_verify_deps_with_abx_pkg():
"""Verify readability-extractor is available via abx-pkg after hook installation."""
"""Verify readability-extractor is available via abx-pkg."""
from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
NpmProvider.model_rebuild()
EnvProvider.model_rebuild()
readability_binary = Binary(
name='readability-extractor',
binproviders=[NpmProvider(), EnvProvider()],
overrides={'npm': {'packages': ['github:ArchiveBox/readability-extractor']}}
)
readability_loaded = readability_binary.load()
assert readability_loaded and readability_loaded.abspath, "readability-extractor should be available after install hook"
if readability_loaded and readability_loaded.abspath:
assert True, "readability-extractor is available"
else:
pytest.skip("readability-extractor not available - Dependency record should have been emitted")
def test_extracts_article_after_installation():

View File

@@ -0,0 +1,5 @@
<!-- Screenshot embed - full image view -->
<img src="{{ output_path }}"
alt="Screenshot of page"
class="extractor-embed screenshot-embed"
style="max-width: 100%; height: auto;">

View File

@@ -0,0 +1,8 @@
<!-- Screenshot fullscreen - zoomable image -->
<div style="width: 100%; height: 100vh; overflow: auto; background: #222; display: flex; align-items: start; justify-content: center;">
<img src="{{ output_path }}"
alt="Screenshot of page"
class="extractor-fullscreen screenshot-fullscreen"
style="max-width: 100%; cursor: zoom-in;"
onclick="this.style.maxWidth = this.style.maxWidth === 'none' ? '100%' : 'none'; this.style.cursor = this.style.maxWidth === 'none' ? 'zoom-out' : 'zoom-in';">
</div>

View File

@@ -0,0 +1 @@
📷

View File

@@ -0,0 +1,8 @@
<!-- Screenshot thumbnail - shows the captured screenshot image -->
<img src="{{ output_path }}"
alt="Screenshot of page"
class="extractor-thumbnail screenshot-thumbnail"
style="width: 100%; height: 100px; object-fit: cover; object-position: top center; background: #333;"
loading="lazy"
onerror="this.style.display='none'; this.nextElementSibling.style.display='block';">
<div style="display: none; text-align: center; padding: 20px; color: #999;">📷 Screenshot</div>

View File

@@ -0,0 +1,6 @@
<!-- Singlefile embed - full iframe of archived HTML -->
<iframe src="{{ output_path }}"
class="extractor-embed singlefile-embed"
style="width: 100%; height: 100%; min-height: 500px; border: none;"
sandbox="allow-same-origin allow-scripts allow-forms">
</iframe>

View File

@@ -0,0 +1,6 @@
<!-- Singlefile fullscreen - full page iframe -->
<iframe src="{{ output_path }}"
class="extractor-fullscreen singlefile-fullscreen"
style="width: 100%; height: 100vh; border: none;"
sandbox="allow-same-origin allow-scripts allow-forms allow-top-navigation-by-user-activation">
</iframe>

View File

@@ -0,0 +1 @@
📦

View File

@@ -0,0 +1,8 @@
<!-- Singlefile thumbnail - scaled down iframe preview of archived HTML -->
<div class="extractor-thumbnail singlefile-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fff;">
<iframe src="{{ output_path }}"
style="width: 400%; height: 400px; transform: scale(0.25); transform-origin: top left; pointer-events: none; border: none;"
loading="lazy"
sandbox="allow-same-origin">
</iframe>
</div>

View File

@@ -0,0 +1 @@
📁

View File

@@ -0,0 +1 @@
📝

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env python3
"""
Install wget if not already available.
Runs at crawl start to ensure wget is installed.
Outputs JSONL for InstalledBinary.
"""
import json
import sys
from pathlib import Path
def main():
    """Ensure wget is installed, emitting JSONL status records.

    Exits 0 with an InstalledBinary record on success, 1 with a Dependency
    record when the binary cannot be loaded or installed.
    """
    try:
        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
        AptProvider.model_rebuild()
        BrewProvider.model_rebuild()
        EnvProvider.model_rebuild()

        # The wget binary and its apt/brew package share the same name.
        binary_spec = Binary(
            name='wget',
            binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
        )

        # First try to resolve an existing install; fall back to installing it.
        resolved = None
        try:
            resolved = binary_spec.load()
        except Exception:
            resolved = None
        if not (resolved and resolved.abspath):
            resolved = binary_spec.install()

        if resolved and resolved.abspath:
            # Report the binary we ended up with as an InstalledBinary JSONL record.
            print(json.dumps({
                'type': 'InstalledBinary',
                'name': 'wget',
                'abspath': str(resolved.abspath),
                'version': str(resolved.version) if resolved.version else None,
                'sha256': resolved.sha256,
                'binprovider': resolved.loaded_binprovider.name if resolved.loaded_binprovider else 'unknown',
            }))
            sys.exit(0)

        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'wget',
            'bin_providers': 'apt,brew,env',
        }))
        print("Failed to install wget", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # abx_pkg missing or the provider machinery blew up -- still emit a
        # Dependency record so the caller knows what was needed.
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'wget',
            'bin_providers': 'apt,brew,env',
        }))
        print(f"Error installing wget: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,6 @@
<!-- Wget embed - full iframe of mirrored site -->
<iframe src="{{ output_path }}"
class="extractor-embed wget-embed"
style="width: 100%; height: 100%; min-height: 500px; border: none;"
sandbox="allow-same-origin allow-scripts allow-forms">
</iframe>

View File

@@ -0,0 +1,6 @@
<!-- Wget fullscreen - full page iframe of mirrored site -->
<iframe src="{{ output_path }}"
class="extractor-fullscreen wget-fullscreen"
style="width: 100%; height: 100vh; border: none;"
sandbox="allow-same-origin allow-scripts allow-forms allow-top-navigation-by-user-activation">
</iframe>

View File

@@ -0,0 +1 @@
📥

View File

@@ -0,0 +1,8 @@
<!-- Wget thumbnail - scaled down iframe preview of mirrored site.
     Renders the page at 4x size then scales to 25% for a crisp preview;
     pointer-events:none + minimal sandbox keep it display-only. -->
<div class="extractor-thumbnail wget-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fff;">
    <iframe src="{{ output_path }}"
            style="width: 400%; height: 400px; transform: scale(0.25); transform-origin: top left; pointer-events: none; border: none;"
            loading="lazy"
            sandbox="allow-same-origin">
    </iframe>
</div>

View File

@@ -2,8 +2,8 @@
Integration tests for wget plugin
Tests verify:
1. Plugin reports missing dependency correctly
2. wget can be installed via brew/apt provider hooks
1. Validate hook checks for wget binary
2. Verify deps with abx-pkg
3. Config options work (SAVE_WGET, SAVE_WARC, etc.)
4. Extraction works against real example.com
5. Output files contain actual page content
@@ -26,7 +26,7 @@ import pytest
PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
WGET_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_wget.py'))
WGET_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_wget.py'
WGET_VALIDATE_HOOK = PLUGIN_DIR / 'on_Crawl__00_validate_wget.py'
BREW_HOOK = PLUGINS_ROOT / 'brew' / 'on_Dependency__install_using_brew_provider.py'
APT_HOOK = PLUGINS_ROOT / 'apt' / 'on_Dependency__install_using_apt_provider.py'
TEST_URL = 'https://example.com'
@@ -37,45 +37,59 @@ def test_hook_script_exists():
assert WGET_HOOK.exists(), f"Hook script not found: {WGET_HOOK}"
def test_wget_install_hook():
"""Test wget install hook to install wget if needed."""
def test_wget_validate_hook():
"""Test wget validate hook checks for wget binary."""
result = subprocess.run(
[sys.executable, str(WGET_INSTALL_HOOK)],
[sys.executable, str(WGET_VALIDATE_HOOK)],
capture_output=True,
text=True,
timeout=600
timeout=30
)
assert result.returncode == 0, f"Install hook failed: {result.stderr}"
# Verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'wget'
assert record['abspath']
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record"
# Hook exits 0 if binary found, 1 if not found (with Dependency record)
if result.returncode == 0:
# Binary found - verify InstalledBinary JSONL output
found_binary = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'InstalledBinary':
assert record['name'] == 'wget'
assert record['abspath']
found_binary = True
break
except json.JSONDecodeError:
pass
assert found_binary, "Should output InstalledBinary record when binary found"
else:
# Binary not found - verify Dependency JSONL output
found_dependency = False
for line in result.stdout.strip().split('\n'):
if line.strip():
try:
record = json.loads(line)
if record.get('type') == 'Dependency':
assert record['bin_name'] == 'wget'
assert 'env' in record['bin_providers']
found_dependency = True
break
except json.JSONDecodeError:
pass
assert found_dependency, "Should output Dependency record when binary not found"
def test_verify_deps_with_abx_pkg():
"""Verify wget is available via abx-pkg after hook installation."""
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider
AptProvider.model_rebuild()
BrewProvider.model_rebuild()
EnvProvider.model_rebuild()
"""Verify wget is available via abx-pkg."""
from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
wget_binary = Binary(name='wget', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
wget_loaded = wget_binary.load()
assert wget_loaded and wget_loaded.abspath, "wget should be available after install hook"
if wget_loaded and wget_loaded.abspath:
assert True, "wget is available"
else:
pytest.skip("wget not available - Dependency record should have been emitted")
def test_reports_missing_dependency_when_not_installed():

View File

@@ -110,6 +110,10 @@
{% block nav-global %}{% endblock %}
</div>
{% if has_permission %}
{% include 'admin/progress_monitor.html' %}
{% endif %}
{% block breadcrumbs %}
<div class="breadcrumbs">
<a href="{% url 'admin:index' %}">{% trans 'Home' %}</a>

View File

@@ -0,0 +1,648 @@
<style>
/* Progress Monitor Container */
#progress-monitor {
background: linear-gradient(135deg, #0d1117 0%, #161b22 100%);
color: #c9d1d9;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Noto Sans', Helvetica, Arial, sans-serif;
font-size: 12px;
border-bottom: 1px solid #30363d;
position: relative;
z-index: 100;
}
#progress-monitor.hidden {
display: none;
}
#progress-monitor .tree-container {
max-height: 350px;
overflow-y: auto;
}
/* Header Bar */
#progress-monitor .header-bar {
display: flex;
justify-content: space-between;
align-items: center;
padding: 8px 16px;
background: rgba(0,0,0,0.2);
border-bottom: 1px solid #30363d;
position: sticky;
top: 0;
z-index: 10;
}
#progress-monitor .header-left {
display: flex;
align-items: center;
gap: 16px;
}
#progress-monitor .header-right {
display: flex;
align-items: center;
gap: 12px;
}
/* Orchestrator Status */
#progress-monitor .orchestrator-status {
display: flex;
align-items: center;
gap: 6px;
}
#progress-monitor .status-dot {
width: 8px;
height: 8px;
border-radius: 50%;
flex-shrink: 0;
}
#progress-monitor .status-dot.running {
background: #3fb950;
box-shadow: 0 0 8px #3fb950;
animation: pulse 2s infinite;
}
#progress-monitor .status-dot.stopped {
background: #f85149;
}
@keyframes pulse {
0%, 100% { opacity: 1; box-shadow: 0 0 8px #3fb950; }
50% { opacity: 0.6; box-shadow: 0 0 4px #3fb950; }
}
/* Stats */
#progress-monitor .stats {
display: flex;
gap: 16px;
}
#progress-monitor .stat {
display: flex;
align-items: center;
gap: 4px;
}
#progress-monitor .stat-label {
color: #8b949e;
font-size: 10px;
text-transform: uppercase;
letter-spacing: 0.5px;
}
#progress-monitor .stat-value {
font-weight: 600;
font-variant-numeric: tabular-nums;
}
#progress-monitor .stat-value.success { color: #3fb950; }
#progress-monitor .stat-value.error { color: #f85149; }
#progress-monitor .stat-value.warning { color: #d29922; }
#progress-monitor .stat-value.info { color: #58a6ff; }
/* Toggle Button */
#progress-monitor .toggle-btn {
background: transparent;
border: 1px solid #30363d;
color: #8b949e;
cursor: pointer;
padding: 4px 8px;
border-radius: 6px;
font-size: 11px;
transition: all 0.2s;
}
#progress-monitor .toggle-btn:hover {
background: #21262d;
color: #c9d1d9;
border-color: #8b949e;
}
/* Tree Container */
#progress-monitor .tree-container {
padding: 12px 16px;
}
#progress-monitor.collapsed .tree-container {
display: none;
}
/* Idle Message */
#progress-monitor .idle-message {
color: #8b949e;
font-style: italic;
padding: 8px 0;
text-align: center;
}
/* Crawl Item */
#progress-monitor .crawl-item {
background: #161b22;
border: 1px solid #30363d;
border-radius: 8px;
margin-bottom: 12px;
overflow: hidden;
}
#progress-monitor .crawl-header {
display: flex;
align-items: center;
gap: 12px;
padding: 10px 14px;
background: rgba(0,0,0,0.2);
cursor: pointer;
}
#progress-monitor .crawl-header:hover {
background: rgba(88, 166, 255, 0.1);
}
#progress-monitor .crawl-icon {
font-size: 16px;
width: 20px;
text-align: center;
}
#progress-monitor .crawl-info {
flex: 1;
min-width: 0;
}
#progress-monitor .crawl-label {
font-weight: 600;
color: #58a6ff;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
#progress-monitor .crawl-meta {
font-size: 11px;
color: #8b949e;
margin-top: 2px;
}
#progress-monitor .crawl-stats {
display: flex;
gap: 12px;
font-size: 11px;
}
/* Progress Bar */
#progress-monitor .progress-bar-container {
height: 4px;
background: #21262d;
border-radius: 2px;
overflow: hidden;
position: relative;
}
#progress-monitor .progress-bar {
height: 100%;
border-radius: 2px;
transition: width 0.5s ease-out;
position: relative;
}
#progress-monitor .progress-bar.crawl {
background: linear-gradient(90deg, #238636 0%, #3fb950 100%);
}
#progress-monitor .progress-bar.snapshot {
background: linear-gradient(90deg, #1f6feb 0%, #58a6ff 100%);
}
#progress-monitor .progress-bar.extractor {
background: linear-gradient(90deg, #8957e5 0%, #a371f7 100%);
}
#progress-monitor .progress-bar.indeterminate {
background: linear-gradient(90deg, transparent 0%, #58a6ff 50%, transparent 100%);
animation: indeterminate 1.5s infinite linear;
width: 30% !important;
}
@keyframes indeterminate {
0% { transform: translateX(-100%); }
100% { transform: translateX(400%); }
}
/* Crawl Body */
#progress-monitor .crawl-body {
padding: 0 14px 14px;
}
#progress-monitor .crawl-progress {
padding: 10px 14px;
border-bottom: 1px solid #21262d;
}
/* Snapshot List */
#progress-monitor .snapshot-list {
margin-top: 8px;
}
#progress-monitor .snapshot-item {
background: #0d1117;
border: 1px solid #21262d;
border-radius: 6px;
margin-bottom: 8px;
overflow: hidden;
}
#progress-monitor .snapshot-header {
display: flex;
align-items: center;
gap: 10px;
padding: 8px 12px;
cursor: pointer;
}
#progress-monitor .snapshot-header:hover {
background: rgba(88, 166, 255, 0.05);
}
#progress-monitor .snapshot-icon {
font-size: 14px;
width: 18px;
text-align: center;
color: #58a6ff;
}
#progress-monitor .snapshot-info {
flex: 1;
min-width: 0;
}
#progress-monitor .snapshot-url {
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
font-size: 11px;
color: #c9d1d9;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
#progress-monitor .snapshot-meta {
font-size: 10px;
color: #8b949e;
margin-top: 2px;
}
#progress-monitor .snapshot-progress {
padding: 0 12px 8px;
}
/* Extractor List */
#progress-monitor .extractor-list {
padding: 8px 12px;
background: rgba(0,0,0,0.2);
border-top: 1px solid #21262d;
}
#progress-monitor .extractor-item {
display: flex;
align-items: center;
gap: 8px;
padding: 4px 0;
}
#progress-monitor .extractor-icon {
font-size: 12px;
width: 16px;
text-align: center;
}
#progress-monitor .extractor-icon.running {
color: #d29922;
animation: spin 1s linear infinite;
}
#progress-monitor .extractor-icon.success {
color: #3fb950;
}
#progress-monitor .extractor-icon.failed {
color: #f85149;
}
#progress-monitor .extractor-icon.pending {
color: #8b949e;
}
@keyframes spin {
from { transform: rotate(0deg); }
to { transform: rotate(360deg); }
}
#progress-monitor .extractor-name {
flex: 1;
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
font-size: 11px;
}
#progress-monitor .extractor-progress {
width: 60px;
}
/* Status Badge */
#progress-monitor .status-badge {
font-size: 10px;
padding: 2px 6px;
border-radius: 10px;
font-weight: 500;
text-transform: uppercase;
letter-spacing: 0.3px;
}
#progress-monitor .status-badge.queued {
background: #21262d;
color: #8b949e;
}
#progress-monitor .status-badge.started {
background: rgba(210, 153, 34, 0.2);
color: #d29922;
}
#progress-monitor .status-badge.sealed,
#progress-monitor .status-badge.succeeded {
background: rgba(63, 185, 80, 0.2);
color: #3fb950;
}
#progress-monitor .status-badge.failed {
background: rgba(248, 81, 73, 0.2);
color: #f85149;
}
/* Expand/Collapse Icons */
#progress-monitor .expand-icon {
color: #8b949e;
font-size: 10px;
transition: transform 0.2s;
}
#progress-monitor .expand-icon.expanded {
transform: rotate(90deg);
}
</style>
<div id="progress-monitor">
<div class="header-bar">
<div class="header-left">
<div class="orchestrator-status">
<span class="status-dot stopped" id="orchestrator-dot"></span>
<span id="orchestrator-text">Stopped</span>
</div>
<div class="stats">
<div class="stat">
<span class="stat-label">Workers</span>
<span class="stat-value info" id="worker-count">0</span>
</div>
<div class="stat">
<span class="stat-label">Queued</span>
<span class="stat-value warning" id="total-queued">0</span>
</div>
<div class="stat">
<span class="stat-label">Done</span>
<span class="stat-value success" id="total-succeeded">0</span>
</div>
<div class="stat">
<span class="stat-label">Failed</span>
<span class="stat-value error" id="total-failed">0</span>
</div>
</div>
</div>
<div class="header-right">
<button class="toggle-btn" id="progress-collapse" title="Toggle details">Details</button>
</div>
</div>
<div class="tree-container" id="tree-container">
<div class="idle-message" id="idle-message">No active crawls</div>
<div id="crawl-tree"></div>
</div>
</div>
<script>
(function() {
const monitor = document.getElementById('progress-monitor');
const collapseBtn = document.getElementById('progress-collapse');
const treeContainer = document.getElementById('tree-container');
const crawlTree = document.getElementById('crawl-tree');
const idleMessage = document.getElementById('idle-message');
let pollInterval = null;
let isCollapsed = localStorage.getItem('progress-monitor-collapsed') === 'true';
let expandedCrawls = new Set(JSON.parse(localStorage.getItem('progress-monitor-expanded-crawls') || '[]'));
let expandedSnapshots = new Set(JSON.parse(localStorage.getItem('progress-monitor-expanded-snapshots') || '[]'));
// Produce a short display label for a URL: hostname plus a truncated path.
// Falls back to truncating the raw string when it isn't a parseable URL.
function formatUrl(url) {
    const truncate = (text, max) => text.length > max ? text.substring(0, max) + '...' : text;
    let parsed;
    try {
        parsed = new URL(url);
    } catch {
        return truncate(url, 50);
    }
    return parsed.hostname + truncate(parsed.pathname, 30);
}
// Render one extractor row (status icon + name + mini progress bar) as HTML.
// Unknown/queued statuses fall through to the hollow-circle 'pending' icon.
function renderExtractor(extractor) {
    const iconClass = extractor.status === 'started' ? 'running' :
                      extractor.status === 'succeeded' ? 'success' :
                      extractor.status === 'failed' ? 'failed' : 'pending';
    const icon = extractor.status === 'started' ? '&#8635;' :
                 extractor.status === 'succeeded' ? '&#10003;' :
                 extractor.status === 'failed' ? '&#10007;' : '&#9675;';
    // Finished extractors (succeeded OR failed) show a full-width bar;
    // a 'started' one gets the CSS indeterminate animation instead.
    return `
        <div class="extractor-item">
            <span class="extractor-icon ${iconClass}">${icon}</span>
            <span class="extractor-name">${extractor.extractor}</span>
            <div class="extractor-progress">
                <div class="progress-bar-container">
                    <div class="progress-bar extractor ${extractor.status === 'started' ? 'indeterminate' : ''}"
                        style="width: ${extractor.status === 'succeeded' ? '100' : extractor.status === 'failed' ? '100' : extractor.progress}%"></div>
                </div>
            </div>
        </div>
    `;
}
// Render one snapshot row (URL, extractor counts, progress bar) as HTML.
// snapshotKey combines crawl id + snapshot id so expansion state survives
// re-renders; the extractor sub-list is only emitted when extractors exist.
function renderSnapshot(snapshot, crawlId) {
    const snapshotKey = `${crawlId}-${snapshot.id}`;
    const isExpanded = expandedSnapshots.has(snapshotKey);
    const statusIcon = snapshot.status === 'started' ? '&#8635;' : '&#128196;';
    let extractorHtml = '';
    if (snapshot.active_extractors && snapshot.active_extractors.length > 0) {
        extractorHtml = `
            <div class="extractor-list" style="${isExpanded ? '' : 'display:none'}">
                ${snapshot.active_extractors.map(e => renderExtractor(e)).join('')}
            </div>
        `;
    }
    // NOTE(review): progress === 0 while 'started' is treated as "unknown"
    // and rendered as an indeterminate bar.
    return `
        <div class="snapshot-item" data-snapshot-key="${snapshotKey}">
            <div class="snapshot-header" onclick="window.toggleSnapshot('${snapshotKey}')">
                <span class="expand-icon ${isExpanded ? 'expanded' : ''}">${snapshot.active_extractors?.length ? '&#9654;' : ''}</span>
                <span class="snapshot-icon">${statusIcon}</span>
                <div class="snapshot-info">
                    <div class="snapshot-url">${formatUrl(snapshot.url)}</div>
                    <div class="snapshot-meta">
                        ${snapshot.completed_extractors}/${snapshot.total_extractors} extractors
                        ${snapshot.failed_extractors > 0 ? `<span style="color:#f85149">(${snapshot.failed_extractors} failed)</span>` : ''}
                    </div>
                </div>
                <span class="status-badge ${snapshot.status}">${snapshot.status}</span>
            </div>
            <div class="snapshot-progress">
                <div class="progress-bar-container">
                    <div class="progress-bar snapshot ${snapshot.status === 'started' && snapshot.progress === 0 ? 'indeterminate' : ''}"
                        style="width: ${snapshot.progress}%"></div>
                </div>
            </div>
            ${extractorHtml}
        </div>
    `;
}
// Render one crawl card (label, depth/snapshot counts, progress bar, and the
// collapsible list of its active snapshots) as HTML.
function renderCrawl(crawl) {
    const isExpanded = expandedCrawls.has(crawl.id);
    const statusIcon = crawl.status === 'started' ? '&#8635;' : '&#128269;';
    let snapshotsHtml = '';
    if (crawl.active_snapshots && crawl.active_snapshots.length > 0) {
        snapshotsHtml = crawl.active_snapshots.map(s => renderSnapshot(s, crawl.id)).join('');
    }
    // Body (snapshot list) is hidden unless this crawl id is in expandedCrawls.
    return `
        <div class="crawl-item" data-crawl-id="${crawl.id}">
            <div class="crawl-header" onclick="window.toggleCrawl('${crawl.id}')">
                <span class="expand-icon ${isExpanded ? 'expanded' : ''}">${crawl.active_snapshots?.length ? '&#9654;' : ''}</span>
                <span class="crawl-icon">${statusIcon}</span>
                <div class="crawl-info">
                    <div class="crawl-label">${crawl.label}</div>
                    <div class="crawl-meta">depth: ${crawl.max_depth} | ${crawl.total_snapshots} snapshots</div>
                </div>
                <div class="crawl-stats">
                    <span style="color:#3fb950">${crawl.completed_snapshots} done</span>
                    <span style="color:#8b949e">${crawl.pending_snapshots} pending</span>
                </div>
                <span class="status-badge ${crawl.status}">${crawl.status}</span>
            </div>
            <div class="crawl-progress">
                <div class="progress-bar-container">
                    <div class="progress-bar crawl ${crawl.status === 'started' && crawl.progress === 0 ? 'indeterminate' : ''}"
                        style="width: ${crawl.progress}%"></div>
                </div>
            </div>
            <div class="crawl-body" style="${isExpanded ? '' : 'display:none'}">
                <div class="snapshot-list">
                    ${snapshotsHtml}
                </div>
            </div>
        </div>
    `;
}
// Expand/collapse one crawl card in place (no re-render) and persist the set
// of expanded crawl ids to localStorage so the state survives page reloads.
window.toggleCrawl = function(crawlId) {
    const row = document.querySelector(`[data-crawl-id="${crawlId}"]`);
    const body = row.querySelector('.crawl-body');
    const icon = row.querySelector('.expand-icon');
    const nowExpanded = !expandedCrawls.has(crawlId);
    if (nowExpanded) {
        expandedCrawls.add(crawlId);
        body.style.display = '';
        icon.classList.add('expanded');
    } else {
        expandedCrawls.delete(crawlId);
        body.style.display = 'none';
        icon.classList.remove('expanded');
    }
    localStorage.setItem('progress-monitor-expanded-crawls', JSON.stringify([...expandedCrawls]));
};
// Expand/collapse the extractor list under one snapshot row; no-op when the
// row has no extractor list rendered. Persists expanded keys to localStorage.
window.toggleSnapshot = function(snapshotKey) {
    const item = document.querySelector(`[data-snapshot-key="${snapshotKey}"]`);
    const extractorList = item.querySelector('.extractor-list');
    const icon = item.querySelector('.expand-icon');
    if (!extractorList) return;
    if (expandedSnapshots.has(snapshotKey)) {
        expandedSnapshots.delete(snapshotKey);
        extractorList.style.display = 'none';
        icon.classList.remove('expanded');
    } else {
        expandedSnapshots.add(snapshotKey);
        extractorList.style.display = '';
        icon.classList.add('expanded');
    }
    localStorage.setItem('progress-monitor-expanded-snapshots', JSON.stringify([...expandedSnapshots]));
};
// Apply one poll response from /admin/live-progress/ to the monitor UI:
// orchestrator status dot, aggregate counters, and the crawl tree.
// Note: the tree is rebuilt via innerHTML each poll; expansion state is
// preserved because render* functions consult expandedCrawls/expandedSnapshots.
function updateProgress(data) {
    // Calculate if there's activity
    const hasActivity = data.active_crawls.length > 0 ||
        data.crawls_pending > 0 || data.crawls_started > 0 ||
        data.snapshots_pending > 0 || data.snapshots_started > 0 ||
        data.archiveresults_pending > 0 || data.archiveresults_started > 0;
    // Update orchestrator status
    const dot = document.getElementById('orchestrator-dot');
    const text = document.getElementById('orchestrator-text');
    if (data.orchestrator_running) {
        dot.classList.remove('stopped');
        dot.classList.add('running');
        text.textContent = 'Running';
    } else {
        dot.classList.remove('running');
        dot.classList.add('stopped');
        text.textContent = 'Stopped';
    }
    // Update stats
    document.getElementById('worker-count').textContent = data.total_workers;
    document.getElementById('total-queued').textContent =
        data.crawls_pending + data.snapshots_pending + data.archiveresults_pending;
    document.getElementById('total-succeeded').textContent = data.archiveresults_succeeded;
    document.getElementById('total-failed').textContent = data.archiveresults_failed;
    // Render crawl tree
    if (data.active_crawls.length > 0) {
        idleMessage.style.display = 'none';
        crawlTree.innerHTML = data.active_crawls.map(c => renderCrawl(c)).join('');
    } else if (hasActivity) {
        // Work is queued/running but no crawl detail available yet.
        idleMessage.style.display = 'none';
        crawlTree.innerHTML = `
            <div class="idle-message">
                ${data.snapshots_started} snapshots processing, ${data.archiveresults_started} extractors running
            </div>
        `;
    } else {
        idleMessage.style.display = '';
        // Build the URL for recent crawls (last 24 hours)
        var yesterday = new Date(Date.now() - 24*60*60*1000).toISOString().split('T')[0];
        var recentUrl = '/admin/crawls/crawl/?created_at__gte=' + yesterday + '&o=-1';
        idleMessage.innerHTML = `No active crawls (${data.crawls_pending} pending, ${data.crawls_started} started, <a href="${recentUrl}" style="color: #58a6ff;">${data.crawls_recent} recent</a>)`;
        crawlTree.innerHTML = '';
    }
}
// Poll the admin live-progress endpoint once and feed the JSON to
// updateProgress(). API-reported errors and network failures are surfaced
// in the idle-message area instead of throwing.
function fetchProgress() {
    fetch('/admin/live-progress/')
        .then(response => response.json())
        .then(data => {
            if (data.error) {
                console.error('Progress API error:', data.error, data.traceback);
                idleMessage.textContent = 'API Error: ' + data.error;
                idleMessage.style.color = '#f85149';
            }
            // Still rendered even on error so counters stay current.
            updateProgress(data);
        })
        .catch(error => {
            console.error('Progress fetch error:', error);
            idleMessage.textContent = 'Fetch Error: ' + error.message;
            idleMessage.style.color = '#f85149';
        });
}
// Begin polling the progress endpoint; idempotent (no-op if already polling).
// Fires one immediate fetch so the UI populates without waiting a full tick.
function startPolling() {
    if (pollInterval) return;
    fetchProgress();
    pollInterval = setInterval(fetchProgress, 1000); // Poll every 1 second
}
// Stop polling and clear the timer handle so startPolling() can restart it.
function stopPolling() {
    if (pollInterval) {
        clearInterval(pollInterval);
        pollInterval = null;
    }
}
// Collapse toggle: flips the details panel and persists the choice so the
// collapsed/expanded preference survives page reloads.
collapseBtn.addEventListener('click', function() {
    isCollapsed = !isCollapsed;
    localStorage.setItem('progress-monitor-collapsed', isCollapsed);
    if (isCollapsed) {
        monitor.classList.add('collapsed');
        collapseBtn.textContent = 'Expand';
    } else {
        monitor.classList.remove('collapsed');
        collapseBtn.textContent = 'Details';
    }
});
// Apply initial state (restored from localStorage above)
if (isCollapsed) {
    monitor.classList.add('collapsed');
    collapseBtn.textContent = 'Expand';
}
// Start polling when page loads
startPolling();
// Pause polling when tab is hidden to avoid useless background requests
document.addEventListener('visibilitychange', function() {
    if (document.hidden) {
        stopPolling();
    } else {
        startPolling();
    }
});
})();
</script>

View File

@@ -192,6 +192,42 @@
border: 0px;
border-top: 3px solid #aa1e55;
}
#main-frame-wrapper {
width: 100%;
height: calc(100vh - 210px);
border-top: 3px solid #aa1e55;
overflow: hidden;
}
#main-frame-wrapper iframe {
width: 100%;
height: 100%;
border: none;
}
.full-page-wrapper {
width: 100%;
height: calc(100vh - 210px);
}
.thumbnail-wrapper {
height: 100px;
overflow: hidden;
background-color: #333;
pointer-events: none;
}
.thumbnail-wrapper iframe {
width: 405%;
height: 430px;
margin-bottom: -330px;
margin-left: -1%;
transform: scale(0.25);
transform-origin: 0 0;
border: none;
}
.thumbnail-wrapper img {
width: 100%;
height: 100%;
object-fit: cover;
object-position: top center;
}
.card.selected-card {
border: 2px solid orange;
box-shadow: 0px -6px 13px 1px rgba(0,0,0,0.05);
@@ -403,12 +439,18 @@
<div class="card {% if forloop.first %}selected-card{% endif %}">
<div class="card-body">
<a href="{{result.path|urlencode}}" target="preview" title="./{{result.path}} (downloaded {{result.ts}})">
<h4>{{result.name|truncatechars:24}} <small>({{result.size|filesizeformat}})</small></h4>
<!-- <p class="card-text" ><code>./{{result.path|truncatechars:30}}</code></p> -->
<h4>{% extractor_icon result.name %} {{result.name|extractor_name|truncatechars:20}} <small>({{result.size|filesizeformat}})</small></h4>
</a>
<!--<a href="{{result.path}}" target="preview"><h4 class="card-title">{{result.name}}</h4></a>-->
</div>
<iframe class="card-img-top" src="{{result.path|urlencode}}?autoplay=0" allow="autoplay 'none'; fullscreen 'none'; navigation-override 'none'; " sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>
{% if result.result %}
{# Use plugin-specific thumbnail template when ArchiveResult is available #}
<div class="card-img-top thumbnail-wrapper">
{% extractor_thumbnail result.result %}
</div>
{% else %}
{# Fall back to generic iframe for filesystem-discovered files #}
<iframe class="card-img-top" src="{{result.path|urlencode}}?autoplay=0" allow="autoplay 'none'; fullscreen 'none'; navigation-override 'none'; " sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>
{% endif %}
</div>
</div>
{% endfor %}
@@ -431,7 +473,15 @@
<iframe id="main-frame" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{best_result.path|urlencode}}" name="preview"></iframe>
{% if best_result.result %}
{# Use plugin-specific fullscreen template when ArchiveResult is available #}
<div id="main-frame-wrapper" class="full-page-wrapper">
{% extractor_fullscreen best_result.result %}
</div>
{% else %}
{# Fall back to generic iframe #}
<iframe id="main-frame" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{best_result.path|urlencode}}" name="preview"></iframe>
{% endif %}

View File

@@ -1,23 +1,13 @@
"""
Workers admin module.
The orchestrator/worker system doesn't need Django admin registration
as workers are managed via CLI commands and the orchestrator.
"""
__package__ = 'archivebox.workers'
from django.contrib.auth import get_permission_codename
from huey_monitor.apps import HueyMonitorConfig
from huey_monitor.admin import TaskModel, TaskModelAdmin, SignalInfoModel, SignalInfoModelAdmin
HueyMonitorConfig.verbose_name = 'Background Workers'
class CustomTaskModelAdmin(TaskModelAdmin):
actions = ["delete_selected"]
def has_delete_permission(self, request, obj=None):
codename = get_permission_codename("delete", self.opts)
return request.user.has_perm("%s.%s" % (self.opts.app_label, codename))
def register_admin(admin_site):
admin_site.register(TaskModel, CustomTaskModelAdmin)
admin_site.register(SignalInfoModel, SignalInfoModelAdmin)
"""No models to register - workers are process-based, not Django models."""
pass

View File

@@ -0,0 +1,15 @@
from django.core.management.base import BaseCommand
from workers.orchestrator import Orchestrator
class Command(BaseCommand):
    """Django management command: run the ArchiveBox orchestrator in the foreground.

    Without ``--daemon`` the orchestrator exits once all work queues are idle;
    with it, the runloop keeps running indefinitely.
    """
    help = 'Run the archivebox orchestrator'
    def add_arguments(self, parser):
        parser.add_argument('--daemon', '-d', action='store_true', help="Run forever (don't exit on idle)")
    def handle(self, *args, **kwargs):
        # argparse stores --daemon as a bool; default False means exit-on-idle.
        daemon = kwargs.get('daemon', False)
        orchestrator = Orchestrator(exit_on_idle=not daemon)
        orchestrator.runloop()

Some files were not shown because too many files have changed in this diff Show More