switch to external plugins

This commit is contained in:
Nick Sweeting
2026-03-15 03:45:51 -07:00
parent 07dc880d0b
commit ecb1764590
256 changed files with 516 additions and 31272 deletions

View File

@@ -30,7 +30,8 @@
"WebFetch(domain:python-statemachine.readthedocs.io)",
"Bash(./bin/run_plugin_tests.sh:*)",
"Bash(done)",
"Bash(coverage erase:*)"
"Bash(coverage erase:*)",
"Bash(gh api:*)"
]
},
"hooks": {

View File

@@ -491,6 +491,7 @@ docker run -it -v $PWD:/data archivebox/archivebox help
# optional: import your browser cookies into a persona for logged-in archiving
archivebox persona create --import=chrome personal
# supported: chrome/chromium/brave/edge (Chromium-based only)
# use --profile to target a specific profile (e.g. Default, Profile 1)
# re-running import merges/dedupes cookies.txt (by domain/path/name) but replaces chrome_user_data
```

View File

@@ -18,6 +18,7 @@ from pathlib import Path
# Import uuid_compat early to monkey-patch uuid.uuid7 before Django loads migrations
# This fixes migrations generated on Python 3.14+ that reference uuid.uuid7 directly
from archivebox import uuid_compat # noqa: F401
from abx_plugins import get_plugins_dir
# Force unbuffered output for real-time logs
if hasattr(sys.stdout, 'reconfigure'):
@@ -56,9 +57,13 @@ check_io_encoding()
# Install monkey patches for third-party libraries
from .misc.monkey_patches import * # noqa
# Built-in plugin directories
BUILTIN_PLUGINS_DIR = PACKAGE_DIR / 'plugins'
USER_PLUGINS_DIR = Path(os.getcwd()) / 'plugins'
# Plugin directories
BUILTIN_PLUGINS_DIR = Path(get_plugins_dir()).resolve()
USER_PLUGINS_DIR = Path(
os.environ.get('ARCHIVEBOX_USER_PLUGINS_DIR')
or os.environ.get('USER_PLUGINS_DIR')
or os.environ.get('DATA_DIR', os.getcwd())
) / 'custom_plugins'
# These are kept for backwards compatibility with existing code
# that checks for plugins. The new hook system uses discover_hooks()

View File

@@ -33,6 +33,7 @@ import shutil
import platform
import subprocess
import tempfile
import json
from pathlib import Path
from typing import Optional, Iterable
from collections import OrderedDict
@@ -138,6 +139,55 @@ def get_edge_user_data_dir() -> Optional[Path]:
return None
def get_browser_binary(browser: str) -> Optional[str]:
    """Locate the executable for a Chromium-family browser on this machine.

    Checks a list of well-known install locations for the current OS first,
    then falls back to searching PATH via shutil.which so that non-standard
    installs (e.g. Nix, Homebrew-linked, or custom-prefix browsers) are still
    found.

    Args:
        browser: One of 'chrome', 'chromium', 'brave', 'edge' (case-insensitive).

    Returns:
        Absolute path to the browser binary as a string, or None if not found.
    """
    system = platform.system()
    home = Path.home()
    browser = browser.lower()
    if system == 'Darwin':
        candidates = {
            'chrome': ['/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'],
            'chromium': ['/Applications/Chromium.app/Contents/MacOS/Chromium'],
            'brave': ['/Applications/Brave Browser.app/Contents/MacOS/Brave Browser'],
            'edge': ['/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge'],
        }.get(browser, [])
    elif system == 'Linux':
        candidates = {
            'chrome': ['/usr/bin/google-chrome', '/usr/bin/google-chrome-stable', '/usr/bin/google-chrome-beta', '/usr/bin/google-chrome-unstable'],
            'chromium': ['/usr/bin/chromium', '/usr/bin/chromium-browser'],
            'brave': ['/usr/bin/brave-browser', '/usr/bin/brave-browser-beta', '/usr/bin/brave-browser-nightly'],
            'edge': ['/usr/bin/microsoft-edge', '/usr/bin/microsoft-edge-stable', '/usr/bin/microsoft-edge-beta', '/usr/bin/microsoft-edge-dev'],
        }.get(browser, [])
    elif system == 'Windows':
        local_app_data = Path(os.environ.get('LOCALAPPDATA', home / 'AppData' / 'Local'))
        candidates = {
            'chrome': [
                str(local_app_data / 'Google' / 'Chrome' / 'Application' / 'chrome.exe'),
                'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
                'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
            ],
            'chromium': [str(local_app_data / 'Chromium' / 'Application' / 'chrome.exe')],
            'brave': [
                str(local_app_data / 'BraveSoftware' / 'Brave-Browser' / 'Application' / 'brave.exe'),
                'C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe',
                'C:\\Program Files (x86)\\BraveSoftware\\Brave-Browser\\Application\\brave.exe',
            ],
            'edge': [
                str(local_app_data / 'Microsoft' / 'Edge' / 'Application' / 'msedge.exe'),
                'C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe',
                'C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe',
            ],
        }.get(browser, [])
    else:
        candidates = []
    for candidate in candidates:
        if candidate and Path(candidate).exists():
            return candidate
    # Fall back to searching PATH for the usual executable names. This finds
    # browsers installed outside the hard-coded locations above.
    path_names = {
        'chrome': ['google-chrome', 'google-chrome-stable', 'chrome'],
        'chromium': ['chromium', 'chromium-browser'],
        'brave': ['brave-browser', 'brave'],
        'edge': ['microsoft-edge', 'msedge'],
    }.get(browser, [])
    for name in path_names:
        found = shutil.which(name)
        if found:
            return found
    return None
BROWSER_PROFILE_FINDERS = {
'chrome': get_chrome_user_data_dir,
'chromium': get_chrome_user_data_dir, # Same locations
@@ -194,7 +244,12 @@ def _merge_netscape_cookies(existing_file: Path, new_file: Path) -> None:
_write_netscape_cookies(existing_file, existing)
def extract_cookies_via_cdp(user_data_dir: Path, output_file: Path) -> bool:
def extract_cookies_via_cdp(
user_data_dir: Path,
output_file: Path,
profile_dir: str | None = None,
chrome_binary: str | None = None,
) -> bool:
"""
Launch Chrome with the given user data dir and extract cookies via CDP.
@@ -218,6 +273,8 @@ def extract_cookies_via_cdp(user_data_dir: Path, output_file: Path) -> bool:
env['NODE_MODULES_DIR'] = str(node_modules_dir)
env['CHROME_USER_DATA_DIR'] = str(user_data_dir)
env['CHROME_HEADLESS'] = 'true'
if chrome_binary:
env['CHROME_BINARY'] = str(chrome_binary)
output_path = output_file
temp_output = None
temp_dir = None
@@ -225,6 +282,23 @@ def extract_cookies_via_cdp(user_data_dir: Path, output_file: Path) -> bool:
temp_dir = Path(tempfile.mkdtemp(prefix='ab_cookies_'))
temp_output = temp_dir / 'cookies.txt'
output_path = temp_output
if profile_dir:
extra_arg = f'--profile-directory={profile_dir}'
existing_extra = env.get('CHROME_ARGS_EXTRA', '').strip()
args_list = []
if existing_extra:
if existing_extra.startswith('['):
try:
parsed = json.loads(existing_extra)
if isinstance(parsed, list):
args_list.extend(str(x) for x in parsed)
except Exception:
args_list.extend([s.strip() for s in existing_extra.split(',') if s.strip()])
else:
args_list.extend([s.strip() for s in existing_extra.split(',') if s.strip()])
args_list.append(extra_arg)
env['CHROME_ARGS_EXTRA'] = json.dumps(args_list)
env['COOKIES_OUTPUT_FILE'] = str(output_path)
try:
@@ -322,6 +396,7 @@ def ensure_path_within_personas_dir(persona_path: Path) -> bool:
def create_personas(
names: Iterable[str],
import_from: Optional[str] = None,
profile: Optional[str] = None,
) -> int:
"""
Create Personas from names.
@@ -360,6 +435,15 @@ def create_personas(
rprint(f'[dim]Found {import_from} profile: {source_profile_dir}[/dim]', file=sys.stderr)
if profile is None and (source_profile_dir / 'Default').exists():
profile = 'Default'
browser_binary = get_browser_binary(import_from)
if browser_binary:
rprint(f'[dim]Using {import_from} binary: {browser_binary}[/dim]', file=sys.stderr)
else:
browser_binary = None
created_count = 0
for name in name_list:
name = name.strip()
@@ -414,7 +498,12 @@ def create_personas(
# Extract cookies via CDP
rprint(f'[dim]Extracting cookies via CDP...[/dim]', file=sys.stderr)
if extract_cookies_via_cdp(persona_chrome_dir, cookies_file):
if extract_cookies_via_cdp(
persona_chrome_dir,
cookies_file,
profile_dir=profile,
chrome_binary=browser_binary,
):
rprint(f'[green]Extracted cookies to {cookies_file}[/green]', file=sys.stderr)
else:
rprint(f'[yellow]Could not extract cookies automatically.[/yellow]', file=sys.stderr)
@@ -652,9 +741,10 @@ def main():
@main.command('create')
@click.argument('names', nargs=-1)
@click.option('--import', 'import_from', help='Import profile from browser (chrome, chromium, brave, edge)')
def create_cmd(names: tuple, import_from: Optional[str]):
@click.option('--profile', help='Profile directory name under the user data dir (e.g. Default, Profile 1)')
def create_cmd(names: tuple, import_from: Optional[str], profile: Optional[str]):
"""Create Personas, optionally importing from a browser profile."""
sys.exit(create_personas(names, import_from=import_from))
sys.exit(create_personas(names, import_from=import_from, profile=profile))
@main.command('list')

View File

@@ -277,7 +277,7 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
# Show a helpful message when no plugins found
rows['Name'].append('(no plugins found)')
rows['Source'].append('-')
rows['Path'].append(mark_safe('<code>archivebox/plugins/</code> or <code>data/plugins/</code>'))
rows['Path'].append(mark_safe('<code>abx_plugins/plugins/</code> or <code>data/custom_plugins/</code>'))
rows['Hooks'].append('-')
rows['Config'].append('-')

View File

@@ -140,6 +140,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'crawl__created_by', TagNameListFilter)
fieldsets = (
('Actions', {
'fields': ('admin_actions',),
'classes': ('card', 'wide', 'actions-card'),
}),
('URL', {
'fields': ('url', 'title'),
'classes': ('card', 'wide'),
@@ -168,10 +172,6 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
'fields': ('output_dir',),
'classes': ('card',),
}),
('Actions', {
'fields': ('admin_actions',),
'classes': ('card', 'wide'),
}),
('Archive Results', {
'fields': ('archiveresults_list',),
'classes': ('card', 'wide'),
@@ -179,7 +179,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
)
ordering = ['-created_at']
actions = ['add_tags', 'remove_tags', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
actions = ['add_tags', 'remove_tags', 'resnapshot_snapshot', 'update_snapshots', 'overwrite_snapshots', 'delete_snapshots']
inlines = [] # Removed TagInline, using TagEditorWidget instead
list_per_page = min(max(5, SERVER_CONFIG.SNAPSHOTS_PER_PAGE), 5000)
@@ -301,6 +301,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
# obj.pk,
# )
@admin.display(description='')
def admin_actions(self, obj):
summary_url = build_web_url(f'/{obj.archive_path}')
results_url = build_web_url(f'/{obj.archive_path}/index.html#all')
@@ -311,13 +312,13 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
href="{}"
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
📄 Summary Page
📄 View Snapshot
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="{}"
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
📁 Result Files
📁 All files
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="{}"
@@ -329,19 +330,19 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
<span style="border-left: 1px solid #e2e8f0; height: 24px; margin: 0 4px;"></span>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #ecfdf5; border: 1px solid #a7f3d0; border-radius: 8px; color: #065f46; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
title="Get missing extractors"
onmouseover="this.style.background='#d1fae5';"
onmouseout="this.style.background='#ecfdf5';">
⬇️ Finish
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #eff6ff; border: 1px solid #bfdbfe; border-radius: 8px; color: #1e40af; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
title="Create a fresh new snapshot of this URL"
onmouseover="this.style.background='#dbeafe';"
onmouseout="this.style.background='#eff6ff';">
🆕 Archive Again
🆕 Archive Now
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #ecfdf5; border: 1px solid #a7f3d0; border-radius: 8px; color: #065f46; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
title="Redo failed extractors (missing outputs)"
onmouseover="this.style.background='#d1fae5';"
onmouseout="this.style.background='#ecfdf5';">
🔁 Redo Failed
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #fffbeb; border: 1px solid #fde68a; border-radius: 8px; color: #92400e; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
@@ -707,7 +708,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
# return super().changelist_view(request, extra_context=None)
@admin.action(
description="⏯️ Finish"
description="🔁 Redo Failed"
)
def update_snapshots(self, request, queryset):
count = queryset.count()
@@ -721,7 +722,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
@admin.action(
description="⬇️ Fresh"
description="🆕 Archive Now"
)
def resnapshot_snapshot(self, request, queryset):
for snapshot in queryset:

View File

@@ -1704,8 +1704,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
Create ArchiveResult records for all enabled hooks.
Uses the hooks system to discover available hooks from:
- archivebox/plugins/*/on_Snapshot__*.{py,sh,js}
- data/plugins/*/on_Snapshot__*.{py,sh,js}
- abx_plugins/plugins/*/on_Snapshot__*.{py,sh,js}
- data/custom_plugins/*/on_Snapshot__*.{py,sh,js}
Creates one ArchiveResult per hook (not per plugin), with hook_name set.
This enables step-based execution where all hooks in a step can run in parallel.
@@ -2486,7 +2486,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
@property
def plugin_module(self) -> Any | None:
# Hook scripts are now used instead of Python plugin modules
# The plugin name maps to hooks in archivebox/plugins/{plugin}/
# The plugin name maps to hooks in abx_plugins/plugins/{plugin}/
return None
def output_exists(self) -> bool:

View File

@@ -349,15 +349,6 @@ def plugin_name(value: str) -> str:
return get_plugin_name(value)
@register.filter
def plugin_display_name(value: str) -> str:
"""
Human-friendly plugin name overrides for UI display.
"""
name = get_plugin_name(value)
if name == 'merkletree':
return 'hashes'
return name
@register.simple_tag(takes_context=True)

View File

@@ -1145,13 +1145,31 @@ def live_progress_view(request):
for proc in running_workers:
env = proc.env or {}
if not isinstance(env, dict):
continue
env = {}
cmd = proc.cmd or []
if proc.worker_type == 'crawl':
crawl_id = env.get('CRAWL_ID')
if not crawl_id:
for i, part in enumerate(cmd):
if part == '--crawl-id' and i + 1 < len(cmd):
crawl_id = cmd[i + 1]
break
if part.startswith('--crawl-id='):
crawl_id = part.split('=', 1)[1]
break
if crawl_id:
crawl_worker_pids[str(crawl_id)] = proc.pid
elif proc.worker_type == 'snapshot':
snapshot_id = env.get('SNAPSHOT_ID')
if not snapshot_id:
for i, part in enumerate(cmd):
if part == '--snapshot-id' and i + 1 < len(cmd):
snapshot_id = cmd[i + 1]
break
if part.startswith('--snapshot-id='):
snapshot_id = part.split('=', 1)[1]
break
if snapshot_id:
snapshot_worker_pids[str(snapshot_id)] = proc.pid
@@ -1243,7 +1261,7 @@ def live_progress_view(request):
'plugin': ar.plugin,
'status': status,
}
if ar.process_id and ar.process and ar.process.status == Process.StatusChoices.RUNNING:
if status == ArchiveResult.StatusChoices.STARTED and ar.process_id and ar.process:
plugin_payload['pid'] = ar.process.pid
if status == ArchiveResult.StatusChoices.STARTED:
plugin_payload['progress'] = progress_value

View File

@@ -6,8 +6,8 @@ with ArchiveBox via CLI arguments and stdout JSON output. This keeps the plugin
system simple and language-agnostic.
Directory structure:
archivebox/plugins/<plugin_name>/on_<Event>__<hook_name>.<ext> (built-in)
data/plugins/<plugin_name>/on_<Event>__<hook_name>.<ext> (user)
abx_plugins/plugins/<plugin_name>/on_<Event>__<hook_name>.<ext> (built-in package)
data/custom_plugins/<plugin_name>/on_<Event>__<hook_name>.<ext> (user)
Hook contract:
Input: --url=<url> (and other --key=value args)
@@ -66,14 +66,20 @@ from functools import lru_cache
from pathlib import Path
from typing import List, Dict, Any, Optional, TypedDict
from abx_plugins import get_plugins_dir
from django.conf import settings
from django.utils import timezone
from django.utils.safestring import mark_safe
from archivebox.config.constants import CONSTANTS
# Plugin directories
BUILTIN_PLUGINS_DIR = Path(__file__).parent / 'plugins'
USER_PLUGINS_DIR = Path(getattr(settings, 'DATA_DIR', Path.cwd())) / 'plugins'
BUILTIN_PLUGINS_DIR = Path(get_plugins_dir()).resolve()
USER_PLUGINS_DIR = Path(
os.environ.get('ARCHIVEBOX_USER_PLUGINS_DIR')
or getattr(settings, 'USER_PLUGINS_DIR', '')
or str(CONSTANTS.USER_PLUGINS_DIR)
).expanduser()
# =============================================================================
@@ -197,11 +203,11 @@ def discover_hooks(
for hook in hooks:
# Get plugin name from parent directory
# e.g., archivebox/plugins/wget/on_Snapshot__50_wget.py -> 'wget'
# e.g., abx_plugins/plugins/wget/on_Snapshot__50_wget.py -> 'wget'
plugin_name = hook.parent.name
# Check if this is a plugin directory (not the root plugins dir)
if plugin_name in ('plugins', '.'):
if hook.parent.resolve() in (BUILTIN_PLUGINS_DIR.resolve(), USER_PLUGINS_DIR.resolve()):
# Hook is in root plugins directory, not a plugin subdir
# Include it by default (no filtering for non-plugin hooks)
enabled_hooks.append(hook)
@@ -581,7 +587,7 @@ def get_plugins() -> List[str]:
The plugin name is the plugin directory name, not the hook script name.
Example:
archivebox/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js
abx_plugins/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js
-> plugin = 'chrome'
Sorted alphabetically (plugins control their hook order via numeric prefixes in hook names).
@@ -728,7 +734,7 @@ def discover_plugins_that_provide_interface(
try:
# Import the module dynamically
spec = importlib.util.spec_from_file_location(
f'archivebox.plugins.{plugin_name}.{module_name}',
f'archivebox.dynamic_plugins.{plugin_name}.{module_name}',
module_path
)
if spec is None or spec.loader is None:
@@ -942,7 +948,7 @@ def get_plugin_special_config(plugin_name: str, config: Dict[str, Any]) -> Dict[
# Plugins can provide custom templates for rendering their output in the UI.
# Templates are discovered by filename convention inside each plugin's templates/ dir:
#
# archivebox/plugins/<plugin_name>/
# abx_plugins/plugins/<plugin_name>/
# templates/
# icon.html # Icon for admin table view (small inline HTML)
# card.html # Preview card for snapshot header

View File

@@ -0,0 +1,318 @@
__package__ = 'archivebox.ideas'
import asyncio
import json
import os
import shlex
import signal
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Mapping, MutableMapping, Optional
from pydantic import BaseModel, Field
try:
from bubus import BaseEvent, EventBus
except Exception as exc: # pragma: no cover - optional dependency
raise ImportError('ProcessPlugin requires bubus to be installed') from exc
try:
from bubus.service import uuid7str
except Exception: # pragma: no cover - optional dependency
from uuid import uuid4 as _uuid4
def uuid7str() -> str:
return str(_uuid4())
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
class ProcessRecord(BaseModel):
    """Serializable record of a spawned subprocess and its on-disk artifacts."""

    # Unique id for this process (uuid7 string when bubus provides it).
    id: str = Field(default_factory=uuid7str)
    # argv list passed to the subprocess (executed without a shell).
    cmd: list[str]
    cwd: str | None = None
    # Full environment the process was launched with.
    env: dict[str, str] = Field(default_factory=dict)
    pid: int | None = None
    started_at: datetime | None = None
    ended_at: datetime | None = None
    exit_code: int | None = None
    # Paths to the log/metadata files written next to the process output.
    stdout_path: str | None = None
    stderr_path: str | None = None
    cmd_path: str | None = None
    pid_path: str | None = None
    # True when the process is monitored in a background task instead of awaited inline.
    is_background: bool = False
    parent_process_id: str | None = None
class ProcessLaunch(BaseEvent[ProcessRecord]):
    """Event requesting that a subprocess be spawned; resolves to a ProcessRecord."""

    cmd: list[str]
    cwd: str | None = None
    # Extra environment entries, merged over the plugin's base env.
    env: dict[str, str] | None = None
    # Seconds before the watcher escalates SIGTERM -> SIGKILL; None = wait forever.
    timeout: float | None = None
    # Directory where stdout/stderr/cmd/pid artifact files are written.
    output_dir: str | None = None
    # Filename prefix for the artifact files (defaults to the process id).
    log_prefix: str | None = None
    # True: return immediately and monitor the process in a background task.
    is_background: bool = False
    parent_process_id: str | None = None
    # Scan stdout lines for JSON objects and re-dispatch them as events.
    parse_stdout_events: bool = True
class ProcessStarted(BaseEvent[None]):
    """Notification emitted immediately after a subprocess has been spawned."""

    process: ProcessRecord
class ProcessExited(BaseEvent[None]):
    """Notification emitted once a subprocess has exited and its streams are drained."""

    process: ProcessRecord
class ProcessKill(BaseEvent[ProcessRecord]):
    """Event requesting termination of a running process; resolves to its final record."""

    process_id: str
    # Signal to send first (default SIGTERM); escalates to SIGKILL after `timeout`.
    signal: int = signal.SIGTERM
    timeout: float | None = 10.0
@dataclass
class _RunningProcess:
    """Internal bookkeeping for a live subprocess owned by ProcessPlugin."""

    process: asyncio.subprocess.Process
    record: ProcessRecord
    # Tasks draining stdout/stderr into log files (awaited during finalization).
    stdout_task: asyncio.Task[None] | None
    stderr_task: asyncio.Task[None] | None
    # Background watcher task (only set for is_background launches).
    watcher_task: asyncio.Task[None] | None
    # event_id of the originating ProcessLaunch, used to parent emitted events.
    parent_event_id: str | None
JsonEventAdapter = Callable[[dict[str, Any], str | None], Optional[BaseEvent[Any]]]
class ProcessPlugin:
    """Spawn and monitor processes using events (no Django required).

    Handles ProcessLaunch and ProcessKill events on an EventBus, spawning
    subprocesses with asyncio, logging their output to files, and emitting
    ProcessStarted/ProcessExited events. JSON lines found on a child's stdout
    can be re-dispatched as events onto the bus.
    """

    def __init__(
        self,
        bus: EventBus,
        *,
        env: Mapping[str, str] | None = None,
        json_event_adapter: JsonEventAdapter | None = None,
    ) -> None:
        self.bus = bus
        # Copy so later mutations of os.environ (or the caller's mapping) don't leak in.
        self.env = dict(env or os.environ)
        # Optional callback that converts a parsed stdout JSON dict into an event.
        self.json_event_adapter = json_event_adapter
        # process_id -> live bookkeeping; entries are removed by _finalize_process().
        self._running: MutableMapping[str, _RunningProcess] = {}

    def register_event_handlers(self) -> None:
        """Subscribe this plugin's handlers on the bus."""
        self.bus.on(ProcessLaunch, self.on_ProcessLaunch)
        self.bus.on(ProcessKill, self.on_ProcessKill)

    async def on_ProcessLaunch(self, event: ProcessLaunch) -> ProcessRecord:
        """Spawn the requested subprocess and return its ProcessRecord.

        Writes <prefix>.sh / <prefix>.pid / <prefix>.stdout.log / <prefix>.stderr.log
        into the output directory. For background launches the record is returned
        immediately and a watcher task finalizes the process later; otherwise the
        call awaits process exit before returning.
        """
        parent_event_id = event.event_id
        proc_id = uuid7str()
        cwd = event.cwd or event.output_dir or os.getcwd()
        output_dir = Path(event.output_dir or cwd)
        output_dir.mkdir(parents=True, exist_ok=True)
        # Event-supplied env entries override the plugin's base env.
        env = {**self.env, **(event.env or {})}
        log_prefix = event.log_prefix or proc_id
        stdout_path = output_dir / f'{log_prefix}.stdout.log'
        stderr_path = output_dir / f'{log_prefix}.stderr.log'
        cmd_path = output_dir / f'{log_prefix}.sh'
        pid_path = output_dir / f'{log_prefix}.pid'
        self._write_cmd_file(cmd_path, event.cmd)
        # start_new_session=True puts the child in its own process group so
        # _terminate_process can signal the whole group via killpg.
        proc = await asyncio.create_subprocess_exec(
            *event.cmd,
            cwd=str(cwd),
            env=env,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            start_new_session=True,
        )
        self._write_pid_file(pid_path, proc.pid)
        record = ProcessRecord(
            id=proc_id,
            cmd=event.cmd,
            cwd=str(cwd),
            env=env,
            pid=proc.pid,
            started_at=_utcnow(),
            stdout_path=str(stdout_path),
            stderr_path=str(stderr_path),
            cmd_path=str(cmd_path),
            pid_path=str(pid_path),
            is_background=event.is_background,
            parent_process_id=event.parent_process_id,
        )
        # Dispatched via the event's own bus attribute rather than self.bus —
        # NOTE(review): presumably bubus attaches the originating bus to each
        # event and these are the same object; confirm against bubus docs.
        await event.event_bus.dispatch(
            ProcessStarted(process=record, event_parent_id=parent_event_id)
        )
        # Only stdout is scanned for JSON events; stderr is log-only.
        stdout_task = asyncio.create_task(
            self._consume_stream(
                proc.stdout, stdout_path, parent_event_id, event.parse_stdout_events
            )
        )
        stderr_task = asyncio.create_task(
            self._consume_stream(proc.stderr, stderr_path, parent_event_id, False)
        )
        running = _RunningProcess(
            process=proc,
            record=record,
            stdout_task=stdout_task,
            stderr_task=stderr_task,
            watcher_task=None,
            parent_event_id=parent_event_id,
        )
        self._running[proc_id] = running
        if event.is_background:
            running.watcher_task = asyncio.create_task(
                self._watch_process(proc_id, event.timeout)
            )
            return record
        # Foreground launch: block until exit (with timeout escalation) before returning.
        await self._watch_process(proc_id, event.timeout)
        return self._running.get(proc_id, running).record

    async def on_ProcessKill(self, event: ProcessKill) -> ProcessRecord:
        """Signal a running process, escalate to SIGKILL on timeout, and finalize it.

        Raises:
            RuntimeError: if no process with the given id is being tracked.
        """
        running = self._running.get(event.process_id)
        if not running:
            raise RuntimeError(f'Process not found: {event.process_id}')
        proc = running.process
        self._terminate_process(proc, event.signal)
        if event.timeout is not None:
            try:
                await asyncio.wait_for(proc.wait(), timeout=event.timeout)
            except asyncio.TimeoutError:
                # Didn't die in time: force-kill.
                self._terminate_process(proc, signal.SIGKILL)
        else:
            await proc.wait()
        await self._finalize_process(event.process_id)
        # _finalize_process pops the entry, so fall back to the captured record.
        return self._running.get(event.process_id, running).record

    async def _watch_process(self, process_id: str, timeout: float | None) -> None:
        """Wait for process exit (optionally bounded), escalate signals, then finalize."""
        running = self._running.get(process_id)
        if not running:
            return
        proc = running.process
        try:
            if timeout is not None:
                await asyncio.wait_for(proc.wait(), timeout=timeout)
            else:
                await proc.wait()
        except asyncio.TimeoutError:
            # Graceful stop first, then SIGKILL if still alive 2s later.
            self._terminate_process(proc, signal.SIGTERM)
            await asyncio.sleep(2)
            if proc.returncode is None:
                self._terminate_process(proc, signal.SIGKILL)
            await proc.wait()
        await self._finalize_process(process_id)

    async def _finalize_process(self, process_id: str) -> None:
        """Drain output tasks, record exit metadata, emit ProcessExited, and untrack."""
        running = self._running.get(process_id)
        if not running:
            return
        proc = running.process
        record = running.record
        # Ensure all output has been written to the log files before reporting exit.
        if running.stdout_task:
            await running.stdout_task
        if running.stderr_task:
            await running.stderr_task
        record.exit_code = proc.returncode
        record.ended_at = _utcnow()
        await self.bus.dispatch(
            ProcessExited(process=record, event_parent_id=running.parent_event_id)
        )
        self._running.pop(process_id, None)

    async def _consume_stream(
        self,
        stream: asyncio.StreamReader | None,
        path: Path,
        parent_event_id: str | None,
        parse_events: bool,
    ) -> None:
        """Copy a child stream to a log file line-by-line, optionally parsing JSON events."""
        if stream is None:
            return
        with path.open('w', encoding='utf-8') as fh:
            while True:
                line = await stream.readline()
                if not line:
                    break  # EOF: child closed the stream
                text = line.decode('utf-8', errors='replace')
                fh.write(text)
                fh.flush()  # keep the log file live-tailable
                if parse_events:
                    await self._maybe_dispatch_json_event(text, parent_event_id)

    async def _maybe_dispatch_json_event(self, line: str, parent_event_id: str | None) -> None:
        """If a stdout line looks like a JSON object, convert it to an event and dispatch it.

        Non-JSON lines and unparseable payloads are silently ignored (best-effort).
        """
        text = line.strip()
        # Cheap pre-check: only attempt json.loads on lines shaped like an object.
        if not text.startswith('{') or not text.endswith('}'):
            return
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            return
        event = None
        if self.json_event_adapter:
            # Caller-provided adapter takes precedence over the generic path.
            event = self.json_event_adapter(data, parent_event_id)
        elif isinstance(data, dict) and 'event_type' in data:
            try:
                event = BaseEvent.model_validate(data)
            except Exception:
                event = None
        if event is None:
            return
        # Parent the event to the originating launch unless already parented.
        if not getattr(event, 'event_parent_id', None) and parent_event_id:
            event.event_parent_id = parent_event_id
        await self.bus.dispatch(event)

    @staticmethod
    def _write_cmd_file(path: Path, cmd: list[str]) -> None:
        """Write the launch command as a shell-quoted one-liner for later inspection."""
        cmd_line = ' '.join(shlex.quote(part) for part in cmd)
        path.write_text(cmd_line + '\n', encoding='utf-8')

    @staticmethod
    def _write_pid_file(path: Path, pid: int) -> None:
        """Write the child's pid and touch the file's mtime to the launch time."""
        path.write_text(str(pid), encoding='utf-8')
        ts = datetime.now().timestamp()
        os.utime(path, (ts, ts))

    @staticmethod
    def _terminate_process(proc: asyncio.subprocess.Process, sig: int) -> None:
        """Send a signal to the process group (falling back to the single pid).

        No-op if the process has already exited; signalling errors are swallowed
        deliberately (the process may have died between the check and the kill).
        """
        if proc.returncode is not None:
            return
        try:
            # Signal the whole group (child was started with start_new_session=True).
            os.killpg(proc.pid, sig)
        except Exception:
            try:
                os.kill(proc.pid, sig)
            except Exception:
                pass
__all__ = [
'ProcessRecord',
'ProcessLaunch',
'ProcessStarted',
'ProcessExited',
'ProcessKill',
'ProcessPlugin',
]

View File

@@ -1,21 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"required_plugins": ["chrome"],
"properties": {
"ACCESSIBILITY_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_ACCESSIBILITY", "USE_ACCESSIBILITY"],
"description": "Enable accessibility tree capture"
},
"ACCESSIBILITY_TIMEOUT": {
"type": "integer",
"default": 30,
"minimum": 5,
"x-fallback": "TIMEOUT",
"description": "Timeout for accessibility capture in seconds"
}
}
}

View File

@@ -1,288 +0,0 @@
#!/usr/bin/env node
/**
* Extract accessibility tree and page outline from a URL.
*
* Extracts:
* - Page outline (headings h1-h6, sections, articles)
* - Iframe tree
* - Accessibility snapshot
* - ARIA labels and roles
*
* Usage: on_Snapshot__39_accessibility.js --url=<url> --snapshot-id=<uuid>
* Output: Writes accessibility/accessibility.json
*
* Environment variables:
* SAVE_ACCESSIBILITY: Enable accessibility extraction (default: true)
*/
const fs = require('fs');
const path = require('path');
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
const puppeteer = require('puppeteer-core');
// Extractor metadata
const PLUGIN_NAME = 'accessibility';
const OUTPUT_DIR = '.';
const OUTPUT_FILE = 'accessibility.json';
const CHROME_SESSION_DIR = '../chrome';
const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
// Parse command line arguments
// Parse `--key=value` CLI arguments into a plain object.
// Dashes in keys become underscores; a bare `--flag` maps to `true`.
function parseArgs() {
    const parsed = {};
    for (const token of process.argv.slice(2)) {
        if (!token.startsWith('--')) continue;
        const eq = token.indexOf('=');
        const rawKey = eq === -1 ? token.slice(2) : token.slice(2, eq);
        const value = eq === -1 ? '' : token.slice(eq + 1);
        parsed[rawKey.replace(/-/g, '_')] = value || true;
    }
    return parsed;
}
// Get environment variable with default
// Read an environment variable (trimmed); falls back to `defaultValue`
// when the variable is unset or empty.
function getEnv(name, defaultValue = '') {
    const value = process.env[name] || defaultValue;
    return value.trim();
}
// Interpret an environment variable as a boolean.
// Accepts true/1/yes/on and false/0/no/off (case-insensitive);
// anything else yields `defaultValue`.
function getEnvBool(name, defaultValue = false) {
    switch (getEnv(name, '').toLowerCase()) {
        case 'true':
        case '1':
        case 'yes':
        case 'on':
            return true;
        case 'false':
        case '0':
        case 'no':
        case 'off':
            return false;
        default:
            return defaultValue;
    }
}
// Wait for chrome tab to be fully loaded
// Poll for the chrome plugin's navigation.json marker file, which signals
// that the shared tab has finished loading. Resolves to true as soon as the
// file exists, or false once timeoutMs has elapsed without it appearing.
async function waitForChromeTabLoaded(timeoutMs = 60000) {
    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
    const startTime = Date.now();
    while (Date.now() - startTime < timeoutMs) {
        if (fs.existsSync(navigationFile)) {
            return true;
        }
        // Wait 100ms before checking again
        await new Promise(resolve => setTimeout(resolve, 100));
    }
    return false;
}
// Get CDP URL from chrome plugin
// Read the CDP websocket URL written by the chrome plugin,
// or null when the session file does not exist yet.
function getCdpUrl() {
    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
    if (!fs.existsSync(cdpFile)) return null;
    return fs.readFileSync(cdpFile, 'utf8').trim();
}
// Verify that the chrome plugin's session artifacts exist and that the
// browser process is still alive. Returns the CDP websocket URL, or throws
// CHROME_SESSION_REQUIRED_ERROR when any part of the session is missing/dead.
function assertChromeSession() {
    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
    const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
    const pidFile = path.join(CHROME_SESSION_DIR, 'chrome.pid');
    if (!fs.existsSync(cdpFile) || !fs.existsSync(targetIdFile) || !fs.existsSync(pidFile)) {
        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
    }
    try {
        const pid = parseInt(fs.readFileSync(pidFile, 'utf8').trim(), 10);
        if (!pid || Number.isNaN(pid)) throw new Error('Invalid pid');
        // Signal 0 performs an existence check without delivering a signal;
        // throws if the chrome process has exited.
        process.kill(pid, 0);
    } catch (e) {
        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
    }
    const cdpUrl = getCdpUrl();
    if (!cdpUrl) {
        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
    }
    return cdpUrl;
}
// Extract accessibility info
// Extract the accessibility tree, a textual page outline, and the iframe tree
// from the page already open in the shared Chrome session, then write the
// combined JSON to accessibility.json in the current directory.
// Returns { success: true, output, accessibilityData } on success,
// or { success: false, error } on any failure (never throws).
async function extractAccessibility(url) {
    // Output directory is current directory (hook already runs in output dir)
    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
    let browser = null;
    try {
        // Connect to existing Chrome session (throws if chrome plugin hasn't run)
        const cdpUrl = assertChromeSession();
        browser = await puppeteer.connect({
            browserWSEndpoint: cdpUrl,
        });
        // Pick the first http(s) page, falling back to whatever page exists
        const pages = await browser.pages();
        const page = pages.find(p => p.url().startsWith('http')) || pages[0];
        if (!page) {
            return { success: false, error: 'No page found in Chrome session' };
        }
        // Get accessibility snapshot (interesting nodes only)
        const accessibilityTree = await page.accessibility.snapshot({ interestingOnly: true });
        // Extract page outline (headings, sections, etc.) in the page context
        const outline = await page.evaluate(() => {
            const headings = [];
            const elements = document.querySelectorAll(
                'h1, h2, h3, h4, h5, h6, a[name], header, footer, article, main, aside, nav, section, figure, summary, table, form, iframe'
            );
            elements.forEach(elem => {
                // Skip unnamed anchors
                if (elem.tagName.toLowerCase() === 'a' && !elem.name) return;
                const tagName = elem.tagName.toLowerCase();
                const elemId = elem.id || elem.name || elem.getAttribute('aria-label') || elem.role || '';
                const elemClasses = (elem.className || '').toString().trim().split(/\s+/).slice(0, 3).join(' .');
                const action = elem.action?.split('/').pop() || '';
                // Truncate long text content to 128 chars with an ellipsis
                let summary = (elem.innerText || '').slice(0, 128);
                if (summary.length >= 128) summary += '...';
                let prefix = '';
                let title = '';
                // Format headings with # prefix (markdown-style: h2 -> '##')
                const level = parseInt(tagName.replace('h', ''));
                if (!isNaN(level)) {
                    prefix = '#'.repeat(level);
                    title = elem.innerText || elemId || elemClasses;
                } else {
                    // For other elements, create breadcrumb path of up to 5 ancestors,
                    // blanking out generic wrappers (div/span/p/body/html)
                    const parents = [tagName];
                    let node = elem.parentNode;
                    while (node && parents.length < 5) {
                        if (node.tagName) {
                            const tag = node.tagName.toLowerCase();
                            if (!['div', 'span', 'p', 'body', 'html'].includes(tag)) {
                                parents.unshift(tag);
                            } else {
                                parents.unshift('');
                            }
                        }
                        node = node.parentNode;
                    }
                    prefix = parents.join('>');
                    title = elemId ? `#${elemId}` : '';
                    if (!title && elemClasses) title = `.${elemClasses}`;
                    if (action) title += ` /${action}`;
                    if (summary && !title.includes(summary)) title += `: ${summary}`;
                }
                // Clean up title: collapse whitespace runs
                title = title.replace(/\s+/g, ' ').trim();
                if (prefix) {
                    headings.push(`${prefix} ${title}`);
                }
            });
            return headings;
        });
        // Get iframe tree as '>'-indented URLs (depth = number of '>' chars)
        const iframes = [];
        function dumpFrameTree(frame, indent = '>') {
            iframes.push(indent + frame.url());
            for (const child of frame.childFrames()) {
                dumpFrameTree(child, indent + '>');
            }
        }
        dumpFrameTree(page.mainFrame(), '');
        const accessibilityData = {
            url,
            headings: outline,
            iframes,
            tree: accessibilityTree,
        };
        // Write output
        fs.writeFileSync(outputPath, JSON.stringify(accessibilityData, null, 2));
        return { success: true, output: outputPath, accessibilityData };
    } catch (e) {
        return { success: false, error: `${e.name}: ${e.message}` };
    } finally {
        // Disconnect (not close) so the shared Chrome session stays alive
        if (browser) {
            browser.disconnect();
        }
    }
}
/**
 * CLI entry point for the accessibility hook.
 *
 * Parses --url/--snapshot-id, honors the ACCESSIBILITY_ENABLED toggle, waits
 * for the shared Chrome tab to finish loading (chrome_navigate must have run
 * first), runs the extraction, then emits exactly one ArchiveResult JSONL
 * record on stdout. Exit code: 0 on success/skip, 1 on failure.
 *
 * Fix: removed `startTs`/`endTs` locals — both were assigned but never read.
 */
async function main() {
  const args = parseArgs();
  const url = args.url;
  const snapshotId = args.snapshot_id;
  if (!url || !snapshotId) {
    console.error('Usage: on_Snapshot__39_accessibility.js --url=<url> --snapshot-id=<uuid>');
    process.exit(1);
  }
  let status = 'failed';
  let output = null;
  let error = '';
  try {
    // Check if enabled
    if (!getEnvBool('ACCESSIBILITY_ENABLED', true)) {
      console.log('Skipping accessibility (ACCESSIBILITY_ENABLED=False)');
      // Output clean JSONL (no RESULT_JSON= prefix)
      console.log(JSON.stringify({
        type: 'ArchiveResult',
        status: 'skipped',
        output_str: 'ACCESSIBILITY_ENABLED=False',
      }));
      process.exit(0);
    }
    // Check if Chrome session exists, then wait for page load
    assertChromeSession();
    const pageLoaded = await waitForChromeTabLoaded(60000);
    if (!pageLoaded) {
      throw new Error('Page not loaded after 60s (chrome_navigate must complete first)');
    }
    const result = await extractAccessibility(url);
    if (result.success) {
      status = 'succeeded';
      output = result.output;
      const headingCount = result.accessibilityData.headings.length;
      const iframeCount = result.accessibilityData.iframes.length;
      console.log(`Accessibility extracted: ${headingCount} headings, ${iframeCount} iframes`);
    } else {
      status = 'failed';
      error = result.error;
    }
  } catch (e) {
    error = `${e.name}: ${e.message}`;
    status = 'failed';
  }
  if (error) console.error(`ERROR: ${error}`);
  // Output clean JSONL (no RESULT_JSON= prefix)
  console.log(JSON.stringify({
    type: 'ArchiveResult',
    status,
    output_str: output || error || '',
  }));
  process.exit(status === 'succeeded' ? 0 : 1);
}
// Entry point: surface any unhandled rejection from main() as a fatal error.
main().catch((err) => {
  console.error(`Fatal error: ${err.message}`);
  process.exit(1);
});

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--accessibility" title="Accessibility"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="4.5" r="2" fill="currentColor" stroke="none"/><path d="M4 7.5h16"/><path d="M12 7.5v12"/><path d="M7 20l5-6 5 6"/></svg></span>

View File

@@ -1,195 +0,0 @@
"""
Tests for the accessibility plugin.
Tests the real accessibility hook with an actual URL to verify
accessibility tree and page outline extraction.
"""
import json
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
from django.test import TestCase
# Import chrome test helpers
sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
from chrome_test_helpers import (
chrome_session,
get_test_env,
get_plugin_dir,
get_hook_script,
)
def chrome_available() -> bool:
    """Return True if any known Chrome/Chromium binary is on the PATH."""
    candidates = ('chromium', 'chromium-browser', 'google-chrome', 'chrome')
    return any(shutil.which(candidate) for candidate in candidates)
# Get the path to the accessibility hook
PLUGIN_DIR = get_plugin_dir(__file__)
ACCESSIBILITY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_accessibility.*')
class TestAccessibilityPlugin(TestCase):
    """Basic sanity checks for the accessibility plugin's hook discovery."""

    def test_accessibility_hook_exists(self):
        """The accessibility hook script must be present in the plugin directory."""
        hook = ACCESSIBILITY_HOOK
        self.assertIsNotNone(hook, "Accessibility hook not found in plugin directory")
        self.assertTrue(hook.exists(), f"Hook not found: {ACCESSIBILITY_HOOK}")
class TestAccessibilityWithChrome(TestCase):
    """Integration tests for accessibility plugin with Chrome.

    These tests invoke the real hook script via subprocess; the first test
    also boots a live Chrome session through chrome_test_helpers.

    Fix: removed a no-op ``try: ... except RuntimeError: raise`` wrapper
    around the chrome_session block — re-raising unchanged is the default
    behavior, so the wrapper only added indentation.
    """

    def setUp(self):
        """Create a scratch directory for hook output files."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def tearDown(self):
        """Remove the scratch directory (best-effort)."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_accessibility_extracts_page_outline(self):
        """Accessibility hook should extract headings and accessibility tree."""
        test_url = 'https://example.com'
        snapshot_id = 'test-accessibility-snapshot'
        with chrome_session(
            self.temp_dir,
            crawl_id='test-accessibility-crawl',
            snapshot_id=snapshot_id,
            test_url=test_url,
            navigate=True,
            timeout=30,
        ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
            # Use the environment from chrome_session (already has CHROME_HEADLESS=true)
            # Run accessibility hook with the active Chrome session
            result = subprocess.run(
                ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
                cwd=str(snapshot_chrome_dir),
                capture_output=True,
                text=True,
                timeout=60,
                env=env
            )
            # Check for output file
            accessibility_output = snapshot_chrome_dir / 'accessibility.json'
            accessibility_data = None
            # Try parsing from file first
            if accessibility_output.exists():
                with open(accessibility_output) as f:
                    try:
                        accessibility_data = json.load(f)
                    except json.JSONDecodeError:
                        pass
            # Verify hook ran successfully
            self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
            self.assertNotIn('Traceback', result.stderr)
            # example.com has headings, so we should get accessibility data
            self.assertIsNotNone(accessibility_data, "No accessibility data was generated")
            # Verify we got page outline data
            self.assertIn('headings', accessibility_data, f"Missing headings: {accessibility_data}")
            self.assertIn('url', accessibility_data, f"Missing url: {accessibility_data}")

    def test_accessibility_disabled_skips(self):
        """Test that ACCESSIBILITY_ENABLED=False skips without error."""
        test_url = 'https://example.com'
        snapshot_id = 'test-disabled'
        env = get_test_env()
        env['ACCESSIBILITY_ENABLED'] = 'False'
        result = subprocess.run(
            ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
            cwd=str(self.temp_dir),
            capture_output=True,
            text=True,
            timeout=30,
            env=env
        )
        # Should exit 0 even when disabled
        self.assertEqual(result.returncode, 0, f"Should succeed when disabled: {result.stderr}")
        # Should NOT create output file when disabled
        accessibility_output = self.temp_dir / 'accessibility.json'
        self.assertFalse(accessibility_output.exists(), "Should not create file when disabled")

    def test_accessibility_missing_url_argument(self):
        """Test that missing --url argument causes error."""
        snapshot_id = 'test-missing-url'
        result = subprocess.run(
            ['node', str(ACCESSIBILITY_HOOK), f'--snapshot-id={snapshot_id}'],
            cwd=str(self.temp_dir),
            capture_output=True,
            text=True,
            timeout=30,
            env=get_test_env()
        )
        # Should fail with non-zero exit code
        self.assertNotEqual(result.returncode, 0, "Should fail when URL missing")

    def test_accessibility_missing_snapshot_id_argument(self):
        """Test that missing --snapshot-id argument causes error."""
        test_url = 'https://example.com'
        result = subprocess.run(
            ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}'],
            cwd=str(self.temp_dir),
            capture_output=True,
            text=True,
            timeout=30,
            env=get_test_env()
        )
        # Should fail with non-zero exit code
        self.assertNotEqual(result.returncode, 0, "Should fail when snapshot-id missing")

    def test_accessibility_with_no_chrome_session(self):
        """Test that hook fails gracefully when no Chrome session exists."""
        test_url = 'https://example.com'
        snapshot_id = 'test-no-chrome'
        result = subprocess.run(
            ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
            cwd=str(self.temp_dir),
            capture_output=True,
            text=True,
            timeout=30,
            env=get_test_env()
        )
        # Should fail when no Chrome session
        self.assertNotEqual(result.returncode, 0, "Should fail when no Chrome session exists")
        # Error should mention CDP or Chrome
        err_lower = result.stderr.lower()
        self.assertTrue(
            any(x in err_lower for x in ['chrome', 'cdp', 'cannot find', 'puppeteer']),
            f"Should mention Chrome/CDP in error: {result.stderr}"
        )
# Allow running this test module directly (outside the project test runner).
if __name__ == '__main__':
    pytest.main([__file__, '-v'])

View File

@@ -1,83 +0,0 @@
#!/usr/bin/env python3
"""
Install a binary using apt package manager.
Usage: on_Binary__install_using_apt_provider.py --binary-id=<uuid> --machine-id=<uuid> --name=<name>
Output: Binary JSONL record to stdout after installation
"""
import json
import sys
import rich_click as click
from abx_pkg import Binary, AptProvider, BinProviderOverrides
# Fix pydantic forward reference issue
AptProvider.model_rebuild()
@click.command()
@click.option('--binary-id', required=True, help="Binary UUID")
@click.option('--machine-id', required=True, help="Machine UUID")
@click.option('--name', required=True, help="Binary name to install")
@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
@click.option('--overrides', default=None, help="JSON-encoded overrides dict")
def main(binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None):
    """Install binary using apt package manager.

    Emits a Binary JSONL record on stdout on success; human-readable progress
    goes to stderr. Exit codes: 0 = installed (or provider skipped),
    1 = apt unavailable or install failed.
    """
    # Respect the allowed-provider filter: skipping is not an error, so exit 0.
    apt_allowed = binproviders == '*' or 'apt' in binproviders.split(',')
    if not apt_allowed:
        click.echo(f"apt provider not allowed for {name}", err=True)
        sys.exit(0)  # Not an error, just skip

    # Use abx-pkg AptProvider to install binary
    provider = AptProvider()
    if not provider.INSTALLER_BIN:
        click.echo("apt not available on this system", err=True)
        sys.exit(1)

    click.echo(f"Installing {name} via apt...", err=True)
    try:
        # Parse overrides if provided; only the apt-specific sub-dict is used.
        # Malformed JSON is ignored with a warning (best-effort).
        apt_overrides = None
        if overrides:
            try:
                apt_overrides = json.loads(overrides).get('apt', {})
                click.echo(f"Using apt install overrides: {apt_overrides}", err=True)
            except json.JSONDecodeError:
                click.echo(f"Warning: Failed to parse overrides JSON: {overrides}", err=True)
        binary = Binary(
            name=name,
            binproviders=[provider],
            overrides={'apt': apt_overrides} if apt_overrides else {},
        ).install()
    except Exception as e:
        click.echo(f"apt install failed: {e}", err=True)
        sys.exit(1)

    if not binary.abspath:
        click.echo(f"{name} not found after apt install", err=True)
        sys.exit(1)

    # Output Binary JSONL record to stdout (the only thing written to stdout)
    record = {
        'type': 'Binary',
        'name': name,
        'abspath': str(binary.abspath),
        'version': str(binary.version) if binary.version else '',
        'sha256': binary.sha256 or '',
        'binprovider': 'apt',
        'machine_id': machine_id,
        'binary_id': binary_id,
    }
    print(json.dumps(record))
    # Log human-readable info to stderr
    click.echo(f"Installed {name} at {binary.abspath}", err=True)
    click.echo(f"  version: {binary.version}", err=True)
    sys.exit(0)
# Standard click entry point: run the installer when invoked as a script.
if __name__ == '__main__':
    main()

View File

@@ -1,154 +0,0 @@
"""
Tests for the apt binary provider plugin.
Tests cover:
1. Hook script execution
2. apt package availability detection
3. JSONL output format
"""
import json
import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
from django.test import TestCase
# Get the path to the apt provider hook
PLUGIN_DIR = Path(__file__).parent.parent
INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_apt_install.py'), None)
def apt_available() -> bool:
    """Return True if either apt or apt-get can be found on the PATH."""
    return any(shutil.which(tool) is not None for tool in ('apt', 'apt-get'))
def is_linux() -> bool:
    """Return True when the current host OS reports itself as Linux."""
    import platform
    system_name = platform.system()
    return system_name.lower() == 'linux'
class TestAptProviderHook(TestCase):
    """Test the apt binary provider installation hook.

    Each test shells out to the real hook script (INSTALL_HOOK) with the
    current Python interpreter and inspects its exit code / stderr / stdout.
    """

    def setUp(self):
        """Set up test environment."""
        # Scratch directory (string path; tearDown removes it best-effort).
        self.temp_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Clean up."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_hook_script_exists(self):
        """Hook script should exist."""
        # INSTALL_HOOK is resolved at module import via glob and may be None.
        self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")

    def test_hook_skips_when_apt_not_allowed(self):
        """Hook should skip when apt not in allowed binproviders."""
        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=wget',
                '--binary-id=test-uuid',
                '--machine-id=test-machine',
                '--binproviders=pip,npm',  # apt not allowed
            ],
            capture_output=True,
            text=True,
            timeout=30
        )
        # Should exit cleanly (code 0) when apt not allowed
        self.assertIn('apt provider not allowed', result.stderr)
        self.assertEqual(result.returncode, 0)

    @pytest.mark.skipif(not is_linux(), reason="apt only available on Linux")
    def test_hook_detects_apt(self):
        """Hook should detect apt binary when available."""
        assert apt_available(), "apt not installed"
        # Deliberately use a package name that cannot exist: we only care that
        # the hook got past the "apt not available" check, not that it installs.
        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=nonexistent-pkg-xyz123',
                '--binary-id=test-uuid',
                '--machine-id=test-machine',
            ],
            capture_output=True,
            text=True,
            timeout=30
        )
        # Should not say apt is not available
        self.assertNotIn('apt not available', result.stderr)

    def test_hook_handles_overrides(self):
        """Hook should accept overrides JSON."""
        # Overrides are keyed by provider name; the hook extracts the 'apt' entry.
        overrides = json.dumps({
            'apt': {'packages': ['custom-package-name']}
        })
        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=test-pkg',
                '--binary-id=test-uuid',
                '--machine-id=test-machine',
                f'--overrides={overrides}',
            ],
            capture_output=True,
            text=True,
            timeout=30
        )
        # Should not crash parsing overrides
        self.assertNotIn('Traceback', result.stderr)
@pytest.mark.skipif(not is_linux(), reason="apt only available on Linux")
class TestAptProviderSystemBinaries(TestCase):
    """Test apt provider with system binaries."""

    def test_detect_existing_binary(self):
        """apt provider should detect already-installed system binaries."""
        assert apt_available(), "apt not installed"
        # Check for a binary that's almost certainly installed (like 'ls' or 'bash')
        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=bash',
                '--binary-id=test-uuid',
                '--machine-id=test-machine',
            ],
            capture_output=True,
            text=True,
            timeout=60
        )
        # Parse JSONL output: look for the Binary record the hook prints on success.
        for line in result.stdout.split('\n'):
            line = line.strip()
            if line.startswith('{'):
                try:
                    record = json.loads(line)
                    if record.get('type') == 'Binary' and record.get('name') == 'bash':
                        # Found bash: its reported abspath must exist on disk.
                        self.assertTrue(record.get('abspath'))
                        self.assertTrue(Path(record['abspath']).exists())
                        return
                except json.JSONDecodeError:
                    continue
        # apt may not be able to "install" bash (already installed)
        # Just verify no crash
        self.assertNotIn('Traceback', result.stderr)
# Allow running this test module directly (outside the project test runner).
if __name__ == '__main__':
    pytest.main([__file__, '-v'])

View File

@@ -1,26 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"properties": {
"ARCHIVEDOTORG_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_ARCHIVEDOTORG", "USE_ARCHIVEDOTORG", "SUBMIT_ARCHIVEDOTORG"],
"description": "Submit URLs to archive.org Wayback Machine"
},
"ARCHIVEDOTORG_TIMEOUT": {
"type": "integer",
"default": 60,
"minimum": 10,
"x-fallback": "TIMEOUT",
"description": "Timeout for archive.org submission in seconds"
},
"ARCHIVEDOTORG_USER_AGENT": {
"type": "string",
"default": "",
"x-fallback": "USER_AGENT",
"description": "User agent string"
}
}
}

View File

@@ -1,154 +0,0 @@
#!/usr/bin/env python3
"""
Submit a URL to archive.org for archiving.
Usage: on_Snapshot__archivedotorg.bg.py --url=<url> --snapshot-id=<uuid>
Output: Writes archive.org.txt to $PWD with the archived URL
Environment variables:
ARCHIVEDOTORG_TIMEOUT: Timeout in seconds (default: 60)
USER_AGENT: User agent string
# Fallback to ARCHIVING_CONFIG values if ARCHIVEDOTORG_* not set:
TIMEOUT: Fallback timeout
Note: This extractor uses the 'requests' library which is bundled with ArchiveBox.
It can run standalone if requests is installed: pip install requests
"""
import json
import os
import sys
from pathlib import Path
import rich_click as click
# Extractor metadata
PLUGIN_NAME = 'archivedotorg'
OUTPUT_DIR = '.'
OUTPUT_FILE = 'archive.org.txt'
def get_env(name: str, default: str = '') -> str:
    """Read an environment variable, stripping surrounding whitespace.

    Note: the default is stripped too, matching the original behavior.
    """
    value = os.environ.get(name, default)
    return value.strip()
def get_env_int(name: str, default: int = 0) -> int:
    """Read an integer environment variable, returning *default* on parse failure."""
    raw = get_env(name, str(default))
    try:
        return int(raw)
    except ValueError:
        return default
def submit_to_archivedotorg(url: str) -> tuple[bool, str | None, str]:
    """
    Submit URL to archive.org Wayback Machine.

    Writes the resulting archive URL (or, as a manual-retry fallback, the
    submit URL itself) to OUTPUT_FILE in the current working directory.

    Returns: (success, output_path, error_message)
    """
    def log(message: str) -> None:
        # Human-readable progress goes to stderr; stdout is reserved for JSONL.
        print(f'[archivedotorg] {message}', file=sys.stderr)

    try:
        import requests
    except ImportError:
        return False, None, 'requests library not installed'

    # ARCHIVEDOTORG_TIMEOUT wins; fall back to the global TIMEOUT, then 60s.
    timeout = get_env_int('ARCHIVEDOTORG_TIMEOUT') or get_env_int('TIMEOUT', 60)
    user_agent = get_env('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')

    submit_url = f'https://web.archive.org/save/{url}'
    log(f'Submitting to Wayback Machine (timeout={timeout}s)')
    log(f'GET {submit_url}')
    try:
        response = requests.get(
            submit_url,
            timeout=timeout,
            headers={'User-Agent': user_agent},
            allow_redirects=True,
        )
        log(f'HTTP {response.status_code} final_url={response.url}')
        # Check for successful archive
        # These response headers indicate the save was processed.
        content_location = response.headers.get('Content-Location', '')
        x_archive_orig_url = response.headers.get('X-Archive-Orig-Url', '')
        if content_location:
            log(f'Content-Location: {content_location}')
        if x_archive_orig_url:
            log(f'X-Archive-Orig-Url: {x_archive_orig_url}')
        # Build archive URL
        if content_location:
            # NOTE(review): assumes Content-Location is a root-relative path
            # (e.g. /web/...) — confirm it is never an absolute URL.
            archive_url = f'https://web.archive.org{content_location}'
            Path(OUTPUT_FILE).write_text(archive_url, encoding='utf-8')
            log(f'Saved archive URL -> {archive_url}')
            return True, OUTPUT_FILE, ''
        elif 'web.archive.org' in response.url:
            # We were redirected to an archive page
            Path(OUTPUT_FILE).write_text(response.url, encoding='utf-8')
            log(f'Redirected to archive page -> {response.url}')
            return True, OUTPUT_FILE, ''
        else:
            # Check for errors in response
            if 'RobotAccessControlException' in response.text:
                # Blocked by robots.txt - save submit URL for manual retry
                Path(OUTPUT_FILE).write_text(submit_url, encoding='utf-8')
                log('Blocked by robots.txt, saved submit URL for manual retry')
                return True, OUTPUT_FILE, ''  # Consider this a soft success
            elif response.status_code >= 400:
                return False, None, f'HTTP {response.status_code}'
            else:
                # Save submit URL anyway
                Path(OUTPUT_FILE).write_text(submit_url, encoding='utf-8')
                log('No archive URL returned, saved submit URL for manual retry')
                return True, OUTPUT_FILE, ''
    except requests.Timeout:
        return False, None, f'Request timed out after {timeout} seconds'
    except requests.RequestException as e:
        # Network-level failures (DNS, connection reset, TLS, ...).
        return False, None, f'{type(e).__name__}: {e}'
    except Exception as e:
        # Defensive catch-all so the hook reports an error instead of crashing.
        return False, None, f'{type(e).__name__}: {e}'
@click.command()
@click.option('--url', required=True, help='URL to submit to archive.org')
@click.option('--snapshot-id', required=True, help='Snapshot UUID')
def main(url: str, snapshot_id: str):
    """Submit a URL to archive.org for archiving.

    Contract: on success, print exactly one ArchiveResult JSONL record to
    stdout and exit 0. On a transient error (network/timeout/HTTP), print
    nothing to stdout and exit 1 so the orchestrator retries later. When the
    feature is disabled, exit 0 with no JSONL.
    """
    # Check if feature is enabled
    enabled = get_env('ARCHIVEDOTORG_ENABLED', 'True').lower() not in ('false', '0', 'no', 'off')
    if not enabled:
        print('Skipping archive.org submission (ARCHIVEDOTORG_ENABLED=False)', file=sys.stderr)
        # Temporary failure (config disabled) - NO JSONL emission
        sys.exit(0)

    try:
        # Run extraction
        success, output, error = submit_to_archivedotorg(url)
        if success:
            # Success - emit ArchiveResult with output file
            print(json.dumps({
                'type': 'ArchiveResult',
                'status': 'succeeded',
                'output_str': output or '',
            }))
            sys.exit(0)
        # Transient error (network, timeout, HTTP error) - emit NO JSONL
        # System will retry later
        print(f'ERROR: {error}', file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Unexpected error - also transient, emit NO JSONL
        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
        sys.exit(1)
# Standard click entry point: run the submitter when invoked as a script.
if __name__ == '__main__':
    main()

View File

@@ -1,12 +0,0 @@
{% load config_tags %}
{% get_config "ARCHIVEDOTORG_ENABLED" as enabled %}
{# Render the preview only when the archivedotorg extractor is enabled. #}
{% if enabled %}
<!-- Archive.org thumbnail - iframe preview of archived page -->
{# The iframe is display-only: pointer-events disabled, lazy-loaded, and sandboxed
   to same-origin with no scripts. output_path is supplied by the including view —
   presumably the extractor's saved output; verify against the caller. #}
<div class="extractor-thumbnail archivedotorg-thumbnail" style="width: 100%; height: 100px; overflow: hidden;">
<iframe src="{{ output_path }}"
style="width: 100%; height: 100px; border: none; pointer-events: none;"
loading="lazy"
sandbox="allow-same-origin">
</iframe>
</div>
{% endif %}

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--archivedotorg" title="Archive.org"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M3 7h18"/><rect x="3" y="7" width="18" height="13" rx="2"/><path d="M9 12h6"/></svg></span>

View File

@@ -1,93 +0,0 @@
"""
Integration tests for archivedotorg plugin
Tests verify standalone archive.org extractor execution.
"""
import json
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
PLUGIN_DIR = Path(__file__).parent.parent
ARCHIVEDOTORG_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_archivedotorg.*'), None)
TEST_URL = 'https://example.com'
def test_hook_script_exists():
    """The archivedotorg hook script must be discoverable in the plugin dir.

    ARCHIVEDOTORG_HOOK is resolved via ``next(glob, None)`` at import time;
    the previous assertion called ``.exists()`` directly, which raised an
    AttributeError (instead of a clean assertion failure) when the glob
    matched nothing. Guard against None explicitly.
    """
    assert ARCHIVEDOTORG_HOOK is not None, 'archivedotorg hook script not found in plugin dir'
    assert ARCHIVEDOTORG_HOOK.exists()
def test_submits_to_archivedotorg():
    """Run the real hook against archive.org and validate its JSONL contract."""
    with tempfile.TemporaryDirectory() as tmpdir:
        result = subprocess.run(
            [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
            cwd=tmpdir, capture_output=True, text=True, timeout=60
        )
        # Live-network test: success (0) and transient failure (1) are both acceptable.
        assert result.returncode in (0, 1)
        # Parse clean JSONL output
        result_json = None
        for line in result.stdout.strip().split('\n'):
            line = line.strip()
            if line.startswith('{'):
                try:
                    record = json.loads(line)
                    if record.get('type') == 'ArchiveResult':
                        result_json = record
                        break
                except json.JSONDecodeError:
                    pass
        if result.returncode == 0:
            # Success - should have ArchiveResult
            assert result_json, "Should have ArchiveResult JSONL output on success"
            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
        else:
            # Transient error - no JSONL output, just stderr
            assert not result_json, "Should NOT emit JSONL on transient error"
            assert result.stderr, "Should have error message in stderr"
def test_config_save_archivedotorg_false_skips():
    """ARCHIVEDOTORG_ENABLED=False must exit 0 without emitting any JSONL."""
    with tempfile.TemporaryDirectory() as tmpdir:
        import os
        env = os.environ.copy()
        env['ARCHIVEDOTORG_ENABLED'] = 'False'
        result = subprocess.run(
            [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30
        )
        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
        # Feature disabled - temporary failure, should NOT emit JSONL
        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
        # Should NOT emit any JSONL
        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
def test_handles_timeout():
    """A 1-second timeout must not hang the hook and must follow the no-JSONL-on-error rule."""
    with tempfile.TemporaryDirectory() as tmpdir:
        import os
        env = os.environ.copy()
        # TIMEOUT is the global fallback the hook uses when ARCHIVEDOTORG_TIMEOUT is unset.
        env['TIMEOUT'] = '1'
        result = subprocess.run(
            [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'testtimeout'],
            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30
        )
        # Timeout is a transient error - should exit 1 with no JSONL
        assert result.returncode in (0, 1), "Should complete without hanging"
        # If it timed out (exit 1), should have no JSONL output
        if result.returncode == 1:
            jsonl_lines = [line for line in result.stdout.strip().split('\n')
                           if line.strip().startswith('{')]
            assert len(jsonl_lines) == 0, "Should not emit JSONL on timeout (transient error)"
# Allow running this test module directly (outside the project test runner).
if __name__ == '__main__':
    pytest.main([__file__, '-v'])

View File

@@ -1,87 +0,0 @@
#!/usr/bin/env python3
"""
Install a binary using Homebrew package manager.
Usage: on_Binary__install_using_brew_provider.py --binary-id=<uuid> --machine-id=<uuid> --name=<name> [--custom-cmd=<cmd>]
Output: Binary JSONL record to stdout after installation
Environment variables:
MACHINE_ID: Machine UUID (set by orchestrator)
"""
import json
import os
import sys
import rich_click as click
from abx_pkg import Binary, BrewProvider, BinProviderOverrides
# Fix pydantic forward reference issue
BrewProvider.model_rebuild()
@click.command()
@click.option('--machine-id', required=True, help="Machine UUID")
@click.option('--binary-id', required=True, help="Dependency UUID")
@click.option('--name', required=True, help="Binary name to install")
@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
@click.option('--custom-cmd', default=None, help="Custom install command")
@click.option('--overrides', default=None, help="JSON-encoded overrides dict")
def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_cmd: str | None, overrides: str | None):
    """Install binary using Homebrew.

    Emits a Binary JSONL record on stdout on success; human-readable progress
    goes to stderr. Exit codes: 0 = installed (or provider skipped),
    1 = brew unavailable or install failed.

    Note: --custom-cmd is accepted for interface compatibility but is not
    currently used by this hook.

    Fix: the required --machine-id CLI value was previously overwritten
    unconditionally by os.environ['MACHINE_ID'] (possibly empty); the CLI
    value now takes precedence, with the environment as fallback.
    """
    # Respect the allowed-provider filter: skipping is not an error, so exit 0.
    if binproviders != '*' and 'brew' not in binproviders.split(','):
        click.echo(f"brew provider not allowed for {name}", err=True)
        sys.exit(0)

    # Use abx-pkg BrewProvider to install binary
    provider = BrewProvider()
    if not provider.INSTALLER_BIN:
        click.echo("brew not available on this system", err=True)
        sys.exit(1)

    click.echo(f"Installing {name} via brew...", err=True)
    try:
        # Parse overrides if provided (malformed JSON is ignored with a warning)
        overrides_dict = None
        if overrides:
            try:
                overrides_dict = json.loads(overrides)
                click.echo(f"Using custom install overrides: {overrides_dict}", err=True)
            except json.JSONDecodeError:
                click.echo(f"Warning: Failed to parse overrides JSON: {overrides}", err=True)
        binary = Binary(name=name, binproviders=[provider], overrides=overrides_dict or {}).install()
    except Exception as e:
        click.echo(f"brew install failed: {e}", err=True)
        sys.exit(1)

    if not binary.abspath:
        click.echo(f"{name} not found after brew install", err=True)
        sys.exit(1)

    # Prefer the explicit CLI value; fall back to the orchestrator-set env var.
    machine_id = machine_id or os.environ.get('MACHINE_ID', '')

    # Output Binary JSONL record to stdout
    record = {
        'type': 'Binary',
        'name': name,
        'abspath': str(binary.abspath),
        'version': str(binary.version) if binary.version else '',
        'sha256': binary.sha256 or '',
        'binprovider': 'brew',
        'machine_id': machine_id,
        'binary_id': binary_id,
    }
    print(json.dumps(record))
    # Log human-readable info to stderr
    click.echo(f"Installed {name} at {binary.abspath}", err=True)
    click.echo(f"  version: {binary.version}", err=True)
    sys.exit(0)
# Standard click entry point: run the installer when invoked as a script.
if __name__ == '__main__':
    main()

File diff suppressed because it is too large Load Diff

View File

@@ -1,157 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"properties": {
"CHROME_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["USE_CHROME"],
"description": "Enable Chromium browser integration for archiving"
},
"CHROME_BINARY": {
"type": "string",
"default": "chromium",
"x-aliases": ["CHROMIUM_BINARY", "GOOGLE_CHROME_BINARY"],
"description": "Path to Chromium binary"
},
"CHROME_NODE_BINARY": {
"type": "string",
"default": "node",
"x-fallback": "NODE_BINARY",
"description": "Path to Node.js binary (for Puppeteer)"
},
"CHROME_TIMEOUT": {
"type": "integer",
"default": 60,
"minimum": 5,
"x-fallback": "TIMEOUT",
"description": "Timeout for Chrome operations in seconds"
},
"CHROME_HEADLESS": {
"type": "boolean",
"default": true,
"description": "Run Chrome in headless mode"
},
"CHROME_SANDBOX": {
"type": "boolean",
"default": true,
"description": "Enable Chrome sandbox (disable in Docker with --no-sandbox)"
},
"CHROME_RESOLUTION": {
"type": "string",
"default": "1440,2000",
"pattern": "^\\d+,\\d+$",
"x-fallback": "RESOLUTION",
"description": "Browser viewport resolution (width,height)"
},
"CHROME_USER_DATA_DIR": {
"type": "string",
"default": "",
"description": "Path to Chrome user data directory for persistent sessions (derived from ACTIVE_PERSONA if not set)"
},
"CHROME_USER_AGENT": {
"type": "string",
"default": "",
"x-fallback": "USER_AGENT",
"description": "User agent string for Chrome"
},
"CHROME_ARGS": {
"type": "array",
"items": {"type": "string"},
"default": [
"--no-first-run",
"--no-default-browser-check",
"--disable-default-apps",
"--disable-sync",
"--disable-infobars",
"--disable-blink-features=AutomationControlled",
"--disable-component-update",
"--disable-domain-reliability",
"--disable-breakpad",
"--disable-client-side-phishing-detection",
"--disable-hang-monitor",
"--disable-speech-synthesis-api",
"--disable-speech-api",
"--disable-print-preview",
"--disable-notifications",
"--disable-desktop-notifications",
"--disable-popup-blocking",
"--disable-prompt-on-repost",
"--disable-external-intent-requests",
"--disable-session-crashed-bubble",
"--disable-search-engine-choice-screen",
"--disable-datasaver-prompt",
"--ash-no-nudges",
"--hide-crash-restore-bubble",
"--suppress-message-center-popups",
"--noerrdialogs",
"--no-pings",
"--silent-debugger-extension-api",
"--deny-permission-prompts",
"--safebrowsing-disable-auto-update",
"--metrics-recording-only",
"--password-store=basic",
"--use-mock-keychain",
"--disable-cookie-encryption",
"--font-render-hinting=none",
"--force-color-profile=srgb",
"--disable-partial-raster",
"--disable-skia-runtime-opts",
"--disable-2d-canvas-clip-aa",
"--enable-webgl",
"--hide-scrollbars",
"--export-tagged-pdf",
"--generate-pdf-document-outline",
"--disable-lazy-loading",
"--disable-renderer-backgrounding",
"--disable-background-networking",
"--disable-background-timer-throttling",
"--disable-backgrounding-occluded-windows",
"--disable-ipc-flooding-protection",
"--disable-extensions-http-throttling",
"--disable-field-trial-config",
"--disable-back-forward-cache",
"--autoplay-policy=no-user-gesture-required",
"--disable-gesture-requirement-for-media-playback",
"--lang=en-US,en;q=0.9",
"--log-level=2",
"--enable-logging=stderr"
],
"x-aliases": ["CHROME_DEFAULT_ARGS"],
"description": "Default Chrome command-line arguments (static flags only, dynamic args like --user-data-dir are added at runtime)"
},
"CHROME_ARGS_EXTRA": {
"type": "array",
"items": {"type": "string"},
"default": [],
"x-aliases": ["CHROME_EXTRA_ARGS"],
"description": "Extra arguments to append to Chrome command (for user customization)"
},
"CHROME_PAGELOAD_TIMEOUT": {
"type": "integer",
"default": 60,
"minimum": 5,
"x-fallback": "CHROME_TIMEOUT",
"description": "Timeout for page navigation/load in seconds"
},
"CHROME_WAIT_FOR": {
"type": "string",
"default": "networkidle2",
"enum": ["domcontentloaded", "load", "networkidle0", "networkidle2"],
"description": "Page load completion condition (domcontentloaded, load, networkidle0, networkidle2)"
},
"CHROME_DELAY_AFTER_LOAD": {
"type": "number",
"default": 0,
"minimum": 0,
"description": "Extra delay in seconds after page load completes before archiving (useful for JS-heavy SPAs)"
},
"CHROME_CHECK_SSL_VALIDITY": {
"type": "boolean",
"default": true,
"x-fallback": "CHECK_SSL_VALIDITY",
"description": "Whether to verify SSL certificates (disable for self-signed certs)"
}
}
}

View File

@@ -1,254 +0,0 @@
#!/usr/bin/env node
/**
* Extract cookies from Chrome via CDP and write to Netscape cookies.txt format.
*
* This script launches Chrome with a given user data directory, connects via CDP,
* extracts all cookies, and writes them to a cookies.txt file in Netscape format.
*
* Usage:
* CHROME_USER_DATA_DIR=/path/to/profile COOKIES_OUTPUT_FILE=/path/to/cookies.txt node extract_cookies.js
*
* Environment variables:
* CHROME_USER_DATA_DIR: Path to Chrome user data directory (required)
* COOKIES_OUTPUT_FILE: Path to output cookies.txt file (required)
* CHROME_HEADLESS: Run in headless mode (default: true)
* NODE_MODULES_DIR: Path to node_modules for module resolution
*/
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) {
module.paths.unshift(process.env.NODE_MODULES_DIR);
}
const fs = require('fs');
const path = require('path');
const {
findAnyChromiumBinary,
launchChromium,
killChrome,
getEnv,
} = require('./chrome_utils.js');
/**
 * Serialize one CDP cookie object into a single Netscape cookies.txt line.
 *
 * Columns (tab-separated): domain, includeSubdomains, path, secure, expiry,
 * name, value. Session cookies are written with an expiry of 0.
 *
 * @param {Object} cookie - Cookie object as returned by Network.getAllCookies
 * @returns {string} - Netscape format cookie line
 */
function cookieToNetscape(cookie) {
  // Domain cookies (non host-only) carry a leading dot.
  // NOTE(review): CDP's Network.Cookie may not expose `hostOnly`; when the
  // field is absent every bare domain gets a '.' prefix — confirm intended.
  const dotted = cookie.domain.startsWith('.') || cookie.hostOnly
    ? cookie.domain
    : '.' + cookie.domain;
  const fields = [
    dotted,
    // includeSubdomains mirrors the leading-dot convention.
    dotted.startsWith('.') ? 'TRUE' : 'FALSE',
    cookie.path || '/',
    cookie.secure ? 'TRUE' : 'FALSE',
    // CDP reports expiry in (possibly fractional) seconds since epoch.
    cookie.expires && cookie.expires > 0 ? String(Math.floor(cookie.expires)) : '0',
    cookie.name,
    cookie.value,
  ];
  return fields.join('\t');
}
/**
 * Write an array of CDP cookies to a Netscape-format cookies.txt file.
 *
 * @param {Array} cookies - Array of CDP cookie objects
 * @param {string} outputPath - Path to output file
 */
function writeCookiesFile(cookies, outputPath) {
  // Fixed header identifying the file format and its producer.
  const header = [
    '# Netscape HTTP Cookie File',
    '# https://curl.se/docs/http-cookies.html',
    '# This file was generated by ArchiveBox persona cookie extraction',
    '#',
    '# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue',
    '',
  ];
  const body = cookies.map(cookieToNetscape);
  fs.writeFileSync(outputPath, header.concat(body).join('\n') + '\n');
}
/**
 * Entry point: launch Chrome on CHROME_USER_DATA_DIR, pull every cookie out
 * via CDP `Network.getAllCookies`, write them to COOKIES_OUTPUT_FILE in
 * Netscape format, then shut Chrome down and remove the temp output dir.
 * Exits non-zero on any missing prerequisite or runtime failure.
 */
async function main() {
  const userDataDir = getEnv('CHROME_USER_DATA_DIR');
  const outputFile = getEnv('COOKIES_OUTPUT_FILE');
  // Validate required environment up front so failures are immediate and clear.
  if (!userDataDir) {
    console.error('ERROR: CHROME_USER_DATA_DIR environment variable is required');
    process.exit(1);
  }
  if (!outputFile) {
    console.error('ERROR: COOKIES_OUTPUT_FILE environment variable is required');
    process.exit(1);
  }
  if (!fs.existsSync(userDataDir)) {
    console.error(`ERROR: User data directory does not exist: ${userDataDir}`);
    process.exit(1);
  }
  const binary = findAnyChromiumBinary();
  if (!binary) {
    console.error('ERROR: Chromium-based browser binary not found');
    process.exit(1);
  }
  // All progress is logged to stderr so stdout stays clean.
  console.error(`[*] Extracting cookies from: ${userDataDir}`);
  console.error(`[*] Output file: ${outputFile}`);
  console.error(`[*] Using browser: ${binary}`);
  // Create a temporary output directory for Chrome files
  const outputDir = fs.mkdtempSync(path.join(require('os').tmpdir(), 'chrome-cookies-'));
  let chromePid = null;
  try {
    // Launch Chrome with the user data directory
    const result = await launchChromium({
      binary,
      outputDir,
      userDataDir,
      headless: true,
      killZombies: false, // Don't kill other Chrome instances
    });
    if (!result.success) {
      console.error(`ERROR: Failed to launch Chrome: ${result.error}`);
      process.exit(1);
    }
    chromePid = result.pid;
    const cdpUrl = result.cdpUrl;
    const port = result.port;
    console.error(`[*] Chrome launched (PID: ${chromePid})`);
    console.error(`[*] CDP URL: ${cdpUrl}`);
    // Connect to CDP and get cookies
    const http = require('http');
    // Use CDP directly via HTTP to get all cookies:
    // query /json/list for a debuggable target, then send
    // Network.getAllCookies over its websocket.
    const getCookies = () => {
      return new Promise((resolve, reject) => {
        const req = http.request(
          {
            hostname: '127.0.0.1',
            port: port,
            path: '/json/list',
            method: 'GET',
          },
          (res) => {
            let data = '';
            res.on('data', (chunk) => (data += chunk));
            res.on('end', () => {
              try {
                const targets = JSON.parse(data);
                // Find a page target (fall back to any target at all)
                const pageTarget = targets.find(t => t.type === 'page') || targets[0];
                if (!pageTarget) {
                  reject(new Error('No page target found'));
                  return;
                }
                // Connect via WebSocket and send CDP command
                const WebSocket = require('ws');
                const ws = new WebSocket(pageTarget.webSocketDebuggerUrl);
                ws.on('open', () => {
                  ws.send(JSON.stringify({
                    id: 1,
                    method: 'Network.getAllCookies',
                  }));
                });
                ws.on('message', (message) => {
                  const response = JSON.parse(message);
                  // Only react to the reply matching our request id.
                  if (response.id === 1) {
                    ws.close();
                    if (response.result && response.result.cookies) {
                      resolve(response.result.cookies);
                    } else {
                      reject(new Error('Failed to get cookies: ' + JSON.stringify(response)));
                    }
                  }
                });
                ws.on('error', (err) => {
                  reject(err);
                });
              } catch (e) {
                reject(e);
              }
            });
          }
        );
        req.on('error', reject);
        req.end();
      });
    };
    // Wait a moment for the browser to fully initialize
    await new Promise(r => setTimeout(r, 2000));
    console.error('[*] Fetching cookies via CDP...');
    const cookies = await getCookies();
    console.error(`[+] Retrieved ${cookies.length} cookies`);
    // Write cookies to file
    writeCookiesFile(cookies, outputFile);
    console.error(`[+] Wrote cookies to: ${outputFile}`);
    // Clean up: kill Chrome first, then null the pid so the catch branch
    // doesn't double-kill.
    await killChrome(chromePid, outputDir);
    chromePid = null;
    // Remove temp directory
    fs.rmSync(outputDir, { recursive: true, force: true });
    console.error('[+] Cookie extraction complete');
    process.exit(0);
  } catch (error) {
    console.error(`ERROR: ${error.message}`);
    // Clean up on error (best effort; Chrome may already be gone)
    if (chromePid) {
      await killChrome(chromePid, outputDir);
    }
    try {
      fs.rmSync(outputDir, { recursive: true, force: true });
    } catch (e) {}
    process.exit(1);
  }
}
// Script entry point: surface any unhandled rejection as a fatal error.
main().catch((err) => {
  console.error(`Fatal error: ${err.message}`);
  process.exit(1);
});

View File

@@ -1,34 +0,0 @@
#!/usr/bin/env python3
"""
Emit Chromium Binary dependency for the crawl.
NOTE: We use Chromium instead of Chrome because Chrome 137+ removed support for
--load-extension and --disable-extensions-except flags, which are needed for
loading unpacked extensions in headless mode.
"""
import json
import os
import sys
def main():
    """Emit a Binary dependency record for chromium, unless Chrome is disabled.

    Honors CHROME_ENABLED (false/0/no/off => skip). On the enabled path,
    prints a single JSONL record describing the chromium binary dependency
    and its preferred binproviders, then exits 0.
    """
    # Respect CHROME_ENABLED=false/0/no/off to skip emitting the dependency.
    if os.environ.get('CHROME_ENABLED', 'true').lower() in ('false', '0', 'no', 'off'):
        sys.exit(0)

    record = {
        'type': 'Binary',
        'name': 'chromium',
        'binproviders': 'puppeteer,env',
        'overrides': {
            'puppeteer': ['chromium@latest', '--install-deps'],
        },
    }
    print(json.dumps(record))
    sys.exit(0)
# Script entry point: emit the dependency record when run directly as a hook.
if __name__ == '__main__':
    main()

View File

@@ -1,427 +0,0 @@
#!/usr/bin/env node
/**
* Launch a shared Chromium browser session for the entire crawl.
*
* This runs once per crawl and keeps Chromium alive for all snapshots to share.
* Each snapshot creates its own tab via on_Snapshot__10_chrome_tab.bg.js.
*
* NOTE: We use Chromium instead of Chrome because Chrome 137+ removed support for
* --load-extension and --disable-extensions-except flags.
*
* Usage: on_Crawl__90_chrome_launch.bg.js --crawl-id=<uuid> --source-url=<url>
* Output: Writes to current directory (executor creates chrome/ dir):
* - cdp_url.txt: WebSocket URL for CDP connection
* - chrome.pid: Chromium process ID (for cleanup)
* - port.txt: Debug port number
* - extensions.json: Loaded extensions metadata
*
* Environment variables:
* NODE_MODULES_DIR: Path to node_modules directory for module resolution
* CHROME_BINARY: Path to Chromium binary (falls back to auto-detection)
* CHROME_RESOLUTION: Page resolution (default: 1440,2000)
* CHROME_HEADLESS: Run in headless mode (default: true)
* CHROME_CHECK_SSL_VALIDITY: Whether to check SSL certificates (default: true)
* CHROME_EXTENSIONS_DIR: Directory containing Chrome extensions
*/
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) {
module.paths.unshift(process.env.NODE_MODULES_DIR);
}
const fs = require('fs');
const path = require('path');
const http = require('http');
const puppeteer = require('puppeteer');
const {
findChromium,
launchChromium,
killChrome,
getEnv,
getEnvBool,
getExtensionId,
writePidWithMtime,
getExtensionsDir,
} = require('./chrome_utils.js');
// Extractor metadata
const PLUGIN_NAME = 'chrome_launch';
const OUTPUT_DIR = '.';
// Global state for cleanup
let chromePid = null;
let browserInstance = null;
/**
 * Parse Netscape cookies.txt content into CDP-style cookie objects.
 *
 * Understands the `#HttpOnly_` prefix, skips comments and blank lines, and
 * counts malformed lines (fewer than 7 tab-separated fields, or missing
 * name/domain) as skipped.
 *
 * @param {string} contents - Raw cookies.txt text
 * @returns {{cookies: Array, skipped: number}}
 */
function parseCookiesTxt(contents) {
  const parsed = [];
  let malformed = 0;
  for (const raw of contents.split(/\r?\n/)) {
    let entry = raw.trim();
    if (!entry) continue;
    // `#HttpOnly_` marks a real cookie flagged httpOnly; any other leading
    // '#' is a comment line.
    let httpOnly = false;
    if (entry.startsWith('#HttpOnly_')) {
      httpOnly = true;
      entry = entry.slice('#HttpOnly_'.length);
    } else if (entry.startsWith('#')) {
      continue;
    }
    const fields = entry.split('\t');
    if (fields.length < 7) {
      malformed += 1;
      continue;
    }
    const [rawDomain, rawSubdomains, rawPath, rawSecure, rawExpiry, name, value] = fields;
    if (!name || !rawDomain) {
      malformed += 1;
      continue;
    }
    // Normalize the leading dot so it agrees with the includeSubdomains flag.
    const subdomains = (rawSubdomains || '').toUpperCase() === 'TRUE';
    let domain = rawDomain;
    if (subdomains && !domain.startsWith('.')) domain = `.${domain}`;
    if (!subdomains && domain.startsWith('.')) domain = domain.slice(1);
    const cookie = {
      name,
      value,
      domain,
      path: rawPath || '/',
      secure: (rawSecure || '').toUpperCase() === 'TRUE',
      httpOnly,
    };
    // Session cookies (expiry 0 / unparseable) get no `expires` field.
    const expiry = parseInt(rawExpiry, 10);
    if (!isNaN(expiry) && expiry > 0) {
      cookie.expires = expiry;
    }
    parsed.push(cookie);
  }
  return { cookies: parsed, skipped: malformed };
}
/**
 * Import cookies from a Netscape cookies.txt file into the running browser
 * via CDP `Network.setCookies`, in chunks of 200.
 *
 * Best-effort: every failure path logs to stderr and returns rather than
 * throwing, so cookie import never aborts the crawl.
 *
 * @param {Object} browser - Connected puppeteer Browser instance
 * @param {string} cookiesFile - Path to the cookies.txt file (may be falsy)
 * @param {string} userDataDir - Profile dir; only used here for the warning
 *   that cookies won't persist without one
 */
async function importCookiesFromFile(browser, cookiesFile, userDataDir) {
  if (!cookiesFile) return;
  if (!fs.existsSync(cookiesFile)) {
    console.error(`[!] Cookies file not found: ${cookiesFile}`);
    return;
  }
  let contents = '';
  try {
    contents = fs.readFileSync(cookiesFile, 'utf-8');
  } catch (e) {
    console.error(`[!] Failed to read COOKIES_TXT_FILE: ${e.message}`);
    return;
  }
  const { cookies, skipped } = parseCookiesTxt(contents);
  if (cookies.length === 0) {
    console.error('[!] No cookies found to import');
    return;
  }
  console.error(`[*] Importing ${cookies.length} cookies from ${cookiesFile}...`);
  if (skipped) {
    console.error(`[*] Skipped ${skipped} malformed cookie line(s)`);
  }
  if (!userDataDir) {
    console.error('[!] CHROME_USER_DATA_DIR not set; cookies will not persist beyond this session');
  }
  // A throwaway page is needed to obtain a CDP session for Network.setCookies.
  const page = await browser.newPage();
  const client = await page.target().createCDPSession();
  await client.send('Network.enable');
  // Chunk the import so one bad batch doesn't lose everything.
  const chunkSize = 200;
  let imported = 0;
  for (let i = 0; i < cookies.length; i += chunkSize) {
    const chunk = cookies.slice(i, i + chunkSize);
    try {
      await client.send('Network.setCookies', { cookies: chunk });
      imported += chunk.length;
    } catch (e) {
      console.error(`[!] Failed to import cookies ${i + 1}-${i + chunk.length}: ${e.message}`);
    }
  }
  await page.close();
  console.error(`[+] Imported ${imported}/${cookies.length} cookies`);
}
/**
 * Extract the devtools debug port from a CDP websocket URL.
 *
 * @param {string} cdpUrl - e.g. ws://127.0.0.1:9222/devtools/browser/<id>
 * @returns {string|null} Port as a string, or null when not found
 */
function getPortFromCdpUrl(cdpUrl) {
  if (!cdpUrl) return null;
  const found = /:(\d+)\/devtools\//.exec(cdpUrl);
  return found ? found[1] : null;
}
/**
 * Fetch the list of debuggable targets from Chrome's devtools HTTP endpoint
 * (GET http://127.0.0.1:<port>/json/list).
 *
 * @param {string} cdpUrl - CDP websocket URL (the port is extracted from it)
 * @returns {Promise<Array>} Array of target descriptors; [] when no port can
 *   be derived or the response is not an array
 */
async function fetchDevtoolsTargets(cdpUrl) {
  const port = getPortFromCdpUrl(cdpUrl);
  if (!port) return [];
  const urlPath = '/json/list';
  return new Promise((resolve, reject) => {
    const req = http.get(
      { hostname: '127.0.0.1', port, path: urlPath },
      (res) => {
        // Accumulate the body, then parse it once the response ends.
        let data = '';
        res.on('data', (chunk) => (data += chunk));
        res.on('end', () => {
          try {
            const targets = JSON.parse(data);
            resolve(Array.isArray(targets) ? targets : []);
          } catch (e) {
            reject(e);
          }
        });
      }
    );
    req.on('error', reject);
  });
}
/**
 * Discover which custom extensions actually loaded by inspecting the
 * devtools /json/list targets, and set a `loaded` flag on each entry in
 * `installedExtensions` (mutated in place).
 *
 * @param {string} cdpUrl - CDP websocket URL of the running browser
 * @param {Array} installedExtensions - Extension metadata objects with `id`
 */
async function discoverExtensionTargets(cdpUrl, installedExtensions) {
  // Extension IDs treated as built-in/bundled; excluded so only the
  // extensions we loaded ourselves are reported.
  const builtinIds = [
    'nkeimhogjdpnpccoofpliimaahmaaome',
    'fignfifoniblkonapihmkfakmlgkbkcf',
    'ahfgeienlihckogmohjhadlkjgocpleb',
    'mhjfbmdgcfjbbpaeojofohoefgiehjai',
  ];
  let targets = [];
  // Retry up to 10 times, 500ms apart, while the browser finishes starting.
  for (let i = 0; i < 10; i += 1) {
    try {
      targets = await fetchDevtoolsTargets(cdpUrl);
      if (targets.length > 0) break;
    } catch (e) {
      // Ignore and retry
    }
    await new Promise(r => setTimeout(r, 500));
  }
  // Keep only chrome-extension:// targets whose ID is not a builtin.
  const customExtTargets = targets.filter(t => {
    const url = t.url || '';
    if (!url.startsWith('chrome-extension://')) return false;
    const extId = url.split('://')[1].split('/')[0];
    return !builtinIds.includes(extId);
  });
  console.error(`[+] Found ${customExtTargets.length} custom extension target(s) via /json/list`);
  for (const target of customExtTargets) {
    const url = target.url || '';
    const extId = url.split('://')[1].split('/')[0];
    console.error(`[+] Extension target: ${extId} (${target.type || 'unknown'})`);
  }
  // Mark each known extension as loaded iff its ID showed up at runtime.
  const runtimeIds = new Set(customExtTargets.map(t => (t.url || '').split('://')[1].split('/')[0]));
  for (const ext of installedExtensions) {
    if (ext.id) {
      ext.loaded = runtimeIds.has(ext.id);
    }
  }
  if (customExtTargets.length === 0 && installedExtensions.length > 0) {
    console.error(`[!] Warning: No custom extensions detected. Extension loading may have failed.`);
    console.error(`[!] Make sure you are using Chromium, not Chrome (Chrome 137+ removed --load-extension support)`);
  }
}
// Parse --key=value command-line flags into an object.
// Dashes in flag names become underscores; bare --flags become true.
function parseArgs() {
  const parsed = {};
  for (const token of process.argv.slice(2)) {
    if (!token.startsWith('--')) continue;
    const [flag, ...rest] = token.slice(2).split('=');
    parsed[flag.replace(/-/g, '_')] = rest.join('=') || true;
  }
  return parsed;
}
// Cleanup handler for SIGTERM/SIGINT: gracefully close the puppeteer
// browser connection if one exists, then kill the Chrome process, then exit.
async function cleanup() {
  console.error('[*] Cleaning up Chrome session...');
  // Try graceful browser close first (flushes profile state cleanly).
  if (browserInstance) {
    try {
      console.error('[*] Closing browser gracefully...');
      await browserInstance.close();
      browserInstance = null;
      console.error('[+] Browser closed gracefully');
    } catch (e) {
      console.error(`[!] Graceful close failed: ${e.message}`);
    }
  }
  // Kill Chrome process (fallback even if graceful close failed).
  if (chromePid) {
    await killChrome(chromePid, OUTPUT_DIR);
  }
  process.exit(0);
}
// Register signal handlers: both SIGTERM (normal hook teardown) and SIGINT
// (interactive Ctrl-C) funnel through cleanup() so Chrome is always shut down.
process.on('SIGTERM', cleanup);
process.on('SIGINT', cleanup);
/**
 * Entry point: launch one shared Chromium instance for the whole crawl.
 *
 * Finds a Chromium binary, loads any cached unpacked extensions, launches
 * the browser (writing cdp_url.txt / chrome.pid / port.txt to OUTPUT_DIR),
 * optionally imports cookies from COOKIES_TXT_FILE, writes extensions.json,
 * then stays alive until SIGTERM so cleanup() can tear the browser down.
 */
async function main() {
  const args = parseArgs();
  const crawlId = args.crawl_id;
  try {
    const binary = findChromium();
    if (!binary) {
      // Machine-readable hints for the dependency-resolution layer.
      console.error('ERROR: Chromium binary not found');
      console.error('DEPENDENCY_NEEDED=chromium');
      console.error('BIN_PROVIDERS=puppeteer,env,playwright,apt,brew');
      console.error('INSTALL_HINT=npx @puppeteer/browsers install chromium@latest');
      process.exit(1);
    }
    // Get Chromium version (best effort; truncated to 64 chars)
    let version = '';
    try {
      const { execSync } = require('child_process');
      version = execSync(`"${binary}" --version`, { encoding: 'utf8', timeout: 5000 })
        .trim()
        .slice(0, 64);
    } catch (e) {}
    console.error(`[*] Using browser: ${binary}`);
    if (version) console.error(`[*] Version: ${version}`);
    // Load installed extensions
    const extensionsDir = getExtensionsDir();
    const userDataDir = getEnv('CHROME_USER_DATA_DIR');
    const cookiesFile = getEnv('COOKIES_TXT_FILE') || getEnv('COOKIES_FILE');
    if (userDataDir) {
      console.error(`[*] Using user data dir: ${userDataDir}`);
    }
    if (cookiesFile) {
      console.error(`[*] Using cookies file: ${cookiesFile}`);
    }
    // Collect unpacked extension paths from *.extension.json cache files.
    const installedExtensions = [];
    const extensionPaths = [];
    if (fs.existsSync(extensionsDir)) {
      const files = fs.readdirSync(extensionsDir);
      for (const file of files) {
        if (file.endsWith('.extension.json')) {
          try {
            const extPath = path.join(extensionsDir, file);
            const extData = JSON.parse(fs.readFileSync(extPath, 'utf-8'));
            if (extData.unpacked_path && fs.existsSync(extData.unpacked_path)) {
              installedExtensions.push(extData);
              extensionPaths.push(extData.unpacked_path);
              console.error(`[*] Loading extension: ${extData.name || file}`);
            }
          } catch (e) {
            console.warn(`[!] Skipping invalid extension cache: ${file}`);
          }
        }
      }
    }
    if (installedExtensions.length > 0) {
      console.error(`[+] Found ${installedExtensions.length} extension(s) to load`);
    }
    // Ensure extension IDs are available without chrome://extensions
    for (const ext of installedExtensions) {
      if (!ext.id && ext.unpacked_path) {
        try {
          ext.id = getExtensionId(ext.unpacked_path);
        } catch (e) {
          console.error(`[!] Failed to compute extension id for ${ext.name}: ${e.message}`);
        }
      }
    }
    // Note: PID file is written by run_hook() with hook-specific name
    // Snapshot.cleanup() kills all *.pid processes when done
    if (!fs.existsSync(OUTPUT_DIR)) {
      fs.mkdirSync(OUTPUT_DIR, { recursive: true });
    }
    // Launch Chromium using consolidated function
    // userDataDir is derived from ACTIVE_PERSONA by get_config() if not explicitly set
    const result = await launchChromium({
      binary,
      outputDir: OUTPUT_DIR,
      userDataDir,
      extensionPaths,
    });
    if (!result.success) {
      console.error(`ERROR: ${result.error}`);
      process.exit(1);
    }
    chromePid = result.pid;
    const cdpUrl = result.cdpUrl;
    // Discover extension targets at launch (no chrome://extensions)
    if (extensionPaths.length > 0) {
      await new Promise(r => setTimeout(r, 2000));
      console.error('[*] Discovering extension targets via devtools /json/list...');
      await discoverExtensionTargets(cdpUrl, installedExtensions);
    }
    // Only connect to CDP when cookies import is needed to reduce crash risk.
    if (cookiesFile) {
      console.error(`[*] Connecting puppeteer to CDP for cookie import...`);
      const browser = await puppeteer.connect({
        browserWSEndpoint: cdpUrl,
        defaultViewport: null,
      });
      browserInstance = browser;
      // Import cookies into Chrome profile at crawl start
      await importCookiesFromFile(browser, cookiesFile, userDataDir);
      try {
        browser.disconnect();
      } catch (e) {}
      browserInstance = null;
    } else {
      console.error('[*] Skipping puppeteer CDP connection (no cookies to import)');
    }
    // Write extensions metadata with actual IDs
    if (installedExtensions.length > 0) {
      fs.writeFileSync(
        path.join(OUTPUT_DIR, 'extensions.json'),
        JSON.stringify(installedExtensions, null, 2)
      );
    }
    console.error(`[+] Chromium session started for crawl ${crawlId}`);
    console.error(`[+] CDP URL: ${cdpUrl}`);
    console.error(`[+] PID: ${chromePid}`);
    // Stay alive to handle cleanup on SIGTERM
    console.log('[*] Chromium launch hook staying alive to handle cleanup...');
    setInterval(() => {}, 1000000);
  } catch (e) {
    console.error(`ERROR: ${e.name}: ${e.message}`);
    process.exit(1);
  }
}
// Script entry point: any unhandled rejection is fatal.
main().catch((e) => {
  console.error(`Fatal error: ${e.message}`);
  process.exit(1);
});

View File

@@ -1,264 +0,0 @@
#!/usr/bin/env node
/**
* Create a Chrome tab for this snapshot in the shared crawl Chrome session.
*
* Connects to the crawl-level Chrome session (from on_Crawl__90_chrome_launch.bg.js)
* and creates a new tab. This hook does NOT launch its own Chrome instance.
*
* Usage: on_Snapshot__10_chrome_tab.bg.js --url=<url> --snapshot-id=<uuid> --crawl-id=<uuid>
* Output: Creates chrome/ directory under snapshot output dir with:
* - cdp_url.txt: WebSocket URL for CDP connection
* - chrome.pid: Chrome process ID (from crawl)
* - target_id.txt: Target ID of this snapshot's tab
* - url.txt: The URL to be navigated to
*
* Environment variables:
* CRAWL_OUTPUT_DIR: Crawl output directory (to find crawl's Chrome session)
* CHROME_BINARY: Path to Chromium binary (optional, for version info)
*
* This is a background hook that stays alive until SIGTERM so the tab
* can be closed cleanly at the end of the snapshot run.
*/
const fs = require('fs');
const path = require('path');
const { execSync } = require('child_process');
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
const puppeteer = require('puppeteer');
const { getEnv, getEnvInt } = require('./chrome_utils.js');
// Extractor metadata
const PLUGIN_NAME = 'chrome_tab';
const OUTPUT_DIR = '.'; // Hook already runs in chrome/ output directory
const CHROME_SESSION_DIR = '.';
const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
let finalStatus = 'failed';
let finalOutput = '';
let finalError = '';
let cmdVersion = '';
let finalized = false;
// Parse command line arguments: turns ["--url=x", "--flag"] into
// { url: "x", flag: true }, converting dashes in names to underscores.
function parseArgs() {
  const out = {};
  for (const raw of process.argv.slice(2)) {
    if (!raw.startsWith('--')) continue;
    const body = raw.slice(2);
    const eq = body.indexOf('=');
    const key = (eq === -1 ? body : body.slice(0, eq)).replace(/-/g, '_');
    const value = eq === -1 ? '' : body.slice(eq + 1);
    // An empty value (bare flag or trailing '=') collapses to true.
    out[key] = value || true;
  }
  return out;
}
/**
 * Print the final ArchiveResult JSONL record to stdout exactly once.
 *
 * Reads the module-level finalStatus/finalOutput/finalError/cmdVersion; the
 * `finalized` guard makes repeated calls (signal handler + normal exit) a
 * no-op after the first.
 *
 * @param {string} [statusOverride] - Status to report instead of finalStatus
 */
function emitResult(statusOverride) {
  if (finalized) return;
  finalized = true;
  const status = statusOverride || finalStatus;
  let outputStr;
  if (status === 'succeeded') {
    outputStr = finalOutput;
  } else {
    // On failure prefer the error text, then whatever output exists.
    outputStr = finalError || finalOutput || '';
  }
  const record = {
    type: 'ArchiveResult',
    status,
    output_str: outputStr,
  };
  if (cmdVersion) {
    record.cmd_version = cmdVersion;
  }
  console.log(JSON.stringify(record));
}
// Cleanup handler for SIGTERM - close this snapshot's tab
/**
 * Best-effort teardown: reconnect to the shared browser via the recorded
 * cdp_url.txt, close the tab matching target_id.txt, emit the final
 * ArchiveResult record, and exit with a code reflecting finalStatus.
 *
 * @param {string} [signal] - Name of the signal that triggered cleanup
 */
async function cleanup(signal) {
  if (signal) {
    console.error(`\nReceived ${signal}, closing chrome tab...`);
  }
  try {
    const cdpFile = path.join(OUTPUT_DIR, 'cdp_url.txt');
    const targetIdFile = path.join(OUTPUT_DIR, 'target_id.txt');
    if (fs.existsSync(cdpFile) && fs.existsSync(targetIdFile)) {
      const cdpUrl = fs.readFileSync(cdpFile, 'utf8').trim();
      const targetId = fs.readFileSync(targetIdFile, 'utf8').trim();
      const browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
      const pages = await browser.pages();
      // Match this snapshot's tab by its recorded target id.
      const page = pages.find(p => p.target()._targetId === targetId);
      if (page) {
        await page.close();
      }
      browser.disconnect();
    }
  } catch (e) {
    // Best effort
  }
  emitResult();
  process.exit(finalStatus === 'succeeded' ? 0 : 1);
}
// Register signal handlers
// Wrapped in arrow fns so cleanup() knows which signal fired (for logging).
process.on('SIGTERM', () => cleanup('SIGTERM'));
process.on('SIGINT', () => cleanup('SIGINT'));
// Try to find the crawl's Chrome session.
// Reads $CRAWL_OUTPUT_DIR/chrome/{cdp_url.txt,chrome.pid}, validates both,
// and verifies the PID is still alive; throws CHROME_SESSION_REQUIRED_ERROR
// on any missing or invalid piece.
function getCrawlChromeSession() {
  // Use CRAWL_OUTPUT_DIR env var set by get_config() in configset.py
  const crawlOutputDir = getEnv('CRAWL_OUTPUT_DIR', '');
  if (!crawlOutputDir) {
    throw new Error(CHROME_SESSION_REQUIRED_ERROR);
  }
  const crawlChromeDir = path.join(crawlOutputDir, 'chrome');
  const cdpFile = path.join(crawlChromeDir, 'cdp_url.txt');
  const pidFile = path.join(crawlChromeDir, 'chrome.pid');
  if (!fs.existsSync(cdpFile)) {
    throw new Error(CHROME_SESSION_REQUIRED_ERROR);
  }
  if (!fs.existsSync(pidFile)) {
    throw new Error(CHROME_SESSION_REQUIRED_ERROR);
  }
  const cdpUrl = fs.readFileSync(cdpFile, 'utf-8').trim();
  const pid = parseInt(fs.readFileSync(pidFile, 'utf-8').trim(), 10);
  if (!cdpUrl) {
    throw new Error(CHROME_SESSION_REQUIRED_ERROR);
  }
  if (!pid || Number.isNaN(pid)) {
    throw new Error(CHROME_SESSION_REQUIRED_ERROR);
  }
  // Verify the process is still running
  try {
    process.kill(pid, 0); // Signal 0 = check if process exists
  } catch (e) {
    throw new Error(CHROME_SESSION_REQUIRED_ERROR);
  }
  return { cdpUrl, pid };
}
/**
 * Poll for the crawl's Chrome session until it appears or the timeout lapses.
 *
 * @param {number} timeoutMs - Maximum time to wait
 * @param {number} [intervalMs=250] - Poll interval between attempts
 * @returns {Promise<{cdpUrl: string, pid: number}>}
 * @throws The most recent session error once the timeout is exceeded
 */
async function waitForCrawlChromeSession(timeoutMs, intervalMs = 250) {
  const deadline = Date.now() + timeoutMs;
  let lastError = null;
  while (Date.now() < deadline) {
    try {
      return getCrawlChromeSession();
    } catch (err) {
      lastError = err;
    }
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
  // Preserve the most informative error; fall back to the generic one.
  throw lastError || new Error(CHROME_SESSION_REQUIRED_ERROR);
}
// Create a new tab in an existing Chrome session.
/**
 * Connects puppeteer to the crawl's shared browser, opens a fresh tab for
 * this snapshot, records the session files (cdp_url.txt, chrome.pid,
 * target_id.txt, url.txt) in OUTPUT_DIR, then disconnects leaving the tab
 * open for downstream hooks.
 *
 * @param {string} cdpUrl - CDP websocket URL of the shared browser
 * @param {string} url - URL this snapshot will navigate to (recorded only)
 * @param {number} pid - Chrome process ID from the crawl session
 * @returns {Promise<Object>} { success, output, cdpUrl, targetId, pid }
 */
async function createTabInExistingChrome(cdpUrl, url, pid) {
  console.log(`[*] Connecting to existing Chrome session: ${cdpUrl}`);
  // Connect Puppeteer to the running Chrome
  const browser = await puppeteer.connect({
    browserWSEndpoint: cdpUrl,
    defaultViewport: null,
  });
  // Create a new tab for this snapshot
  const page = await browser.newPage();
  // Get the page target ID
  const target = page.target();
  const targetId = target._targetId;
  // Write session info
  fs.writeFileSync(path.join(OUTPUT_DIR, 'cdp_url.txt'), cdpUrl);
  fs.writeFileSync(path.join(OUTPUT_DIR, 'chrome.pid'), String(pid));
  fs.writeFileSync(path.join(OUTPUT_DIR, 'target_id.txt'), targetId);
  fs.writeFileSync(path.join(OUTPUT_DIR, 'url.txt'), url);
  // Disconnect Puppeteer (Chrome and tab stay alive)
  browser.disconnect();
  return { success: true, output: OUTPUT_DIR, cdpUrl, targetId, pid };
}
/**
 * Entry point: wait for the crawl-level Chrome session, open a tab for this
 * snapshot, record the session files, then block until SIGTERM so cleanup()
 * can close the tab and emit the final ArchiveResult.
 */
async function main() {
  const args = parseArgs();
  const url = args.url;
  const snapshotId = args.snapshot_id;
  const crawlId = args.crawl_id || getEnv('CRAWL_ID', '');
  if (!url || !snapshotId) {
    console.error('Usage: on_Snapshot__10_chrome_tab.bg.js --url=<url> --snapshot-id=<uuid> [--crawl-id=<uuid>]');
    process.exit(1);
  }
  let status = 'failed';
  let output = '';
  let error = '';
  let version = '';
  try {
    // Get Chrome version (best effort; truncated to 64 chars)
    try {
      const binary = getEnv('CHROME_BINARY', '').trim();
      if (binary) {
        version = execSync(`"${binary}" --version`, { encoding: 'utf8', timeout: 5000 }).trim().slice(0, 64);
      }
    } catch (e) {
      version = '';
    }
    // Try to use existing crawl Chrome session (wait for readiness)
    const timeoutSeconds = getEnvInt('CHROME_TAB_TIMEOUT', getEnvInt('CHROME_TIMEOUT', getEnvInt('TIMEOUT', 60)));
    const crawlSession = await waitForCrawlChromeSession(timeoutSeconds * 1000);
    console.log(`[*] Found existing Chrome session from crawl ${crawlId}`);
    const result = await createTabInExistingChrome(crawlSession.cdpUrl, url, crawlSession.pid);
    if (result.success) {
      status = 'succeeded';
      output = result.output;
      console.log(`[+] Chrome tab ready`);
      console.log(`[+] CDP URL: ${result.cdpUrl}`);
      console.log(`[+] Page target ID: ${result.targetId}`);
    } else {
      status = 'failed';
      error = result.error;
    }
  } catch (e) {
    error = `${e.name}: ${e.message}`;
    status = 'failed';
  }
  if (error) {
    console.error(`ERROR: ${error}`);
  }
  // Stash results in module globals so emitResult()/cleanup() can report them.
  finalStatus = status;
  finalOutput = output || '';
  finalError = error || '';
  cmdVersion = version || '';
  if (status !== 'succeeded') {
    emitResult(status);
    process.exit(1);
  }
  console.log('[*] Chrome tab created, waiting for cleanup signal...');
  await new Promise(() => {}); // Keep alive until SIGTERM
}
// Script entry point: any unhandled rejection is fatal.
main().catch(e => {
  console.error(`Fatal error: ${e.message}`);
  process.exit(1);
});

View File

@@ -1,77 +0,0 @@
#!/usr/bin/env node
/**
* Wait for Chrome session files to exist (cdp_url.txt + target_id.txt).
*
* This is a foreground hook that blocks until the Chrome tab is ready,
* so downstream hooks can safely connect to CDP.
*
* Usage: on_Snapshot__11_chrome_wait.js --url=<url> --snapshot-id=<uuid>
*/
const fs = require('fs');
const path = require('path');
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
const {
getEnvInt,
waitForChromeSession,
readCdpUrl,
readTargetId,
} = require('./chrome_utils.js');
const CHROME_SESSION_DIR = '.';
const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
// Collect --key=value / --flag arguments into an object; dashes in key
// names are normalized to underscores and bare flags become true.
function parseArgs() {
  return process.argv.slice(2).reduce((acc, arg) => {
    if (arg.startsWith('--')) {
      const [key, ...rest] = arg.slice(2).split('=');
      acc[key.replace(/-/g, '_')] = rest.join('=') || true;
    }
    return acc;
  }, {});
}
/**
 * Entry point: block until the Chrome session files (cdp_url.txt +
 * target_id.txt) exist, then emit a succeeded/failed ArchiveResult so
 * downstream hooks know CDP is safe to use.
 */
async function main() {
  const args = parseArgs();
  const url = args.url;
  const snapshotId = args.snapshot_id;
  if (!url || !snapshotId) {
    console.error('Usage: on_Snapshot__11_chrome_wait.js --url=<url> --snapshot-id=<uuid>');
    process.exit(1);
  }
  // Timeout resolution order: CHROME_TAB_TIMEOUT > CHROME_TIMEOUT > TIMEOUT > 60s.
  const timeoutSeconds = getEnvInt('CHROME_TAB_TIMEOUT', getEnvInt('CHROME_TIMEOUT', getEnvInt('TIMEOUT', 60)));
  const timeoutMs = timeoutSeconds * 1000;
  console.error(`[chrome_wait] Waiting for Chrome session (timeout=${timeoutSeconds}s)...`);
  const ready = await waitForChromeSession(CHROME_SESSION_DIR, timeoutMs);
  if (!ready) {
    const error = CHROME_SESSION_REQUIRED_ERROR;
    console.error(`[chrome_wait] ERROR: ${error}`);
    console.log(JSON.stringify({ type: 'ArchiveResult', status: 'failed', output_str: error }));
    process.exit(1);
  }
  // Double-check both session files are readable and non-empty.
  const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
  const targetId = readTargetId(CHROME_SESSION_DIR);
  if (!cdpUrl || !targetId) {
    const error = CHROME_SESSION_REQUIRED_ERROR;
    console.error(`[chrome_wait] ERROR: ${error}`);
    console.log(JSON.stringify({ type: 'ArchiveResult', status: 'failed', output_str: error }));
    process.exit(1);
  }
  console.error(`[chrome_wait] Chrome session ready (cdp_url=${cdpUrl.slice(0, 32)}..., target_id=${targetId}).`);
  console.log(JSON.stringify({ type: 'ArchiveResult', status: 'succeeded', output_str: 'chrome session ready' }));
  process.exit(0);
}
// Script entry point: any unhandled rejection is fatal.
main().catch(e => {
  console.error(`Fatal error: ${e.message}`);
  process.exit(1);
});

View File

@@ -1,225 +0,0 @@
#!/usr/bin/env node
/**
* Navigate the Chrome browser to the target URL.
*
* This is a simple hook that ONLY navigates - nothing else.
* Pre-load hooks (21-29) should set up their own CDP listeners.
* Post-load hooks (31+) can then read from the loaded page.
*
* Usage: on_Snapshot__30_chrome_navigate.js --url=<url> --snapshot-id=<uuid>
* Output: Writes page_loaded.txt marker when navigation completes
*
* Environment variables:
* CHROME_PAGELOAD_TIMEOUT: Timeout in seconds (default: 60)
* CHROME_DELAY_AFTER_LOAD: Extra delay after load in seconds (default: 0)
* CHROME_WAIT_FOR: Wait condition (default: networkidle2)
*/
const fs = require('fs');
const path = require('path');
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
const puppeteer = require('puppeteer');
const PLUGIN_NAME = 'chrome_navigate';
const CHROME_SESSION_DIR = '.';
const OUTPUT_DIR = '.';
const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
// Parse --key=value CLI arguments (dashes in keys become underscores;
// bare --flags become true; non-flag arguments are ignored).
function parseArgs() {
  const collected = {};
  process.argv.slice(2)
    .filter((arg) => arg.startsWith('--'))
    .forEach((arg) => {
      const [name, ...pieces] = arg.slice(2).split('=');
      collected[name.replace(/-/g, '_')] = pieces.join('=') || true;
    });
  return collected;
}
// Read an environment variable as a trimmed string, with an optional default.
function getEnv(name, defaultValue = '') {
  const raw = process.env[name] || defaultValue;
  return raw.trim();
}
// Read an environment variable as an integer; fall back to defaultValue
// when the variable is unset or not parseable.
function getEnvInt(name, defaultValue = 0) {
  const parsed = parseInt(getEnv(name, String(defaultValue)), 10);
  return Number.isNaN(parsed) ? defaultValue : parsed;
}
// Read an environment variable as a float; fall back to defaultValue
// when the variable is unset or not parseable.
function getEnvFloat(name, defaultValue = 0) {
  const parsed = parseFloat(getEnv(name, String(defaultValue)));
  return Number.isNaN(parsed) ? defaultValue : parsed;
}
/**
 * Poll until both cdp_url.txt and target_id.txt exist in the session dir
 * (the chrome_tab hook writes them once the snapshot's tab is ready).
 *
 * @param {number} [timeoutMs=60000] - Maximum time to wait
 * @returns {Promise<boolean>} true once both files appear, false on timeout
 */
async function waitForChromeTabOpen(timeoutMs = 60000) {
  const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
  const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
  const startTime = Date.now();
  while (Date.now() - startTime < timeoutMs) {
    if (fs.existsSync(cdpFile) && fs.existsSync(targetIdFile)) {
      return true;
    }
    // Wait 100ms before checking again
    await new Promise(resolve => setTimeout(resolve, 100));
  }
  return false;
}
// Read the tab's CDP websocket URL written by the chrome_tab hook,
// or null when the session file doesn't exist yet.
function getCdpUrl() {
  const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
  return fs.existsSync(cdpFile) ? fs.readFileSync(cdpFile, 'utf8').trim() : null;
}
// Read this snapshot's tab target id, or null when not yet written.
function getPageId() {
  const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
  return fs.existsSync(targetIdFile) ? fs.readFileSync(targetIdFile, 'utf8').trim() : null;
}
// Return a validated puppeteer waitUntil condition from CHROME_WAIT_FOR,
// defaulting to 'networkidle2' for unset or unrecognized values.
function getWaitCondition() {
  const requested = getEnv('CHROME_WAIT_FOR', 'networkidle2').toLowerCase();
  switch (requested) {
    case 'domcontentloaded':
    case 'load':
    case 'networkidle0':
    case 'networkidle2':
      return requested;
    default:
      return 'networkidle2';
  }
}
// Promise-based sleep: resolves after the given number of milliseconds.
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Connect to the snapshot's existing Chrome tab and navigate it to `url`.
 *
 * Writes navigation.json plus page_loaded.txt / final_url.txt marker files
 * on success. Never throws: errors are captured into the returned object.
 *
 * @param {string} url - URL to navigate to
 * @param {string} cdpUrl - CDP websocket URL of the shared browser
 * @returns {Promise<Object>} { success, finalUrl?, status?, error?, waitUntil, elapsed }
 */
async function navigate(url, cdpUrl) {
  // Timeout resolution: CHROME_PAGELOAD_TIMEOUT > CHROME_TIMEOUT > TIMEOUT > 60s.
  const timeout = (getEnvInt('CHROME_PAGELOAD_TIMEOUT') || getEnvInt('CHROME_TIMEOUT') || getEnvInt('TIMEOUT', 60)) * 1000;
  const delayAfterLoad = getEnvFloat('CHROME_DELAY_AFTER_LOAD', 0) * 1000;
  const waitUntil = getWaitCondition();
  const targetId = getPageId();
  let browser = null;
  const navStartTime = Date.now();
  try {
    browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
    const pages = await browser.pages();
    if (pages.length === 0) {
      return { success: false, error: 'No pages found in browser', waitUntil, elapsed: Date.now() - navStartTime };
    }
    // Find page by target ID if available
    let page = null;
    if (targetId) {
      page = pages.find(p => {
        const target = p.target();
        return target && target._targetId === targetId;
      });
    }
    // Fall back to the most recently opened tab when the target isn't found.
    if (!page) {
      page = pages[pages.length - 1];
    }
    // Navigate
    console.log(`Navigating to ${url} (wait: ${waitUntil}, timeout: ${timeout}ms)`);
    const response = await page.goto(url, { waitUntil, timeout });
    // Optional delay (useful for JS-heavy SPAs that render after load)
    if (delayAfterLoad > 0) {
      console.log(`Waiting ${delayAfterLoad}ms after load...`);
      await sleep(delayAfterLoad);
    }
    const finalUrl = page.url();
    const status = response ? response.status() : null;
    const elapsed = Date.now() - navStartTime;
    // Write navigation state as JSON
    const navigationState = {
      waitUntil,
      elapsed,
      url,
      finalUrl,
      status,
      timestamp: new Date().toISOString()
    };
    fs.writeFileSync(path.join(OUTPUT_DIR, 'navigation.json'), JSON.stringify(navigationState, null, 2));
    // Write marker files for backwards compatibility
    fs.writeFileSync(path.join(OUTPUT_DIR, 'page_loaded.txt'), new Date().toISOString());
    fs.writeFileSync(path.join(OUTPUT_DIR, 'final_url.txt'), finalUrl);
    browser.disconnect();
    return { success: true, finalUrl, status, waitUntil, elapsed };
  } catch (e) {
    // Disconnect (not close) so the shared browser stays alive for others.
    if (browser) browser.disconnect();
    const elapsed = Date.now() - navStartTime;
    return { success: false, error: `${e.name}: ${e.message}`, waitUntil, elapsed };
  }
}
/**
 * Entry point: wait for the crawl-level Chrome tab to be ready, navigate it,
 * then emit an ArchiveResult JSONL record on stdout and exit 0/1 accordingly.
 */
async function main() {
    const args = parseArgs();
    const url = args.url;
    const snapshotId = args.snapshot_id;
    if (!url || !snapshotId) {
        console.error('Usage: on_Snapshot__30_chrome_navigate.js --url=<url> --snapshot-id=<uuid>');
        process.exit(1);
    }
    const startTs = new Date();
    let status = 'failed';
    let output = null;
    let error = '';
    // Wait for chrome tab to be open (up to 60s)
    const tabOpen = await waitForChromeTabOpen(60000);
    if (!tabOpen) {
        console.error(`ERROR: ${CHROME_SESSION_REQUIRED_ERROR}`);
        process.exit(1);
    }
    const cdpUrl = getCdpUrl();
    if (!cdpUrl) {
        console.error(`ERROR: ${CHROME_SESSION_REQUIRED_ERROR}`);
        process.exit(1);
    }
    const result = await navigate(url, cdpUrl);
    if (result.success) {
        status = 'succeeded';
        output = 'navigation.json';
        console.log(`Page loaded: ${result.finalUrl} (HTTP ${result.status}) in ${result.elapsed}ms (waitUntil: ${result.waitUntil})`);
    } else {
        error = result.error;
        // Save navigation state even on failure so downstream hooks can inspect it
        const navigationState = {
            waitUntil: result.waitUntil,
            elapsed: result.elapsed,
            url,
            error: result.error,
            timestamp: new Date().toISOString()
        };
        fs.writeFileSync(path.join(OUTPUT_DIR, 'navigation.json'), JSON.stringify(navigationState, null, 2));
    }
    const endTs = new Date();  // NOTE(review): startTs/endTs are computed but unused here
    if (error) console.error(`ERROR: ${error}`);
    // Output clean JSONL (no RESULT_JSON= prefix)
    console.log(JSON.stringify({
        type: 'ArchiveResult',
        status,
        output_str: output || error || '',
    }));
    process.exit(status === 'succeeded' ? 0 : 1);
}
// Entry point: any unhandled rejection from main() is reported and treated as fatal.
main().catch(e => {
    console.error(`Fatal error: ${e.message}`);
    process.exit(1);
});

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--chrome" title="Chrome"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="4.5" width="18" height="15" rx="2"/><path d="M3 9h18"/><circle cx="7" cy="7" r="1" fill="currentColor" stroke="none"/><circle cx="11" cy="7" r="1" fill="currentColor" stroke="none"/></svg></span>

File diff suppressed because it is too large Load Diff

View File

@@ -1,722 +0,0 @@
"""
Integration tests for chrome plugin
Tests verify:
1. Chromium install via @puppeteer/browsers
2. Verify deps with abx-pkg
3. Chrome hooks exist
4. Chromium launches at crawl level
5. Tab creation at snapshot level
6. Tab navigation works
7. Tab cleanup on SIGTERM
8. Chromium cleanup on crawl end
NOTE: We use Chromium instead of Chrome because Chrome 137+ removed support for
--load-extension and --disable-extensions-except flags, which are needed for
loading unpacked extensions in headless mode.
"""
import json
import os
import signal
import subprocess
import sys
import time
from pathlib import Path
import pytest
import tempfile
import shutil
import platform
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
get_test_env,
find_chromium_binary,
install_chromium_with_hooks,
CHROME_PLUGIN_DIR as PLUGIN_DIR,
CHROME_LAUNCH_HOOK,
CHROME_TAB_HOOK,
CHROME_NAVIGATE_HOOK,
)
def _get_cookies_via_cdp(port: int, env: dict) -> list[dict]:
    """Return all cookies from the Chrome instance listening on *port*.

    Runs an inline Node.js script that connects to the first page target's
    CDP websocket and issues ``Network.getAllCookies``.  The script prints
    the cookie list as JSON on stdout; any failure exits non-zero and is
    surfaced via the assert below.
    """
    node_script = r"""
const http = require('http');
const WebSocket = require('ws');
const port = process.env.CDP_PORT;
function getTargets() {
return new Promise((resolve, reject) => {
const req = http.get(`http://127.0.0.1:${port}/json/list`, (res) => {
let data = '';
res.on('data', (chunk) => (data += chunk));
res.on('end', () => {
try {
resolve(JSON.parse(data));
} catch (e) {
reject(e);
}
});
});
req.on('error', reject);
});
}
(async () => {
const targets = await getTargets();
const pageTarget = targets.find(t => t.type === 'page') || targets[0];
if (!pageTarget) {
console.error('No page target found');
process.exit(2);
}
const ws = new WebSocket(pageTarget.webSocketDebuggerUrl);
const timer = setTimeout(() => {
console.error('Timeout waiting for cookies');
process.exit(3);
}, 10000);
ws.on('open', () => {
ws.send(JSON.stringify({ id: 1, method: 'Network.getAllCookies' }));
});
ws.on('message', (data) => {
const msg = JSON.parse(data);
if (msg.id === 1) {
clearTimeout(timer);
ws.close();
if (!msg.result || !msg.result.cookies) {
console.error('No cookies in response');
process.exit(4);
}
process.stdout.write(JSON.stringify(msg.result.cookies));
process.exit(0);
}
});
ws.on('error', (err) => {
console.error(String(err));
process.exit(5);
});
})().catch((err) => {
console.error(String(err));
process.exit(1);
});
"""
    result = subprocess.run(
        ['node', '-e', node_script],
        capture_output=True,
        text=True,
        timeout=30,
        env=env | {'CDP_PORT': str(port)},  # Node script reads CDP_PORT
    )
    assert result.returncode == 0, f"Failed to read cookies via CDP: {result.stderr}\nStdout: {result.stdout}"
    # Stdout may be empty on an empty cookie jar; fall back to '[]'
    return json.loads(result.stdout or '[]')
@pytest.fixture(scope="session", autouse=True)
def ensure_chromium_and_puppeteer_installed(tmp_path_factory):
    """Session fixture: ensure Chromium + puppeteer are installed before tests.

    Points DATA_DIR at a temp dir if unset, installs Chromium via the plugin
    hooks, and exports CHROME_BINARY plus node-related paths into os.environ
    for every test in the session.
    """
    if not os.environ.get('DATA_DIR'):
        test_data_dir = tmp_path_factory.mktemp('chrome_test_data')
        os.environ['DATA_DIR'] = str(test_data_dir)
    env = get_test_env()
    # Let install errors propagate directly; the previous
    # `except RuntimeError as e: raise RuntimeError(str(e))` re-raised the same
    # exception type while discarding the original traceback context.
    chromium_binary = install_chromium_with_hooks(env)
    if not chromium_binary:
        raise RuntimeError("Chromium not found after install")
    os.environ['CHROME_BINARY'] = chromium_binary
    # Propagate node resolution paths so subprocess hooks can find modules
    for key in ('NODE_MODULES_DIR', 'NODE_PATH', 'PATH'):
        if env.get(key):
            os.environ[key] = env[key]
def test_hook_scripts_exist():
    """All three chrome hook scripts must be present in the plugin."""
    for hook in (CHROME_LAUNCH_HOOK, CHROME_TAB_HOOK, CHROME_NAVIGATE_HOOK):
        assert hook.exists(), f"Hook not found: {hook}"
def test_verify_chromium_available():
    """CHROME_BINARY (or a discovered binary) must exist and report a version."""
    binary = os.environ.get('CHROME_BINARY') or find_chromium_binary()
    assert binary, "Chromium binary should be available (set by fixture or found)"
    assert Path(binary).exists(), f"Chromium binary should exist at {binary}"
    # Sanity-check that the binary is actually a Chrome/Chromium build
    proc = subprocess.run(
        [binary, '--version'],
        capture_output=True,
        text=True,
        timeout=10,
    )
    assert proc.returncode == 0, f"Failed to get Chromium version: {proc.stderr}"
    assert 'Chromium' in proc.stdout or 'Chrome' in proc.stdout, f"Unexpected version output: {proc.stdout}"
def test_chrome_launch_and_tab_creation():
    """Integration: launch Chrome at crawl level, then open a tab at snapshot level.

    Verifies the launch hook writes cdp_url.txt/chrome.pid/port.txt, that the
    Chrome process is alive, and that the tab hook records a target id for the
    snapshot.  Cleanup runs in a ``finally`` so Chrome is killed even when an
    assertion fails mid-test (previously cleanup only ran on full success).
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        crawl_dir = Path(tmpdir) / 'crawl'
        crawl_dir.mkdir()
        chrome_dir = crawl_dir / 'chrome'
        chrome_dir.mkdir()
        # Get test environment with NODE_MODULES_DIR set
        env = get_test_env()
        env['CHROME_HEADLESS'] = 'true'
        # Launch Chrome at crawl level (background process)
        chrome_launch_process = subprocess.Popen(
            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-123'],
            cwd=str(chrome_dir),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=env
        )
        chrome_pid = None
        try:
            # Wait up to 15s for Chrome to start (bail out if the hook dies early)
            for _ in range(15):
                if chrome_launch_process.poll() is not None:
                    stdout, stderr = chrome_launch_process.communicate()
                    pytest.fail(f"Chrome launch process exited early:\nStdout: {stdout}\nStderr: {stderr}")
                if (chrome_dir / 'cdp_url.txt').exists():
                    break
                time.sleep(1)
            # If launch failed, gather as much diagnostic context as possible
            if not (chrome_dir / 'cdp_url.txt').exists():
                try:
                    stdout, stderr = chrome_launch_process.communicate(timeout=1)
                except subprocess.TimeoutExpired:
                    stdout = stderr = "(process still running)"
                if chrome_dir.exists():
                    files = list(chrome_dir.iterdir())
                    if (chrome_dir / 'chrome.pid').exists():
                        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
                        try:
                            os.kill(chrome_pid, 0)  # signal 0 = existence probe
                            chrome_alive = "yes"
                        except OSError:
                            chrome_alive = "no"
                        pytest.fail(f"cdp_url.txt missing after 15s. Chrome dir files: {files}. Chrome process {chrome_pid} alive: {chrome_alive}\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}")
                    else:
                        pytest.fail(f"cdp_url.txt missing. Chrome dir exists with files: {files}\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}")
                else:
                    pytest.fail(f"Chrome dir {chrome_dir} doesn't exist\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}")
            assert (chrome_dir / 'cdp_url.txt').exists(), "cdp_url.txt should exist"
            assert (chrome_dir / 'chrome.pid').exists(), "chrome.pid should exist"
            assert (chrome_dir / 'port.txt').exists(), "port.txt should exist"
            cdp_url = (chrome_dir / 'cdp_url.txt').read_text().strip()
            chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
            assert cdp_url.startswith('ws://'), f"CDP URL should be WebSocket URL: {cdp_url}"
            assert chrome_pid > 0, "Chrome PID should be valid"
            # Verify the Chrome process is actually running
            try:
                os.kill(chrome_pid, 0)
            except OSError:
                pytest.fail(f"Chrome process {chrome_pid} is not running")
            # Create snapshot directory and open a tab at snapshot level
            snapshot_dir = Path(tmpdir) / 'snapshot1'
            snapshot_dir.mkdir()
            snapshot_chrome_dir = snapshot_dir / 'chrome'
            snapshot_chrome_dir.mkdir()
            env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
            result = subprocess.run(
                ['node', str(CHROME_TAB_HOOK), '--url=https://example.com', '--snapshot-id=snap-123', '--crawl-id=test-crawl-123'],
                cwd=str(snapshot_chrome_dir),
                capture_output=True,
                text=True,
                timeout=60,
                env=env
            )
            assert result.returncode == 0, f"Tab creation failed: {result.stderr}\nStdout: {result.stdout}"
            # Verify tab creation outputs
            assert (snapshot_chrome_dir / 'cdp_url.txt').exists(), "Snapshot cdp_url.txt should exist"
            assert (snapshot_chrome_dir / 'target_id.txt').exists(), "target_id.txt should exist"
            assert (snapshot_chrome_dir / 'url.txt').exists(), "url.txt should exist"
            target_id = (snapshot_chrome_dir / 'target_id.txt').read_text().strip()
            assert len(target_id) > 0, "Target ID should not be empty"
        finally:
            # Best-effort cleanup; bare `except:` replaced with `except Exception:`
            try:
                chrome_launch_process.send_signal(signal.SIGTERM)
                chrome_launch_process.wait(timeout=5)
            except Exception:
                pass
            if chrome_pid:
                try:
                    os.kill(chrome_pid, signal.SIGKILL)
                except OSError:
                    pass
def test_cookies_imported_on_launch():
    """Integration: cookies from COOKIES_TXT_FILE are imported at crawl start.

    Writes a minimal Netscape-format cookies.txt, launches Chrome with it, and
    polls via CDP until the known cookie appears.  Cleanup now runs in a
    ``finally`` and uses ``except Exception`` instead of a bare ``except:``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        crawl_dir = Path(tmpdir) / 'crawl'
        crawl_dir.mkdir()
        chrome_dir = crawl_dir / 'chrome'
        chrome_dir.mkdir()
        # Minimal Netscape-format cookie jar with one known cookie
        cookies_file = Path(tmpdir) / 'cookies.txt'
        cookies_file.write_text(
            '\n'.join([
                '# Netscape HTTP Cookie File',
                '# https://curl.se/docs/http-cookies.html',
                '# This file was generated by a test',
                '',
                'example.com\tTRUE\t/\tFALSE\t2147483647\tabx_test_cookie\thello',
                '',
            ])
        )
        profile_dir = Path(tmpdir) / 'profile'
        env = get_test_env()
        env.update({
            'CHROME_HEADLESS': 'true',
            'CHROME_USER_DATA_DIR': str(profile_dir),
            'COOKIES_TXT_FILE': str(cookies_file),
        })
        chrome_launch_process = subprocess.Popen(
            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-cookies'],
            cwd=str(chrome_dir),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=env
        )
        chrome_pid = None
        try:
            # Wait up to 15s for the debug port file to appear
            for _ in range(15):
                if (chrome_dir / 'port.txt').exists():
                    break
                time.sleep(1)
            assert (chrome_dir / 'port.txt').exists(), "port.txt should exist"
            chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
            port = int((chrome_dir / 'port.txt').read_text().strip())
            # Poll via CDP until the imported cookie shows up (import is async)
            cookie_found = False
            for _ in range(15):
                cookies = _get_cookies_via_cdp(port, env)
                cookie_found = any(
                    c.get('name') == 'abx_test_cookie' and c.get('value') == 'hello'
                    for c in cookies
                )
                if cookie_found:
                    break
                time.sleep(1)
            assert cookie_found, "Imported cookie should be present in Chrome session"
        finally:
            # Best-effort cleanup even if an assertion above failed
            try:
                chrome_launch_process.send_signal(signal.SIGTERM)
                chrome_launch_process.wait(timeout=5)
            except Exception:
                pass
            if chrome_pid:
                try:
                    os.kill(chrome_pid, signal.SIGKILL)
                except OSError:
                    pass
def test_chrome_navigation():
    """Integration: create a tab and navigate it to a URL; verify nav outputs.

    Improvements over the previous version: the blind ``time.sleep(3)`` before
    reading chrome.pid is replaced with a poll loop (it raised
    FileNotFoundError on slow machines), cleanup runs in ``finally``, and the
    bare ``except:`` is narrowed to ``except Exception:``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        crawl_dir = Path(tmpdir) / 'crawl'
        crawl_dir.mkdir()
        chrome_dir = crawl_dir / 'chrome'
        chrome_dir.mkdir()
        # Launch Chrome (background process)
        chrome_launch_process = subprocess.Popen(
            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-nav'],
            cwd=str(chrome_dir),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=get_test_env() | {'CHROME_HEADLESS': 'true'}
        )
        chrome_pid = None
        try:
            # Poll for the PID file instead of a fixed sleep
            for _ in range(15):
                if (chrome_dir / 'chrome.pid').exists():
                    break
                time.sleep(1)
            assert (chrome_dir / 'chrome.pid').exists(), "chrome.pid should exist after launch"
            chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
            # Create snapshot and tab
            snapshot_dir = Path(tmpdir) / 'snapshot1'
            snapshot_dir.mkdir()
            snapshot_chrome_dir = snapshot_dir / 'chrome'
            snapshot_chrome_dir.mkdir()
            result = subprocess.run(
                ['node', str(CHROME_TAB_HOOK), '--url=https://example.com', '--snapshot-id=snap-nav-123', '--crawl-id=test-crawl-nav'],
                cwd=str(snapshot_chrome_dir),
                capture_output=True,
                text=True,
                timeout=60,
                env=get_test_env() | {'CRAWL_OUTPUT_DIR': str(crawl_dir), 'CHROME_HEADLESS': 'true'}
            )
            assert result.returncode == 0, f"Tab creation failed: {result.stderr}"
            # Navigate to URL
            result = subprocess.run(
                ['node', str(CHROME_NAVIGATE_HOOK), '--url=https://example.com', '--snapshot-id=snap-nav-123'],
                cwd=str(snapshot_chrome_dir),
                capture_output=True,
                text=True,
                timeout=120,
                env=get_test_env() | {'CHROME_PAGELOAD_TIMEOUT': '30', 'CHROME_WAIT_FOR': 'load'}
            )
            assert result.returncode == 0, f"Navigation failed: {result.stderr}\nStdout: {result.stdout}"
            # Verify navigation outputs
            assert (snapshot_chrome_dir / 'navigation.json').exists(), "navigation.json should exist"
            assert (snapshot_chrome_dir / 'page_loaded.txt').exists(), "page_loaded.txt should exist"
            nav_data = json.loads((snapshot_chrome_dir / 'navigation.json').read_text())
            assert nav_data.get('status') in [200, 301, 302], f"Should get valid HTTP status: {nav_data}"
            assert nav_data.get('finalUrl'), "Should have final URL"
        finally:
            # Best-effort cleanup even if an assertion above failed
            try:
                chrome_launch_process.send_signal(signal.SIGTERM)
                chrome_launch_process.wait(timeout=5)
            except Exception:
                pass
            if chrome_pid:
                try:
                    os.kill(chrome_pid, signal.SIGKILL)
                except OSError:
                    pass
def test_tab_cleanup_on_sigterm():
    """Integration: the tab hook exits cleanly on SIGTERM, leaving Chrome alive.

    Cleanup now runs in a ``finally`` and the bare ``except:`` clauses are
    narrowed to ``except Exception:``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        crawl_dir = Path(tmpdir) / 'crawl'
        crawl_dir.mkdir()
        chrome_dir = crawl_dir / 'chrome'
        chrome_dir.mkdir()
        # Launch Chrome (background process)
        chrome_launch_process = subprocess.Popen(
            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-cleanup'],
            cwd=str(chrome_dir),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=get_test_env() | {'CHROME_HEADLESS': 'true'}
        )
        chrome_pid = None
        try:
            # Wait for Chrome to launch
            time.sleep(3)
            chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
            # Start the tab hook in the background so we can signal it
            snapshot_dir = Path(tmpdir) / 'snapshot1'
            snapshot_dir.mkdir()
            snapshot_chrome_dir = snapshot_dir / 'chrome'
            snapshot_chrome_dir.mkdir()
            tab_process = subprocess.Popen(
                ['node', str(CHROME_TAB_HOOK), '--url=https://example.com', '--snapshot-id=snap-cleanup', '--crawl-id=test-cleanup'],
                cwd=str(snapshot_chrome_dir),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                env=get_test_env() | {'CRAWL_OUTPUT_DIR': str(crawl_dir), 'CHROME_HEADLESS': 'true'}
            )
            # Give the tab time to be created, then ask the hook to shut down
            time.sleep(3)
            tab_process.send_signal(signal.SIGTERM)
            stdout, stderr = tab_process.communicate(timeout=10)
            assert tab_process.returncode == 0, f"Tab process should exit cleanly: {stderr}"
            # Closing one tab must not take down the shared Chrome instance
            try:
                os.kill(chrome_pid, 0)
            except OSError:
                pytest.fail("Chrome should still be running after tab cleanup")
        finally:
            # Best-effort cleanup even if an assertion above failed
            try:
                chrome_launch_process.send_signal(signal.SIGTERM)
                chrome_launch_process.wait(timeout=5)
            except Exception:
                pass
            if chrome_pid:
                try:
                    os.kill(chrome_pid, signal.SIGKILL)
                except OSError:
                    pass
def test_multiple_snapshots_share_chrome():
    """Integration: several snapshots open tabs in one shared Chrome instance.

    Each tab must report the crawl-level Chrome PID and CDP URL but a unique
    target id.  Cleanup runs in ``finally``; bare ``except:`` narrowed to
    ``except Exception:``; unused ``snapshot_dirs`` accumulator removed.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        crawl_dir = Path(tmpdir) / 'crawl'
        crawl_dir.mkdir()
        chrome_dir = crawl_dir / 'chrome'
        chrome_dir.mkdir()
        # Launch Chrome at crawl level
        chrome_launch_process = subprocess.Popen(
            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-multi-crawl'],
            cwd=str(chrome_dir),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=get_test_env() | {'CHROME_HEADLESS': 'true'}
        )
        chrome_pid = None
        try:
            # Wait for the launch hook to publish the CDP endpoint
            for _ in range(15):
                if (chrome_dir / 'cdp_url.txt').exists():
                    break
                time.sleep(1)
            chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
            crawl_cdp_url = (chrome_dir / 'cdp_url.txt').read_text().strip()
            # Open one tab per snapshot; all must share the crawl's Chrome
            target_ids = []
            for snap_num in range(3):
                snapshot_dir = Path(tmpdir) / f'snapshot{snap_num}'
                snapshot_dir.mkdir()
                snapshot_chrome_dir = snapshot_dir / 'chrome'
                snapshot_chrome_dir.mkdir()
                result = subprocess.run(
                    ['node', str(CHROME_TAB_HOOK), f'--url=https://example.com/{snap_num}', f'--snapshot-id=snap-{snap_num}', '--crawl-id=test-multi-crawl'],
                    cwd=str(snapshot_chrome_dir),
                    capture_output=True,
                    text=True,
                    timeout=60,
                    env=get_test_env() | {'CRAWL_OUTPUT_DIR': str(crawl_dir), 'CHROME_HEADLESS': 'true'}
                )
                assert result.returncode == 0, f"Tab {snap_num} creation failed: {result.stderr}"
                # Each snapshot records its own tab alongside the shared Chrome info
                assert (snapshot_chrome_dir / 'target_id.txt').exists()
                assert (snapshot_chrome_dir / 'cdp_url.txt').exists()
                assert (snapshot_chrome_dir / 'chrome.pid').exists()
                target_id = (snapshot_chrome_dir / 'target_id.txt').read_text().strip()
                snapshot_cdp_url = (snapshot_chrome_dir / 'cdp_url.txt').read_text().strip()
                snapshot_pid = int((snapshot_chrome_dir / 'chrome.pid').read_text().strip())
                target_ids.append(target_id)
                assert snapshot_pid == chrome_pid, f"Snapshot {snap_num} should use crawl Chrome PID"
                assert snapshot_cdp_url == crawl_cdp_url, f"Snapshot {snap_num} should use crawl CDP URL"
            # Each snapshot must have its own tab (unique target ids)
            assert len(set(target_ids)) == 3, f"All snapshots should have unique tabs: {target_ids}"
            # Chrome itself must survive all three tab creations
            try:
                os.kill(chrome_pid, 0)
            except OSError:
                pytest.fail("Chrome should still be running after creating 3 tabs")
        finally:
            # Best-effort cleanup even if an assertion above failed
            try:
                chrome_launch_process.send_signal(signal.SIGTERM)
                chrome_launch_process.wait(timeout=5)
            except Exception:
                pass
            if chrome_pid:
                try:
                    os.kill(chrome_pid, signal.SIGKILL)
                except OSError:
                    pass
def test_chrome_cleanup_on_crawl_end():
    """SIGTERM to the launch hook must trigger cleanup that kills Chrome."""
    with tempfile.TemporaryDirectory() as tmpdir:
        crawl_root = Path(tmpdir) / 'crawl'
        crawl_root.mkdir()
        session_dir = crawl_root / 'chrome'
        session_dir.mkdir()
        # Start the launch hook in the background
        launch_proc = subprocess.Popen(
            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-end'],
            cwd=str(session_dir),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=get_test_env() | {'CHROME_HEADLESS': 'true'}
        )
        # Give Chrome a moment to start
        time.sleep(3)
        pid_file = session_dir / 'chrome.pid'
        assert pid_file.exists(), "Chrome PID file should exist"
        browser_pid = int(pid_file.read_text().strip())
        try:
            os.kill(browser_pid, 0)  # existence probe
        except OSError:
            pytest.fail("Chrome should be running")
        # SIGTERM the hook; its handler is responsible for tearing Chrome down
        launch_proc.send_signal(signal.SIGTERM)
        launch_proc.communicate(timeout=10)
        # Allow the cleanup handler time to finish
        time.sleep(3)
        # After cleanup the browser process must be gone
        try:
            os.kill(browser_pid, 0)
            pytest.fail("Chrome should be killed after SIGTERM")
        except OSError:
            pass  # expected: Chrome is dead
def test_zombie_prevention_hook_killed():
    """Integration test: Chrome is killed even if hook process is SIGKILL'd.

    Simulates the launch hook dying without running its cleanup handler, then
    replicates Crawl.cleanup()'s pid-file sweep (SIGTERM, wait, SIGKILL the
    process group) and asserts the orphaned Chrome is reaped.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        crawl_dir = Path(tmpdir) / 'crawl'
        crawl_dir.mkdir()
        chrome_dir = crawl_dir / 'chrome'
        chrome_dir.mkdir()
        # Launch Chrome
        chrome_launch_process = subprocess.Popen(
            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-zombie'],
            cwd=str(chrome_dir),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=get_test_env() | {'CHROME_HEADLESS': 'true'}
        )
        # Wait up to 15s for the pid file to appear
        for i in range(15):
            if (chrome_dir / 'chrome.pid').exists():
                break
            time.sleep(1)
        assert (chrome_dir / 'chrome.pid').exists(), "Chrome PID file should exist"
        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
        hook_pid = chrome_launch_process.pid  # Use the Popen process PID instead of hook.pid file
        # Verify both Chrome and hook are running (signal 0 = existence probe)
        try:
            os.kill(chrome_pid, 0)
            os.kill(hook_pid, 0)
        except OSError:
            pytest.fail("Both Chrome and hook should be running")
        # Simulate hook getting SIGKILL'd (no chance to run cleanup)
        os.kill(hook_pid, signal.SIGKILL)
        time.sleep(1)
        # Chrome should still be running (orphaned)
        try:
            os.kill(chrome_pid, 0)
        except OSError:
            pytest.fail("Chrome should still be running after hook SIGKILL")
        # Simulate Crawl.cleanup() using the actual cleanup logic
        def is_process_alive(pid):
            """Check if a process exists."""
            try:
                os.kill(pid, 0)
                return True
            except (OSError, ProcessLookupError):
                return False
        for pid_file in chrome_dir.glob('**/*.pid'):
            try:
                pid = int(pid_file.read_text().strip())
                # Step 1: SIGTERM for graceful shutdown
                # NOTE(review): os.killpg() expects a process-group id; passing the
                # pid works only when the target is a group leader — confirm hooks
                # start Chrome with setsid/detached so pid == pgid.
                try:
                    try:
                        os.killpg(pid, signal.SIGTERM)
                    except (OSError, ProcessLookupError):
                        os.kill(pid, signal.SIGTERM)
                except ProcessLookupError:
                    pid_file.unlink(missing_ok=True)
                    continue
                # Step 2: Wait for graceful shutdown
                time.sleep(2)
                # Step 3: Check if still alive
                if not is_process_alive(pid):
                    pid_file.unlink(missing_ok=True)
                    continue
                # Step 4: Force kill ENTIRE process group with SIGKILL
                try:
                    try:
                        # Always kill entire process group with SIGKILL
                        os.killpg(pid, signal.SIGKILL)
                    except (OSError, ProcessLookupError):
                        os.kill(pid, signal.SIGKILL)
                except ProcessLookupError:
                    pid_file.unlink(missing_ok=True)
                    continue
                # Step 5: Wait and verify death
                time.sleep(1)
                if not is_process_alive(pid):
                    pid_file.unlink(missing_ok=True)
            except (ValueError, OSError):
                pass
        # Chrome should now be dead
        try:
            os.kill(chrome_pid, 0)
            pytest.fail("Chrome should be killed after cleanup")
        except OSError:
            # Expected - Chrome is dead
            pass
# Allow running this test module directly (python <file>.py) without the pytest CLI.
if __name__ == '__main__':
    pytest.main([__file__, '-v'])

View File

@@ -1,260 +0,0 @@
"""
Tests for chrome_test_helpers.py functions.
These tests verify the Python helper functions used across Chrome plugin tests.
"""
import os
import pytest
import tempfile
from pathlib import Path
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
get_test_env,
get_machine_type,
get_lib_dir,
get_node_modules_dir,
get_extensions_dir,
find_chromium_binary,
get_plugin_dir,
get_hook_script,
parse_jsonl_output,
)
def test_get_machine_type():
    """get_machine_type() yields an 'arch-os' identifier string."""
    mt = get_machine_type()
    assert isinstance(mt, str)
    assert '-' in mt, "Machine type should be in format: arch-os"
    # Must name a known architecture and a known OS
    assert any(arch in mt for arch in ('arm64', 'x86_64')), "Should contain valid architecture"
    assert any(osname in mt for osname in ('darwin', 'linux', 'win32')), "Should contain valid OS"
def test_get_lib_dir_with_env_var():
    """get_lib_dir() respects the LIB_DIR env var."""
    with tempfile.TemporaryDirectory() as tmpdir:
        custom_lib = Path(tmpdir) / 'custom_lib'
        custom_lib.mkdir()
        old_lib_dir = os.environ.get('LIB_DIR')
        try:
            os.environ['LIB_DIR'] = str(custom_lib)
            assert get_lib_dir() == custom_lib
        finally:
            # `is not None` so a previously-empty LIB_DIR is restored instead
            # of popped (the old `if old_lib_dir:` check dropped falsy values).
            if old_lib_dir is not None:
                os.environ['LIB_DIR'] = old_lib_dir
            else:
                os.environ.pop('LIB_DIR', None)
def test_get_node_modules_dir_with_env_var():
    """get_node_modules_dir() respects the NODE_MODULES_DIR env var."""
    with tempfile.TemporaryDirectory() as tmpdir:
        custom_nm = Path(tmpdir) / 'node_modules'
        custom_nm.mkdir()
        old_nm_dir = os.environ.get('NODE_MODULES_DIR')
        try:
            os.environ['NODE_MODULES_DIR'] = str(custom_nm)
            assert get_node_modules_dir() == custom_nm
        finally:
            # `is not None` so a previously-empty value is restored, not popped
            # (the old `if old_nm_dir:` check dropped falsy values).
            if old_nm_dir is not None:
                os.environ['NODE_MODULES_DIR'] = old_nm_dir
            else:
                os.environ.pop('NODE_MODULES_DIR', None)
def test_get_extensions_dir_default():
    """get_extensions_dir() returns a personas/chrome_extensions path string."""
    result = get_extensions_dir()
    assert isinstance(result, str)
    for fragment in ('personas', 'chrome_extensions'):
        assert fragment in result
def test_get_extensions_dir_with_custom_persona():
    """get_extensions_dir() honors the ACTIVE_PERSONA and DATA_DIR env vars."""
    old_persona = os.environ.get('ACTIVE_PERSONA')
    old_data_dir = os.environ.get('DATA_DIR')
    try:
        os.environ['ACTIVE_PERSONA'] = 'TestPersona'
        os.environ['DATA_DIR'] = '/tmp/test'
        ext_dir = get_extensions_dir()
        assert 'TestPersona' in ext_dir
        assert '/tmp/test' in ext_dir
    finally:
        # `is not None` so previously-empty values are restored instead of
        # popped (the old truthiness checks dropped empty-string env vars).
        if old_persona is not None:
            os.environ['ACTIVE_PERSONA'] = old_persona
        else:
            os.environ.pop('ACTIVE_PERSONA', None)
        if old_data_dir is not None:
            os.environ['DATA_DIR'] = old_data_dir
        else:
            os.environ.pop('DATA_DIR', None)
def test_get_test_env_returns_dict():
    """get_test_env() includes every path key the test suite relies on."""
    env = get_test_env()
    assert isinstance(env, dict)
    expected_keys = (
        'MACHINE_TYPE',
        'LIB_DIR',
        'NODE_MODULES_DIR',
        'NODE_PATH',
        'NPM_BIN_DIR',
        'CHROME_EXTENSIONS_DIR',
    )
    for key in expected_keys:
        assert key in env
    # Node resolves modules via NODE_PATH, which must mirror NODE_MODULES_DIR
    assert env['NODE_PATH'] == env['NODE_MODULES_DIR']
def test_get_test_env_paths_are_absolute():
    """All path-like values from get_test_env() are absolute paths."""
    env = get_test_env()
    for key in ('LIB_DIR', 'NODE_MODULES_DIR', 'NODE_PATH'):
        assert Path(env[key]).is_absolute()
def test_find_chromium_binary():
    """find_chromium_binary() yields an absolute path string, or a falsy value."""
    located = find_chromium_binary()
    if not located:
        return  # acceptable: chromium may not be installed in this environment
    assert isinstance(located, str)
    assert os.path.isabs(located)
def test_get_plugin_dir():
    """get_plugin_dir() resolves this test file to the chrome plugin directory."""
    plugin_dir = get_plugin_dir(__file__)
    assert plugin_dir.exists()
    assert plugin_dir.is_dir()
    # Must resolve to <...>/plugins/chrome
    assert plugin_dir.name == 'chrome'
    assert plugin_dir.parent.name == 'plugins'
def test_get_hook_script_finds_existing_hook():
    """get_hook_script() locates the chrome launch hook when it is present."""
    from archivebox.plugins.chrome.tests.chrome_test_helpers import CHROME_PLUGIN_DIR
    hook = get_hook_script(CHROME_PLUGIN_DIR, 'on_Crawl__*_chrome_launch.*')
    if hook is None:
        return  # hook may be absent in some test environments
    assert hook.exists()
    assert hook.is_file()
    assert 'chrome_launch' in hook.name
def test_get_hook_script_returns_none_for_missing():
    """get_hook_script() gives None when no hook matches the pattern."""
    from archivebox.plugins.chrome.tests.chrome_test_helpers import CHROME_PLUGIN_DIR
    missing = get_hook_script(CHROME_PLUGIN_DIR, 'nonexistent_hook_*_pattern.*')
    assert missing is None
def test_parse_jsonl_output_valid():
    """parse_jsonl_output() returns the FIRST record from valid JSONL."""
    raw = '''{"type": "ArchiveResult", "status": "succeeded", "output": "test1"}
{"type": "ArchiveResult", "status": "failed", "error": "test2"}
'''
    parsed = parse_jsonl_output(raw)
    assert parsed is not None
    assert parsed['type'] == 'ArchiveResult'
    # Only the first matching record is returned
    assert parsed['status'] == 'succeeded'
    assert parsed['output'] == 'test1'
def test_parse_jsonl_output_with_non_json_lines():
    """Non-JSON lines are skipped; the first parseable record is returned."""
    mixed = '''Some non-JSON output
{"type": "ArchiveResult", "status": "succeeded"}
More non-JSON
{"type": "ArchiveResult", "status": "failed"}
'''
    record = parse_jsonl_output(mixed)
    assert record is not None
    assert record['type'] == 'ArchiveResult'
    assert record['status'] == 'succeeded'
def test_parse_jsonl_output_empty():
    """Empty input yields None rather than raising."""
    assert parse_jsonl_output('') is None
def test_parse_jsonl_output_filters_by_type():
    """record_type='ArchiveResult' skips records of other types."""
    raw = '''{"type": "LogEntry", "data": "log1"}
{"type": "ArchiveResult", "data": "result1"}
{"type": "ArchiveResult", "data": "result2"}
'''
    record = parse_jsonl_output(raw, record_type='ArchiveResult')
    assert record is not None
    assert record['type'] == 'ArchiveResult'
    assert record['data'] == 'result1'  # first ArchiveResult wins, LogEntry skipped
def test_parse_jsonl_output_filters_custom_type():
    """Filtering works for arbitrary record types, not just ArchiveResult."""
    raw = '''{"type": "ArchiveResult", "data": "result1"}
{"type": "LogEntry", "data": "log1"}
{"type": "ArchiveResult", "data": "result2"}
'''
    record = parse_jsonl_output(raw, record_type='LogEntry')
    assert record is not None
    assert record['type'] == 'LogEntry'
    assert record['data'] == 'log1'
def test_machine_type_consistency():
    """get_machine_type() is stable across repeated calls."""
    first, second = get_machine_type(), get_machine_type()
    assert first == second, "Machine type should be stable across calls"
def test_lib_dir_is_directory():
    """get_lib_dir() returns a Path when DATA_DIR points at a real layout."""
    with tempfile.TemporaryDirectory() as tmpdir:
        old_data_dir = os.environ.get('DATA_DIR')
        try:
            os.environ['DATA_DIR'] = tmpdir
            # Create the expected lib/<machine_type> directory structure
            machine_type = get_machine_type()
            (Path(tmpdir) / 'lib' / machine_type).mkdir(parents=True, exist_ok=True)
            result = get_lib_dir()
            assert isinstance(result, Path)
        finally:
            # `is not None` so a previously-empty DATA_DIR is restored instead
            # of popped (the old `if old_data_dir:` check dropped falsy values).
            if old_data_dir is not None:
                os.environ['DATA_DIR'] = old_data_dir
            else:
                os.environ.pop('DATA_DIR', None)
# Allow running this test module directly (python <file>.py) without the pytest CLI.
if __name__ == '__main__':
    pytest.main([__file__, '-v'])

View File

@@ -1,21 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"required_plugins": ["chrome"],
"properties": {
"CONSOLELOG_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_CONSOLELOG", "USE_CONSOLELOG"],
"description": "Enable console log capture"
},
"CONSOLELOG_TIMEOUT": {
"type": "integer",
"default": 30,
"minimum": 5,
"x-fallback": "TIMEOUT",
"description": "Timeout for console log capture in seconds"
}
}
}

View File

@@ -1,201 +0,0 @@
#!/usr/bin/env node
/**
* Capture console output from a page.
*
* This hook sets up CDP listeners BEFORE chrome_navigate loads the page,
* then waits for navigation to complete. The listeners stay active through
* navigation and capture all console output.
*
* Usage: on_Snapshot__21_consolelog.js --url=<url> --snapshot-id=<uuid>
* Output: Writes console.jsonl
*/
const fs = require('fs');
const path = require('path');
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
const puppeteer = require('puppeteer-core');
// Import shared utilities from chrome_utils.js
const {
getEnvBool,
getEnvInt,
parseArgs,
connectToPage,
waitForPageLoaded,
} = require('../chrome/chrome_utils.js');
const PLUGIN_NAME = 'consolelog';
const OUTPUT_DIR = '.';
const OUTPUT_FILE = 'console.jsonl';
const CHROME_SESSION_DIR = '../chrome';
let browser = null;
let page = null;
let logCount = 0;
let errorCount = 0;
let requestFailCount = 0;
let shuttingDown = false;
/**
 * Best-effort serialization of puppeteer console message args.
 * Tries jsonValue() first, then String(arg), then a placeholder.
 */
async function serializeArgs(args) {
    const out = [];
    for (const handle of args) {
        try {
            out.push(await handle.jsonValue());
        } catch (jsonErr) {
            try {
                out.push(String(handle));
            } catch (strErr) {
                out.push('[Unserializable]');
            }
        }
    }
    return out;
}
// Connect to the shared Chrome session and attach console/pageerror/requestfailed
// listeners that stream one JSONL entry per event straight into console.jsonl.
// Must be called BEFORE chrome_navigate loads the page so no events are missed.
// Returns { browser, page } so the caller can disconnect on shutdown.
async function setupListeners() {
  const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
  // CONSOLELOG_TIMEOUT is in seconds; connectToPage expects milliseconds
  const timeout = getEnvInt('CONSOLELOG_TIMEOUT', 30) * 1000;
  fs.writeFileSync(outputPath, ''); // Clear existing
  // Connect to Chrome page using shared utility
  const { browser, page } = await connectToPage({
    chromeSessionDir: CHROME_SESSION_DIR,
    timeoutMs: timeout,
    puppeteer,
  });
  // Set up listeners that write directly to file
  page.on('console', async (msg) => {
    try {
      const logEntry = {
        timestamp: new Date().toISOString(),
        type: msg.type(),
        text: msg.text(),
        args: await serializeArgs(msg.args()),
        location: msg.location(),
      };
      fs.appendFileSync(outputPath, JSON.stringify(logEntry) + '\n');
      logCount += 1;
    } catch (e) {
      // Ignore errors
    }
  });
  // Uncaught exceptions thrown by page scripts
  page.on('pageerror', (error) => {
    try {
      const logEntry = {
        timestamp: new Date().toISOString(),
        type: 'error',
        text: error.message,
        stack: error.stack || '',
      };
      fs.appendFileSync(outputPath, JSON.stringify(logEntry) + '\n');
      errorCount += 1;
    } catch (e) {
      // Ignore
    }
  });
  // Network requests that never completed (blocked, aborted, DNS failure, ...)
  page.on('requestfailed', (request) => {
    try {
      const failure = request.failure();
      const logEntry = {
        timestamp: new Date().toISOString(),
        type: 'request_failed',
        text: `Request failed: ${request.url()}`,
        error: failure ? failure.errorText : 'Unknown error',
        url: request.url(),
      };
      fs.appendFileSync(outputPath, JSON.stringify(logEntry) + '\n');
      requestFailCount += 1;
    } catch (e) {
      // Ignore
    }
  });
  return { browser, page };
}
// Emit the final ArchiveResult JSONL record on stdout exactly once.
// The `shuttingDown` guard prevents a double-emit when both SIGTERM and
// SIGINT arrive during teardown.
function emitResult(status = 'succeeded') {
  if (shuttingDown) return;
  shuttingDown = true;
  const counts = `${logCount} console, ${errorCount} errors, ${requestFailCount} failed requests`;
  console.log(JSON.stringify({
    type: 'ArchiveResult',
    status,
    output_str: `${OUTPUT_FILE} (${counts})`,
  }));
}
// Signal handler: flush the final result, then detach from Chrome with
// disconnect() (not close() -- other hooks share the browser session) and
// exit cleanly.
async function handleShutdown(signal) {
  console.error(`\nReceived ${signal}, emitting final results...`);
  emitResult('succeeded');
  if (browser) {
    try {
      browser.disconnect();
    } catch (e) {}
  }
  process.exit(0);
}
// Entry point: validate args/config, attach listeners before navigation,
// then block forever -- the orchestrator terminates this hook via SIGTERM,
// which triggers handleShutdown() to emit the final ArchiveResult.
async function main() {
  const args = parseArgs();
  const url = args.url;
  const snapshotId = args.snapshot_id;
  if (!url || !snapshotId) {
    console.error('Usage: on_Snapshot__21_consolelog.js --url=<url> --snapshot-id=<uuid>');
    process.exit(1);
  }
  if (!getEnvBool('CONSOLELOG_ENABLED', true)) {
    console.error('Skipping (CONSOLELOG_ENABLED=False)');
    console.log(JSON.stringify({type: 'ArchiveResult', status: 'skipped', output_str: 'CONSOLELOG_ENABLED=False'}));
    process.exit(0);
  }
  try {
    // Set up listeners BEFORE navigation
    const connection = await setupListeners();
    browser = connection.browser;
    page = connection.page;
    // Register signal handlers for graceful shutdown
    process.on('SIGTERM', () => handleShutdown('SIGTERM'));
    process.on('SIGINT', () => handleShutdown('SIGINT'));
    // Wait for chrome_navigate to complete (non-fatal)
    try {
      const timeout = getEnvInt('CONSOLELOG_TIMEOUT', 30) * 1000;
      // 4x grace: navigation may legitimately take longer than the capture timeout
      await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 500);
    } catch (e) {
      console.error(`WARN: ${e.message}`);
    }
    // console.error('Consolelog active, waiting for cleanup signal...');
    await new Promise(() => {}); // Keep alive until SIGTERM
    return;
  } catch (e) {
    const error = `${e.name}: ${e.message}`;
    console.error(`ERROR: ${error}`);
    console.log(JSON.stringify({
      type: 'ArchiveResult',
      status: 'failed',
      output_str: error,
    }));
    process.exit(1);
  }
}
// Top-level launcher: any unhandled rejection is fatal for this hook.
main().catch(e => {
  console.error(`Fatal error: ${e.message}`);
  process.exit(1);
});

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--consolelog" title="Console Log"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="4.5" width="18" height="15" rx="2"/><path d="M7 12l2 2-2 2"/><path d="M11 16h6"/></svg></span>

View File

@@ -1,127 +0,0 @@
"""
Tests for the consolelog plugin.
Tests the real consolelog hook with an actual URL to verify
console output capture.
"""
import json
import shutil
import subprocess
import sys
import tempfile
import time
from pathlib import Path
from django.test import TestCase
# Import chrome test helpers
sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
from chrome_test_helpers import (
chrome_session,
CHROME_NAVIGATE_HOOK,
get_plugin_dir,
get_hook_script,
)
# Get the path to the consolelog hook
PLUGIN_DIR = get_plugin_dir(__file__)
CONSOLELOG_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_consolelog.*')
class TestConsolelogPlugin(TestCase):
    """Test the consolelog plugin."""

    def test_consolelog_hook_exists(self):
        """Consolelog hook script should exist."""
        # CONSOLELOG_HOOK is resolved at import time by globbing the plugin
        # directory; None means the hook file was renamed or removed.
        self.assertIsNotNone(CONSOLELOG_HOOK, "Consolelog hook not found in plugin directory")
        self.assertTrue(CONSOLELOG_HOOK.exists(), f"Hook not found: {CONSOLELOG_HOOK}")
class TestConsolelogWithChrome(TestCase):
    """Integration tests for consolelog plugin with Chrome."""

    def setUp(self):
        """Set up test environment."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def tearDown(self):
        """Clean up."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_consolelog_captures_output(self):
        """Consolelog hook should capture console output from page."""
        # data: URL logs a known marker so the test never touches the network
        test_url = 'data:text/html,<script>console.log("archivebox-console-test")</script>'
        snapshot_id = 'test-consolelog-snapshot'
        with chrome_session(
            self.temp_dir,
            crawl_id='test-consolelog-crawl',
            snapshot_id=snapshot_id,
            test_url=test_url,
            navigate=False,
            timeout=30,
        ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
            console_dir = snapshot_chrome_dir.parent / 'consolelog'
            console_dir.mkdir(exist_ok=True)
            # Run consolelog hook with the active Chrome session (background hook)
            # Popen (not run): the hook blocks until SIGTERM, so it must execute
            # concurrently with the navigation started below.
            result = subprocess.Popen(
                ['node', str(CONSOLELOG_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
                cwd=str(console_dir),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                env=env
            )
            nav_result = subprocess.run(
                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
                cwd=str(snapshot_chrome_dir),
                capture_output=True,
                text=True,
                timeout=120,
                env=env
            )
            self.assertEqual(nav_result.returncode, 0, f"Navigation failed: {nav_result.stderr}")
            # Check for output file
            console_output = console_dir / 'console.jsonl'
            # Allow it to run briefly, then terminate (background hook)
            for _ in range(10):
                if console_output.exists() and console_output.stat().st_size > 0:
                    break
                time.sleep(1)
            if result.poll() is None:
                result.terminate()
                try:
                    stdout, stderr = result.communicate(timeout=5)
                except subprocess.TimeoutExpired:
                    result.kill()
                    stdout, stderr = result.communicate()
            else:
                stdout, stderr = result.communicate()
            # At minimum, verify no crash
            self.assertNotIn('Traceback', stderr)
            # If output file exists, verify it's valid JSONL and has output
            if console_output.exists():
                with open(console_output) as f:
                    content = f.read().strip()
                self.assertTrue(content, "Console output should not be empty")
                for line in content.split('\n'):
                    if line.strip():
                        try:
                            record = json.loads(line)
                            # Verify structure
                            self.assertIn('timestamp', record)
                            self.assertIn('type', record)
                        except json.JSONDecodeError:
                            pass  # Some lines may be incomplete
if __name__ == '__main__':
    # Bugfix: pytest was referenced here but never imported at module top,
    # so running this file directly raised NameError. Import lazily since
    # pytest is only needed for direct invocation.
    import pytest
    pytest.main([__file__, '-v'])

View File

@@ -1,98 +0,0 @@
#!/usr/bin/env python3
"""
Install a binary using a custom bash command.
This provider runs arbitrary shell commands to install binaries
that don't fit into standard package managers.
Usage: on_Binary__install_using_custom_bash.py --binary-id=<uuid> --machine-id=<uuid> --name=<name> --custom-cmd=<cmd>
Output: Binary JSONL record to stdout after installation
Environment variables:
MACHINE_ID: Machine UUID (set by orchestrator)
"""
import json
import os
import subprocess
import sys
import rich_click as click
from abx_pkg import Binary, EnvProvider
@click.command()
@click.option('--binary-id', required=True, help="Binary UUID")
@click.option('--machine-id', required=True, help="Machine UUID")
@click.option('--name', required=True, help="Binary name to install")
@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
@click.option('--custom-cmd', required=True, help="Custom bash command to run")
def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_cmd: str):
    """Install binary using custom bash command.

    Runs ``custom_cmd`` in a shell, then verifies the binary resolves on
    $PATH via abx-pkg and emits a Binary JSONL record on stdout.
    Exit codes: 0 = installed or skipped (provider not allowed), 1 = failure.
    """
    # Respect the allowed-providers filter: exit 0 (skip, not error) when the
    # 'custom' provider is not permitted for this binary.
    if binproviders != '*' and 'custom' not in binproviders.split(','):
        click.echo(f"custom provider not allowed for {name}", err=True)
        sys.exit(0)
    if not custom_cmd:
        click.echo("custom provider requires --custom-cmd", err=True)
        sys.exit(1)
    click.echo(f"Installing {name} via custom command: {custom_cmd}", err=True)
    try:
        result = subprocess.run(
            custom_cmd,
            shell=True,  # custom_cmd is an operator-supplied shell snippet by design
            timeout=600,  # 10 minute timeout for custom installs
        )
        if result.returncode != 0:
            click.echo(f"Custom install failed (exit={result.returncode})", err=True)
            sys.exit(1)
    except subprocess.TimeoutExpired:
        click.echo("Custom install timed out", err=True)
        sys.exit(1)
    # Use abx-pkg to load the binary and get its info
    provider = EnvProvider()
    try:
        binary = Binary(name=name, binproviders=[provider]).load()
    except Exception:
        # Some binaries don't support --version probing; retry with a stub
        # version override so load() can still resolve the abspath.
        try:
            binary = Binary(
                name=name,
                binproviders=[provider],
                overrides={'env': {'version': '0.0.1'}},
            ).load()
        except Exception as e:
            click.echo(f"{name} not found after custom install: {e}", err=True)
            sys.exit(1)
    if not binary.abspath:
        click.echo(f"{name} not found after custom install", err=True)
        sys.exit(1)
    # BUGFIX: previously this line unconditionally replaced the required
    # --machine-id CLI argument with os.environ.get('MACHINE_ID', ''),
    # emitting an empty machine_id whenever the env var was unset.
    # Prefer the orchestrator-provided env var, fall back to the CLI value.
    machine_id = os.environ.get('MACHINE_ID') or machine_id
    # Output Binary JSONL record to stdout
    record = {
        'type': 'Binary',
        'name': name,
        'abspath': str(binary.abspath),
        'version': str(binary.version) if binary.version else '',
        'sha256': binary.sha256 or '',
        'binprovider': 'custom',
        'machine_id': machine_id,
        'binary_id': binary_id,
    }
    print(json.dumps(record))
    # Log human-readable info to stderr
    click.echo(f"Installed {name} at {binary.abspath}", err=True)
    click.echo(f" version: {binary.version}", err=True)
    sys.exit(0)
if __name__ == '__main__':
main()

View File

@@ -1,149 +0,0 @@
"""
Tests for the custom binary provider plugin.
Tests the custom bash binary installer with safe commands.
"""
import json
import os
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
from django.test import TestCase
# Get the path to the custom provider hook
PLUGIN_DIR = Path(__file__).parent.parent
INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_custom_install.py'), None)
class TestCustomProviderHook(TestCase):
    """Test the custom binary provider hook.

    Each test invokes the hook script as a subprocess (matching how the
    orchestrator runs it) and inspects exit code, stderr, and JSONL stdout.
    """

    def setUp(self):
        """Set up test environment."""
        self.temp_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Clean up."""
        import shutil
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_hook_script_exists(self):
        """Hook script should exist."""
        # INSTALL_HOOK is resolved at import time by globbing the plugin dir
        self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")

    def test_hook_skips_when_custom_not_allowed(self):
        """Hook should skip when custom not in allowed binproviders."""
        env = os.environ.copy()
        env['DATA_DIR'] = self.temp_dir
        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=echo',
                '--binary-id=test-uuid',
                '--machine-id=test-machine',
                '--binproviders=pip,apt',  # custom not allowed
                '--custom-cmd=echo hello',
            ],
            capture_output=True,
            text=True,
            timeout=30,
            env=env
        )
        # Should exit cleanly (code 0) when custom not allowed
        self.assertEqual(result.returncode, 0)
        self.assertIn('custom provider not allowed', result.stderr)

    def test_hook_runs_custom_command_and_finds_binary(self):
        """Hook should run custom command and find the binary in PATH."""
        env = os.environ.copy()
        env['DATA_DIR'] = self.temp_dir
        # Use a simple echo command that doesn't actually install anything
        # Then check for 'echo' which is already in PATH
        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=echo',
                '--binary-id=test-uuid',
                '--machine-id=test-machine',
                '--custom-cmd=echo "custom install simulation"',
            ],
            capture_output=True,
            text=True,
            timeout=30,
            env=env
        )
        # Should succeed since echo is in PATH
        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
        # Parse JSONL output: accept the first Binary record for 'echo'
        for line in result.stdout.split('\n'):
            line = line.strip()
            if line.startswith('{'):
                try:
                    record = json.loads(line)
                    if record.get('type') == 'Binary' and record.get('name') == 'echo':
                        self.assertEqual(record['binprovider'], 'custom')
                        self.assertTrue(record['abspath'])
                        return
                except json.JSONDecodeError:
                    continue
        self.fail("No Binary JSONL record found in output")

    def test_hook_fails_for_missing_binary_after_command(self):
        """Hook should fail if binary not found after running custom command."""
        env = os.environ.copy()
        env['DATA_DIR'] = self.temp_dir
        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=nonexistent_binary_xyz123',
                '--binary-id=test-uuid',
                '--machine-id=test-machine',
                '--custom-cmd=echo "failed install"',  # Doesn't actually install
            ],
            capture_output=True,
            text=True,
            timeout=30,
            env=env
        )
        # Should fail since binary not found after command
        self.assertEqual(result.returncode, 1)
        self.assertIn('not found', result.stderr.lower())

    def test_hook_fails_for_failing_command(self):
        """Hook should fail if custom command returns non-zero exit code."""
        env = os.environ.copy()
        env['DATA_DIR'] = self.temp_dir
        result = subprocess.run(
            [
                sys.executable, str(INSTALL_HOOK),
                '--name=echo',
                '--binary-id=test-uuid',
                '--machine-id=test-machine',
                '--custom-cmd=exit 1',  # Command that fails
            ],
            capture_output=True,
            text=True,
            timeout=30,
            env=env
        )
        # Should fail with exit code 1
        self.assertEqual(result.returncode, 1)
if __name__ == '__main__':
pytest.main([__file__, '-v'])

View File

@@ -1,21 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"required_plugins": ["chrome"],
"properties": {
"DNS_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_DNS", "USE_DNS"],
"description": "Enable DNS traffic recording during page load"
},
"DNS_TIMEOUT": {
"type": "integer",
"default": 30,
"minimum": 5,
"x-fallback": "TIMEOUT",
"description": "Timeout for DNS recording in seconds"
}
}
}

View File

@@ -1,265 +0,0 @@
#!/usr/bin/env node
/**
* Record all DNS traffic (hostname -> IP resolutions) during page load.
*
* This hook sets up CDP listeners BEFORE chrome_navigate loads the page,
* then waits for navigation to complete. The listeners capture all DNS
* resolutions by extracting hostname/IP pairs from network responses.
*
* Usage: on_Snapshot__22_dns.js --url=<url> --snapshot-id=<uuid>
* Output: Writes dns.jsonl with one line per DNS resolution record
*/
const fs = require('fs');
const path = require('path');
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
const puppeteer = require('puppeteer-core');
// Import shared utilities from chrome_utils.js
const {
getEnvBool,
getEnvInt,
parseArgs,
connectToPage,
waitForPageLoaded,
} = require('../chrome/chrome_utils.js');
const PLUGIN_NAME = 'dns';
const OUTPUT_DIR = '.';
const OUTPUT_FILE = 'dns.jsonl';
const CHROME_SESSION_DIR = '../chrome';
let browser = null;
let page = null;
let recordCount = 0;
let shuttingDown = false;
// Return the hostname portion of a URL string, or null when the string
// cannot be parsed as a WHATWG URL.
function extractHostname(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (err) {
    return null;
  }
  return parsed.hostname;
}
// Attach CDP Network listeners that record hostname -> IP resolutions to
// dns.jsonl, deduplicated per (hostname, ip) pair. Also records NXDOMAIN
// entries for requests that failed with name-resolution errors.
// NOTE(review): the `targetUrl` parameter is unused in this body -- presumably
// kept for interface symmetry with other hooks; confirm before removing.
// Returns { browser, page, client } so the caller can disconnect on shutdown.
async function setupListener(targetUrl) {
  const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
  // DNS_TIMEOUT is in seconds; connectToPage expects milliseconds
  const timeout = getEnvInt('DNS_TIMEOUT', 30) * 1000;
  // Initialize output file
  fs.writeFileSync(outputPath, '');
  // Track seen hostname -> IP mappings to avoid duplicates per request
  const seenResolutions = new Map();
  // Track request IDs to their URLs for correlation
  const requestUrls = new Map();
  // Connect to Chrome page using shared utility
  const { browser, page } = await connectToPage({
    chromeSessionDir: CHROME_SESSION_DIR,
    timeoutMs: timeout,
    puppeteer,
  });
  // Get CDP session for low-level network events
  const client = await page.target().createCDPSession();
  // Enable network domain to receive events
  await client.send('Network.enable');
  // Listen for request events to track URLs
  client.on('Network.requestWillBeSent', (params) => {
    requestUrls.set(params.requestId, params.request.url);
  });
  // Listen for response events which contain remoteIPAddress (the resolved IP)
  client.on('Network.responseReceived', (params) => {
    try {
      const response = params.response;
      const url = response.url;
      const remoteIPAddress = response.remoteIPAddress;
      const remotePort = response.remotePort;
      if (!url || !remoteIPAddress) {
        return;
      }
      const hostname = extractHostname(url);
      if (!hostname) {
        return;
      }
      // Skip if IP address is same as hostname (already an IP)
      if (hostname === remoteIPAddress) {
        return;
      }
      // Create a unique key for this resolution
      const resolutionKey = `${hostname}:${remoteIPAddress}`;
      // Skip if we've already recorded this resolution
      if (seenResolutions.has(resolutionKey)) {
        return;
      }
      seenResolutions.set(resolutionKey, true);
      // Determine record type (A for IPv4, AAAA for IPv6)
      const isIPv6 = remoteIPAddress.includes(':');
      const recordType = isIPv6 ? 'AAAA' : 'A';
      // Create DNS record
      const timestamp = new Date().toISOString();
      const dnsRecord = {
        ts: timestamp,
        hostname: hostname,
        ip: remoteIPAddress,
        port: remotePort || null,
        type: recordType,
        protocol: url.startsWith('https://') ? 'https' : 'http',
        url: url,
        requestId: params.requestId,
      };
      // Append to output file
      fs.appendFileSync(outputPath, JSON.stringify(dnsRecord) + '\n');
      recordCount += 1;
    } catch (e) {
      // Ignore errors
    }
  });
  // Listen for failed requests too - they still involve DNS
  client.on('Network.loadingFailed', (params) => {
    try {
      const requestId = params.requestId;
      const url = requestUrls.get(requestId);
      if (!url) {
        return;
      }
      const hostname = extractHostname(url);
      if (!hostname) {
        return;
      }
      // Check if this is a DNS-related failure
      const errorText = params.errorText || '';
      if (errorText.includes('net::ERR_NAME_NOT_RESOLVED') ||
          errorText.includes('net::ERR_NAME_RESOLUTION_FAILED')) {
        // Create a unique key for this failed resolution
        const resolutionKey = `${hostname}:NXDOMAIN`;
        // Skip if we've already recorded this NXDOMAIN
        if (seenResolutions.has(resolutionKey)) {
          return;
        }
        seenResolutions.set(resolutionKey, true);
        const timestamp = new Date().toISOString();
        const dnsRecord = {
          ts: timestamp,
          hostname: hostname,
          ip: null,
          port: null,
          type: 'NXDOMAIN',
          protocol: url.startsWith('https://') ? 'https' : 'http',
          url: url,
          requestId: requestId,
          error: errorText,
        };
        fs.appendFileSync(outputPath, JSON.stringify(dnsRecord) + '\n');
        recordCount += 1;
      }
    } catch (e) {
      // Ignore errors
    }
  });
  return { browser, page, client };
}
// Emit the final ArchiveResult JSONL record on stdout exactly once.
// Guarded by `shuttingDown` so overlapping signals can't double-emit.
function emitResult(status = 'succeeded') {
  if (shuttingDown) return;
  shuttingDown = true;
  console.log(JSON.stringify({
    type: 'ArchiveResult',
    status,
    output_str: `${OUTPUT_FILE} (${recordCount} DNS records)`,
  }));
}
// Signal handler: flush the final result, detach from Chrome (disconnect,
// not close -- the session is shared with other hooks), then exit 0.
async function handleShutdown(signal) {
  console.error(`\nReceived ${signal}, emitting final results...`);
  emitResult('succeeded');
  if (browser) {
    try {
      browser.disconnect();
    } catch (e) {}
  }
  process.exit(0);
}
// Entry point: validate args/config, attach the CDP listener before
// navigation, then block forever -- the orchestrator terminates this hook
// via SIGTERM, which triggers handleShutdown() to emit the result.
async function main() {
  const args = parseArgs();
  const url = args.url;
  const snapshotId = args.snapshot_id;
  if (!url || !snapshotId) {
    console.error('Usage: on_Snapshot__22_dns.js --url=<url> --snapshot-id=<uuid>');
    process.exit(1);
  }
  if (!getEnvBool('DNS_ENABLED', true)) {
    console.error('Skipping (DNS_ENABLED=False)');
    console.log(JSON.stringify({type: 'ArchiveResult', status: 'skipped', output_str: 'DNS_ENABLED=False'}));
    process.exit(0);
  }
  try {
    // Set up listener BEFORE navigation
    const connection = await setupListener(url);
    browser = connection.browser;
    page = connection.page;
    // Register signal handlers for graceful shutdown
    process.on('SIGTERM', () => handleShutdown('SIGTERM'));
    process.on('SIGINT', () => handleShutdown('SIGINT'));
    // Wait for chrome_navigate to complete (non-fatal)
    try {
      const timeout = getEnvInt('DNS_TIMEOUT', 30) * 1000;
      // 4x grace: navigation may legitimately take longer than the capture timeout
      await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 500);
    } catch (e) {
      console.error(`WARN: ${e.message}`);
    }
    // console.error('DNS listener active, waiting for cleanup signal...');
    await new Promise(() => {}); // Keep alive until SIGTERM
    return;
  } catch (e) {
    const error = `${e.name}: ${e.message}`;
    console.error(`ERROR: ${error}`);
    console.log(JSON.stringify({
      type: 'ArchiveResult',
      status: 'failed',
      output_str: error,
    }));
    process.exit(1);
  }
}
// Top-level launcher: any unhandled rejection is fatal for this hook.
main().catch(e => {
  console.error(`Fatal error: ${e.message}`);
  process.exit(1);
});

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--dns" title="DNS"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="6" cy="12" r="2"/><circle cx="18" cy="6" r="2"/><circle cx="18" cy="18" r="2"/><path d="M8 12h6"/><path d="M16 8l-2 2"/><path d="M16 16l-2-2"/></svg></span>

View File

@@ -1,126 +0,0 @@
"""
Tests for the DNS plugin.
Tests the real DNS hook with an actual URL to verify
DNS resolution capture.
"""
import json
import shutil
import subprocess
import sys
import tempfile
import time
from pathlib import Path
from django.test import TestCase
# Import chrome test helpers
sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
from chrome_test_helpers import (
chrome_session,
CHROME_NAVIGATE_HOOK,
get_plugin_dir,
get_hook_script,
)
# Get the path to the DNS hook
PLUGIN_DIR = get_plugin_dir(__file__)
DNS_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_dns.*')
class TestDNSPlugin(TestCase):
    """Test the DNS plugin."""

    def test_dns_hook_exists(self):
        """DNS hook script should exist."""
        # DNS_HOOK is resolved at import time by globbing the plugin dir;
        # None means the hook file was renamed or removed.
        self.assertIsNotNone(DNS_HOOK, "DNS hook not found in plugin directory")
        self.assertTrue(DNS_HOOK.exists(), f"Hook not found: {DNS_HOOK}")
class TestDNSWithChrome(TestCase):
    """Integration tests for DNS plugin with Chrome."""

    def setUp(self):
        """Set up test environment."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def tearDown(self):
        """Clean up."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_dns_records_captured(self):
        """DNS hook should capture DNS records from a real URL."""
        # NOTE(review): requires outbound network access to example.com
        test_url = 'https://example.com'
        snapshot_id = 'test-dns-snapshot'
        with chrome_session(
            self.temp_dir,
            crawl_id='test-dns-crawl',
            snapshot_id=snapshot_id,
            test_url=test_url,
            navigate=False,
            timeout=30,
        ) as (_process, _pid, snapshot_chrome_dir, env):
            dns_dir = snapshot_chrome_dir.parent / 'dns'
            dns_dir.mkdir(exist_ok=True)
            # Popen (not run): the hook blocks until SIGTERM, so it must
            # execute concurrently with the navigation started below.
            result = subprocess.Popen(
                ['node', str(DNS_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
                cwd=str(dns_dir),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                env=env
            )
            nav_result = subprocess.run(
                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
                cwd=str(snapshot_chrome_dir),
                capture_output=True,
                text=True,
                timeout=120,
                env=env
            )
            self.assertEqual(nav_result.returncode, 0, f"Navigation failed: {nav_result.stderr}")
            dns_output = dns_dir / 'dns.jsonl'
            # Poll up to 30s for the hook to write at least one record
            for _ in range(30):
                if dns_output.exists() and dns_output.stat().st_size > 0:
                    break
                time.sleep(1)
            if result.poll() is None:
                result.terminate()
                try:
                    stdout, stderr = result.communicate(timeout=5)
                except subprocess.TimeoutExpired:
                    result.kill()
                    stdout, stderr = result.communicate()
            else:
                stdout, stderr = result.communicate()
            self.assertNotIn('Traceback', stderr)
            self.assertTrue(dns_output.exists(), "dns.jsonl not created")
            content = dns_output.read_text().strip()
            self.assertTrue(content, "DNS output should not be empty")
            records = []
            for line in content.split('\n'):
                line = line.strip()
                if not line:
                    continue
                try:
                    records.append(json.loads(line))
                except json.JSONDecodeError:
                    pass
            self.assertTrue(records, "No DNS records parsed")
            has_ip_record = any(r.get('hostname') and r.get('ip') for r in records)
            self.assertTrue(has_ip_record, f"No DNS record with hostname + ip: {records}")
if __name__ == '__main__':
    # Bugfix: pytest was referenced here but never imported at module top,
    # so running this file directly raised NameError. Import lazily since
    # pytest is only needed for direct invocation.
    import pytest
    pytest.main([__file__, '-v'])

View File

@@ -1,21 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"required_plugins": ["chrome"],
"properties": {
"DOM_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_DOM", "USE_DOM"],
"description": "Enable DOM capture"
},
"DOM_TIMEOUT": {
"type": "integer",
"default": 60,
"minimum": 5,
"x-fallback": "TIMEOUT",
"description": "Timeout for DOM capture in seconds"
}
}
}

View File

@@ -1,184 +0,0 @@
#!/usr/bin/env node
/**
* Dump the DOM of a URL using Chrome/Puppeteer.
*
* Requires a Chrome session (from chrome plugin) and connects to it via CDP.
*
* Usage: on_Snapshot__53_dom.js --url=<url> --snapshot-id=<uuid>
* Output: Writes dom/output.html
*
* Environment variables:
* DOM_ENABLED: Enable DOM extraction (default: true)
*/
const fs = require('fs');
const path = require('path');
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
const {
getEnvBool,
parseArgs,
readCdpUrl,
} = require('../chrome/chrome_utils.js');
// Check if DOM is enabled BEFORE requiring puppeteer
if (!getEnvBool('DOM_ENABLED', true)) {
console.error('Skipping DOM (DOM_ENABLED=False)');
// Temporary failure (config disabled) - NO JSONL emission
process.exit(0);
}
// Now safe to require puppeteer
const puppeteer = require('puppeteer-core');
// Extractor metadata
const PLUGIN_NAME = 'dom';
const OUTPUT_DIR = '.';
const OUTPUT_FILE = 'output.html';
const CHROME_SESSION_DIR = '../chrome';
// Check if staticfile extractor already downloaded this URL
const STATICFILE_DIR = '../staticfile';
// Return true when the staticfile extractor already succeeded for this
// snapshot, detected by scanning its stdout.log for a JSONL ArchiveResult
// with status 'succeeded'. Used to skip DOM capture for direct file URLs.
function hasStaticFileOutput() {
  if (!fs.existsSync(STATICFILE_DIR)) return false;
  const stdoutPath = path.join(STATICFILE_DIR, 'stdout.log');
  if (!fs.existsSync(stdoutPath)) return false;
  const stdout = fs.readFileSync(stdoutPath, 'utf8');
  for (const line of stdout.split('\n')) {
    const trimmed = line.trim();
    // Only JSON object lines can be ArchiveResult records
    if (!trimmed.startsWith('{')) continue;
    try {
      const record = JSON.parse(trimmed);
      if (record.type === 'ArchiveResult' && record.status === 'succeeded') {
        return true;
      }
    } catch (e) {}
  }
  return false;
}
// Wait for chrome tab to be fully loaded
async function waitForChromeTabLoaded(timeoutMs = 60000) {
const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
const startTime = Date.now();
while (Date.now() - startTime < timeoutMs) {
if (fs.existsSync(navigationFile)) {
return true;
}
// Wait 100ms before checking again
await new Promise(resolve => setTimeout(resolve, 100));
}
return false;
}
// Capture the current DOM of the already-navigated Chrome tab and write it
// to output.html. Connects to the existing CDP session (never launches its
// own Chrome). Returns { success, output } or { success: false, error }.
// NOTE(review): the `url` parameter is unused here -- the page is selected
// from the existing session, not navigated; confirm before removing.
async function dumpDom(url) {
  // Output directory is current directory (hook already runs in output dir)
  const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
  let browser = null;
  let page = null;
  try {
    // Connect to existing Chrome session (required)
    const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
    if (!cdpUrl) {
      return { success: false, error: 'No Chrome session found (chrome plugin must run first)' };
    }
    browser = await puppeteer.connect({
      browserWSEndpoint: cdpUrl,
      defaultViewport: null,
    });
    // Get existing pages or create new one; prefer the tab that holds a
    // real http(s) URL over chrome:// internals
    const pages = await browser.pages();
    page = pages.find(p => p.url().startsWith('http')) || pages[0];
    if (!page) {
      page = await browser.newPage();
    }
    // Get the full DOM content
    const domContent = await page.content();
    // <100 bytes is treated as a failed/blank capture
    if (domContent && domContent.length > 100) {
      fs.writeFileSync(outputPath, domContent, 'utf8');
      return { success: true, output: outputPath };
    } else {
      return { success: false, error: 'DOM content too short or empty' };
    }
  } catch (e) {
    return { success: false, error: `${e.name}: ${e.message}` };
  } finally {
    // disconnect (not close): the Chrome session is shared with other hooks
    if (browser) {
      browser.disconnect();
    }
  }
}
// Entry point: skip permanently if staticfile already handled the URL,
// otherwise wait for navigation to finish and dump the DOM.
// Transient failures exit 1 WITHOUT emitting JSONL (so the orchestrator
// can retry); permanent outcomes emit an ArchiveResult record.
async function main() {
  const args = parseArgs();
  const url = args.url;
  const snapshotId = args.snapshot_id;
  if (!url || !snapshotId) {
    console.error('Usage: on_Snapshot__53_dom.js --url=<url> --snapshot-id=<uuid>');
    process.exit(1);
  }
  try {
    // Check if staticfile extractor already handled this (permanent skip)
    if (hasStaticFileOutput()) {
      console.error(`Skipping DOM - staticfile extractor already downloaded this`);
      // Permanent skip - emit ArchiveResult with status='skipped'
      console.log(JSON.stringify({
        type: 'ArchiveResult',
        status: 'skipped',
        output_str: 'staticfile already handled',
      }));
      process.exit(0);
    }
    const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
    if (!cdpUrl) {
      throw new Error('No Chrome session found (chrome plugin must run first)');
    }
    // Wait for page to be fully loaded
    const pageLoaded = await waitForChromeTabLoaded(60000);
    if (!pageLoaded) {
      throw new Error('Page not loaded after 60s (chrome_navigate must complete first)');
    }
    const result = await dumpDom(url);
    if (result.success) {
      // Success - emit ArchiveResult
      const size = fs.statSync(result.output).size;
      console.error(`DOM saved (${size} bytes)`);
      console.log(JSON.stringify({
        type: 'ArchiveResult',
        status: 'succeeded',
        output_str: result.output,
      }));
      process.exit(0);
    } else {
      // Transient error - emit NO JSONL
      console.error(`ERROR: ${result.error}`);
      process.exit(1);
    }
  } catch (e) {
    // Transient error - emit NO JSONL
    console.error(`ERROR: ${e.name}: ${e.message}`);
    process.exit(1);
  }
}
// Top-level launcher: any unhandled rejection is fatal for this hook.
main().catch(e => {
  console.error(`Fatal error: ${e.message}`);
  process.exit(1);
});

View File

@@ -1,8 +0,0 @@
<!-- DOM thumbnail - scaled down iframe preview of captured DOM HTML -->
<div class="extractor-thumbnail dom-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fff;">
<iframe src="{{ output_path }}"
style="width: 400%; height: 400px; transform: scale(0.25); transform-origin: top left; pointer-events: none; border: none;"
loading="lazy"
sandbox="allow-same-origin">
</iframe>
</div>

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--dom" title="DOM"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M8 9l-3 3 3 3"/><path d="M16 9l3 3-3 3"/><path d="M10 20l4-16"/></svg></span>

View File

@@ -1,185 +0,0 @@
"""
Integration tests for dom plugin
Tests verify:
1. Hook script exists
2. Dependencies installed via chrome validation hooks
3. Verify deps with abx-pkg
4. DOM extraction works on https://example.com
5. JSONL output is correct
6. Filesystem output contains actual page content
7. Config options work
"""
import json
import os
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
get_test_env,
get_plugin_dir,
get_hook_script,
run_hook_and_parse,
LIB_DIR,
NODE_MODULES_DIR,
PLUGINS_ROOT,
chrome_session,
)
PLUGIN_DIR = get_plugin_dir(__file__)
DOM_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_dom.*')
NPM_PROVIDER_HOOK = get_hook_script(PLUGINS_ROOT / 'npm', 'on_Binary__install_using_npm_provider.py')
TEST_URL = 'https://example.com'
def test_hook_script_exists():
    """Verify on_Snapshot hook exists."""
    # DOM_HOOK is resolved at import time by globbing the plugin directory
    assert DOM_HOOK.exists(), f"Hook not found: {DOM_HOOK}"
def test_verify_deps_with_abx_pkg():
    """Verify dependencies are available via abx-pkg after hook installation."""
    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
    # Pydantic model with forward refs must be rebuilt before instantiation
    EnvProvider.model_rebuild()
    # Verify node is available
    node_binary = Binary(name='node', binproviders=[EnvProvider()])
    node_loaded = node_binary.load()
    assert node_loaded and node_loaded.abspath, "Node.js required for dom plugin"
def test_extracts_dom_from_example_com():
"""Test full workflow: extract DOM from real example.com via hook."""
# Prerequisites checked by earlier test
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
with chrome_session(tmpdir, test_url=TEST_URL) as (_process, _pid, snapshot_chrome_dir, env):
dom_dir = snapshot_chrome_dir.parent / 'dom'
dom_dir.mkdir(exist_ok=True)
# Run DOM extraction hook
result = subprocess.run(
['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
cwd=dom_dir,
capture_output=True,
text=True,
timeout=120,
env=env
)
assert result.returncode == 0, f"Extraction failed: {result.stderr}"
# Parse clean JSONL output
result_json = None
for line in result.stdout.strip().split('\n'):
line = line.strip()
if line.startswith('{'):
try:
record = json.loads(line)
if record.get('type') == 'ArchiveResult':
result_json = record
break
except json.JSONDecodeError:
pass
assert result_json, "Should have ArchiveResult JSONL output"
assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
# Verify filesystem output (hook writes directly to working dir)
dom_file = dom_dir / 'output.html'
assert dom_file.exists(), f"output.html not created. Files: {list(tmpdir.iterdir())}"
# Verify HTML content contains REAL example.com text
html_content = dom_file.read_text(errors='ignore')
assert len(html_content) > 200, f"HTML content too short: {len(html_content)} bytes"
assert '<html' in html_content.lower(), "Missing <html> tag"
assert 'example domain' in html_content.lower(), "Missing 'Example Domain' in HTML"
assert ('this domain' in html_content.lower() or
'illustrative examples' in html_content.lower()), \
"Missing example.com description text"
def test_config_save_dom_false_skips():
"""Test that DOM_ENABLED=False exits without emitting JSONL."""
import os
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
env = os.environ.copy()
env['DOM_ENABLED'] = 'False'
result = subprocess.run(
['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
cwd=tmpdir,
capture_output=True,
text=True,
env=env,
timeout=30
)
assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
# Feature disabled - temporary failure, should NOT emit JSONL
assert 'Skipping DOM' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
# Should NOT emit any JSONL
jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
def test_staticfile_present_skips():
"""Test that dom skips when staticfile already downloaded."""
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
# Create directory structure like real ArchiveBox:
# tmpdir/
# staticfile/ <- staticfile extractor output
# dom/ <- dom extractor runs here, looks for ../staticfile
staticfile_dir = tmpdir / 'staticfile'
staticfile_dir.mkdir()
(staticfile_dir / 'stdout.log').write_text('{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n')
dom_dir = tmpdir / 'dom'
dom_dir.mkdir()
result = subprocess.run(
['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=teststatic'],
cwd=dom_dir, # Run from dom subdirectory
capture_output=True,
text=True,
timeout=30
,
env=get_test_env())
assert result.returncode == 0, "Should exit 0 when permanently skipping"
# Permanent skip - should emit ArchiveResult with status='skipped'
result_json = None
for line in result.stdout.strip().split('\n'):
line = line.strip()
if line.startswith('{'):
try:
record = json.loads(line)
if record.get('type') == 'ArchiveResult':
result_json = record
break
except json.JSONDecodeError:
pass
assert result_json, "Should emit ArchiveResult JSONL for permanent skip"
assert result_json['status'] == 'skipped', f"Should have status='skipped': {result_json}"
assert 'staticfile' in result_json.get('output_str', '').lower(), "Should mention staticfile in output_str"
if __name__ == '__main__':
pytest.main([__file__, '-v'])

View File

@@ -1,72 +0,0 @@
#!/usr/bin/env python3
"""
Check if a binary is already available in the system PATH.
This is the simplest "provider" - it doesn't install anything,
it just discovers binaries that are already installed.
Usage: on_Binary__install_using_env_provider.py --binary-id=<uuid> --machine-id=<uuid> --name=<name>
Output: Binary JSONL record to stdout if binary found in PATH
Environment variables:
    MACHINE_ID: Machine UUID fallback (set by orchestrator), used only when
                the --machine-id CLI argument is empty
"""
import json
import os
import sys
import rich_click as click
from abx_pkg import Binary, EnvProvider


@click.command()
@click.option('--machine-id', required=True, help="Machine UUID")
@click.option('--binary-id', required=True, help="Dependency UUID")
@click.option('--name', required=True, help="Binary name to find")
@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
@click.option('--overrides', default=None, help="JSON-encoded overrides dict (unused)")
def main(binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None):
    """Check if binary is available in PATH and record it."""
    # Check if env provider is allowed; skipping is not an error (exit 0).
    if binproviders != '*' and 'env' not in binproviders.split(','):
        click.echo(f"env provider not allowed for {name}", err=True)
        sys.exit(0)  # Not an error, just skip

    # Use abx-pkg EnvProvider to find the binary on $PATH
    provider = EnvProvider()
    try:
        binary = Binary(name=name, binproviders=[provider]).load()
    except Exception as e:
        click.echo(f"{name} not found in PATH: {e}", err=True)
        sys.exit(1)

    if not binary.abspath:
        click.echo(f"{name} not found in PATH", err=True)
        sys.exit(1)

    # BUGFIX: the required --machine-id CLI argument was previously
    # overwritten unconditionally with os.environ['MACHINE_ID'] (often
    # empty), producing records with a blank machine_id. Prefer the CLI
    # value and fall back to the environment only when it is missing.
    machine_id = machine_id or os.environ.get('MACHINE_ID', '')

    # Output Binary JSONL record to stdout
    record = {
        'type': 'Binary',
        'name': name,
        'abspath': str(binary.abspath),
        'version': str(binary.version) if binary.version else '',
        'sha256': binary.sha256 or '',
        'binprovider': 'env',
        'machine_id': machine_id,
        'binary_id': binary_id,
    }
    print(json.dumps(record))

    # Log human-readable info to stderr
    click.echo(f"Found {name} at {binary.abspath}", err=True)
    click.echo(f"  version: {binary.version}", err=True)
    sys.exit(0)


if __name__ == '__main__':
    main()

View File

@@ -1,159 +0,0 @@
"""
Tests for the env binary provider plugin.
Tests the real env provider hook with actual system binaries.
"""
import json
import os
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
from django.test import TestCase
# Get the path to the env provider hook
PLUGIN_DIR = Path(__file__).parent.parent
INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_env_install.py'), None)
class TestEnvProviderHook(TestCase):
"""Test the env binary provider hook."""
def setUp(self):
"""Set up test environment."""
self.temp_dir = tempfile.mkdtemp()
def tearDown(self):
"""Clean up."""
import shutil
shutil.rmtree(self.temp_dir, ignore_errors=True)
def test_hook_script_exists(self):
"""Hook script should exist."""
self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")
def test_hook_finds_python(self):
"""Hook should find python3 binary in PATH."""
env = os.environ.copy()
env['DATA_DIR'] = self.temp_dir
result = subprocess.run(
[
sys.executable, str(INSTALL_HOOK),
'--name=python3',
'--binary-id=test-uuid',
'--machine-id=test-machine',
],
capture_output=True,
text=True,
timeout=30,
env=env
)
# Should succeed and output JSONL
self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
# Parse JSONL output
for line in result.stdout.split('\n'):
line = line.strip()
if line.startswith('{'):
try:
record = json.loads(line)
if record.get('type') == 'Binary' and record.get('name') == 'python3':
self.assertEqual(record['binprovider'], 'env')
self.assertTrue(record['abspath'])
self.assertTrue(Path(record['abspath']).exists())
return
except json.JSONDecodeError:
continue
self.fail("No Binary JSONL record found in output")
def test_hook_finds_bash(self):
"""Hook should find bash binary in PATH."""
env = os.environ.copy()
env['DATA_DIR'] = self.temp_dir
result = subprocess.run(
[
sys.executable, str(INSTALL_HOOK),
'--name=bash',
'--binary-id=test-uuid',
'--machine-id=test-machine',
],
capture_output=True,
text=True,
timeout=30,
env=env
)
# Should succeed and output JSONL
self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
# Parse JSONL output
for line in result.stdout.split('\n'):
line = line.strip()
if line.startswith('{'):
try:
record = json.loads(line)
if record.get('type') == 'Binary' and record.get('name') == 'bash':
self.assertEqual(record['binprovider'], 'env')
self.assertTrue(record['abspath'])
return
except json.JSONDecodeError:
continue
self.fail("No Binary JSONL record found in output")
def test_hook_fails_for_missing_binary(self):
"""Hook should fail for binary not in PATH."""
env = os.environ.copy()
env['DATA_DIR'] = self.temp_dir
result = subprocess.run(
[
sys.executable, str(INSTALL_HOOK),
'--name=nonexistent_binary_xyz123',
'--binary-id=test-uuid',
'--machine-id=test-machine',
],
capture_output=True,
text=True,
timeout=30,
env=env
)
# Should fail with exit code 1
self.assertEqual(result.returncode, 1)
self.assertIn('not found', result.stderr.lower())
def test_hook_skips_when_env_not_allowed(self):
"""Hook should skip when env not in allowed binproviders."""
env = os.environ.copy()
env['DATA_DIR'] = self.temp_dir
result = subprocess.run(
[
sys.executable, str(INSTALL_HOOK),
'--name=python3',
'--binary-id=test-uuid',
'--machine-id=test-machine',
'--binproviders=pip,apt', # env not allowed
],
capture_output=True,
text=True,
timeout=30,
env=env
)
# Should exit cleanly (code 0) when env not allowed
self.assertEqual(result.returncode, 0)
self.assertIn('env provider not allowed', result.stderr)
if __name__ == '__main__':
pytest.main([__file__, '-v'])

View File

@@ -1,26 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"properties": {
"FAVICON_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_FAVICON", "USE_FAVICON"],
"description": "Enable favicon downloading"
},
"FAVICON_TIMEOUT": {
"type": "integer",
"default": 30,
"minimum": 5,
"x-fallback": "TIMEOUT",
"description": "Timeout for favicon fetch in seconds"
},
"FAVICON_USER_AGENT": {
"type": "string",
"default": "",
"x-fallback": "USER_AGENT",
"description": "User agent string"
}
}
}

View File

@@ -1,153 +0,0 @@
#!/usr/bin/env python3
"""
Extract favicon from a URL.
Usage: on_Snapshot__favicon.bg.py --url=<url> --snapshot-id=<uuid>
Output: Writes favicon.ico to $PWD
Environment variables:
FAVICON_TIMEOUT: Timeout in seconds (default: 30)
USER_AGENT: User agent string
# Fallback to ARCHIVING_CONFIG values if FAVICON_* not set:
TIMEOUT: Fallback timeout
Note: This extractor uses the 'requests' library which is bundled with ArchiveBox.
It can run standalone if requests is installed: pip install requests
"""
import json
import os
import re
import sys
from pathlib import Path
from urllib.parse import urljoin, urlparse
import rich_click as click
# Extractor metadata
PLUGIN_NAME = 'favicon'
OUTPUT_DIR = '.'
OUTPUT_FILE = 'favicon.ico'
def get_env(name: str, default: str = '') -> str:
    """Return the value of env var *name*, whitespace-stripped, or *default*."""
    return os.environ.get(name, default).strip()


def get_env_int(name: str, default: int = 0) -> int:
    """Return env var *name* parsed as an int, or *default* on a bad value."""
    raw = get_env(name, str(default))
    try:
        return int(raw)
    except ValueError:
        return default
def get_favicon(url: str) -> tuple[bool, str | None, str]:
    """
    Fetch the favicon for *url* and write it to OUTPUT_FILE in $PWD.

    Tries, in order: icon URLs scraped from the page's <link rel="icon"> tags,
    the conventional /favicon.ico, /favicon.png and /apple-touch-icon.png
    locations, and finally Google's s2 favicon service as a fallback.

    Returns: (success, output_path, error_message)
    """
    try:
        import requests
    except ImportError:
        return False, None, 'requests library not installed'

    # Config: FAVICON_* values take precedence, generic values are fallbacks
    # (mirrors the x-fallback declarations in this plugin's config schema).
    timeout = get_env_int('FAVICON_TIMEOUT') or get_env_int('TIMEOUT', 30)
    # FIX: FAVICON_USER_AGENT is declared in config.json (x-fallback:
    # USER_AGENT) but was never read; honor it like FAVICON_TIMEOUT above.
    user_agent = (
        get_env('FAVICON_USER_AGENT')
        or get_env('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
    )
    headers = {'User-Agent': user_agent}

    # Build list of possible favicon URLs (conventional locations)
    parsed = urlparse(url)
    base_url = f"{parsed.scheme}://{parsed.netloc}"
    favicon_urls = [
        urljoin(base_url, '/favicon.ico'),
        urljoin(base_url, '/favicon.png'),
        urljoin(base_url, '/apple-touch-icon.png'),
    ]

    # Try to extract favicon URLs from HTML link tags; these are prepended so
    # declared icons are tried before the conventional paths.
    try:
        response = requests.get(url, timeout=timeout, headers=headers)
        if response.ok:
            # Look for <link rel="icon" href="...">
            for match in re.finditer(
                r'<link[^>]+rel=["\'](?:shortcut )?icon["\'][^>]+href=["\']([^"\']+)["\']',
                response.text,
                re.I
            ):
                favicon_urls.insert(0, urljoin(url, match.group(1)))
            # Also check reverse attribute order: href before rel
            for match in re.finditer(
                r'<link[^>]+href=["\']([^"\']+)["\'][^>]+rel=["\'](?:shortcut )?icon["\']',
                response.text,
                re.I
            ):
                favicon_urls.insert(0, urljoin(url, match.group(1)))
    except Exception:
        pass  # Continue with default favicon URLs

    # Try each URL until we find one that works
    for favicon_url in favicon_urls:
        try:
            response = requests.get(favicon_url, timeout=15, headers=headers)
            if response.ok and len(response.content) > 0:
                Path(OUTPUT_FILE).write_bytes(response.content)
                return True, OUTPUT_FILE, ''
        except Exception:
            continue

    # Try Google's favicon service as fallback (generates a generic icon)
    try:
        google_url = f'https://www.google.com/s2/favicons?domain={parsed.netloc}'
        response = requests.get(google_url, timeout=15, headers=headers)
        if response.ok and len(response.content) > 0:
            Path(OUTPUT_FILE).write_bytes(response.content)
            return True, OUTPUT_FILE, ''
    except Exception:
        pass

    return False, None, 'No favicon found'
@click.command()
@click.option('--url', required=True, help='URL to extract favicon from')
@click.option('--snapshot-id', required=True, help='Snapshot UUID')
def main(url: str, snapshot_id: str):
    """Extract favicon from a URL."""
    output: str | None = None
    error = ''
    try:
        success, output, error = get_favicon(url)
    except Exception as e:
        success = False
        error = f'{type(e).__name__}: {e}'
    status = 'succeeded' if success else 'failed'

    # Human-readable error goes to stderr; stdout stays clean JSONL.
    if error:
        print(f'ERROR: {error}', file=sys.stderr)

    # Output clean JSONL (no RESULT_JSON= prefix)
    print(json.dumps({
        'type': 'ArchiveResult',
        'status': status,
        'output_str': output or error or '',
    }))
    sys.exit(0 if status == 'succeeded' else 1)


if __name__ == '__main__':
    main()

View File

@@ -1,9 +0,0 @@
<!-- Favicon thumbnail - small favicon preview -->
<div class="extractor-thumbnail favicon-thumbnail" style="width: 100%; height: 100px; display: flex; align-items: center; justify-content: center; background: #fff;">
{% if output_path %}
<img src="{{ output_path }}"
alt="Favicon"
style="width: 30px; height: 30px; max-width: 30px; max-height: 30px; object-fit: contain;"
loading="lazy">
{% endif %}
</div>

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--favicon" title="Favicon"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M12 3l2.5 5.5 6 .5-4.5 3.8 1.5 5.7L12 15.5 6.5 18.5 8 12.8 3.5 9l6-.5z"/></svg></span>

View File

@@ -1,293 +0,0 @@
"""
Integration tests for favicon plugin
Tests verify:
1. Plugin script exists
2. requests library is available
3. Favicon extraction works for real example.com
4. Output file is actual image data
5. Tries multiple favicon URLs
6. Falls back to Google's favicon service
7. Config options work (TIMEOUT, USER_AGENT)
8. Handles failures gracefully
"""
import json
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
get_plugin_dir,
get_hook_script,
parse_jsonl_output,
)
PLUGIN_DIR = get_plugin_dir(__file__)
FAVICON_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_favicon.*')
TEST_URL = 'https://example.com'
def test_hook_script_exists():
"""Verify hook script exists."""
assert FAVICON_HOOK.exists(), f"Hook script not found: {FAVICON_HOOK}"
def test_requests_library_available():
"""Test that requests library is available."""
result = subprocess.run(
[sys.executable, '-c', 'import requests; print(requests.__version__)'],
capture_output=True,
text=True
)
if result.returncode != 0:
pass
assert len(result.stdout.strip()) > 0, "Should report requests version"
def test_extracts_favicon_from_example_com():
"""Test full workflow: extract favicon from real example.com.
Note: example.com doesn't have a favicon and Google's service may also fail,
so we test that the extraction completes and reports appropriate status.
"""
# Check requests is available
check_result = subprocess.run(
[sys.executable, '-c', 'import requests'],
capture_output=True
)
if check_result.returncode != 0:
pass
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
# Run favicon extraction
result = subprocess.run(
[sys.executable, str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
cwd=tmpdir,
capture_output=True,
text=True,
timeout=60
)
# May succeed (if Google service works) or fail (if no favicon)
assert result.returncode in (0, 1), "Should complete extraction attempt"
# Parse clean JSONL output
result_json = None
for line in result.stdout.strip().split('\n'):
line = line.strip()
if line.startswith('{'):
pass
try:
record = json.loads(line)
if record.get('type') == 'ArchiveResult':
result_json = record
break
except json.JSONDecodeError:
pass
assert result_json, "Should have ArchiveResult JSONL output"
# If it succeeded, verify the favicon file
if result_json['status'] == 'succeeded':
favicon_file = tmpdir / 'favicon.ico'
assert favicon_file.exists(), "favicon.ico not created"
# Verify file is not empty and contains actual image data
file_size = favicon_file.stat().st_size
assert file_size > 0, "Favicon file should not be empty"
assert file_size < 1024 * 1024, f"Favicon file suspiciously large: {file_size} bytes"
# Check for common image magic bytes
favicon_data = favicon_file.read_bytes()
# ICO, PNG, GIF, JPEG, or WebP
is_image = (
favicon_data[:4] == b'\x00\x00\x01\x00' or # ICO
favicon_data[:8] == b'\x89PNG\r\n\x1a\n' or # PNG
favicon_data[:3] == b'GIF' or # GIF
favicon_data[:2] == b'\xff\xd8' or # JPEG
favicon_data[8:12] == b'WEBP' # WebP
)
assert is_image, "Favicon file should be a valid image format"
else:
# Failed as expected
assert result_json['status'] == 'failed', f"Should report failure: {result_json}"
def test_config_timeout_honored():
"""Test that TIMEOUT config is respected."""
check_result = subprocess.run(
[sys.executable, '-c', 'import requests'],
capture_output=True
)
if check_result.returncode != 0:
pass
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
# Set very short timeout (but example.com should still succeed)
import os
env = os.environ.copy()
env['TIMEOUT'] = '5'
result = subprocess.run(
[sys.executable, str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'testtimeout'],
cwd=tmpdir,
capture_output=True,
text=True,
env=env,
timeout=30
)
# Should complete (success or fail, but not hang)
assert result.returncode in (0, 1), "Should complete without hanging"
def test_config_user_agent():
"""Test that USER_AGENT config is used."""
check_result = subprocess.run(
[sys.executable, '-c', 'import requests'],
capture_output=True
)
if check_result.returncode != 0:
pass
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
# Set custom user agent
import os
env = os.environ.copy()
env['USER_AGENT'] = 'TestBot/1.0'
result = subprocess.run(
[sys.executable, str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'testua'],
cwd=tmpdir,
capture_output=True,
text=True,
env=env,
timeout=60
)
# Should succeed (example.com doesn't block)
if result.returncode == 0:
# Parse clean JSONL output
result_json = None
for line in result.stdout.strip().split('\n'):
line = line.strip()
if line.startswith('{'):
pass
try:
record = json.loads(line)
if record.get('type') == 'ArchiveResult':
result_json = record
break
except json.JSONDecodeError:
pass
if result_json:
assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
def test_handles_https_urls():
"""Test that HTTPS URLs work correctly."""
check_result = subprocess.run(
[sys.executable, '-c', 'import requests'],
capture_output=True
)
if check_result.returncode != 0:
pass
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
result = subprocess.run(
[sys.executable, str(FAVICON_HOOK), '--url', 'https://example.org', '--snapshot-id', 'testhttps'],
cwd=tmpdir,
capture_output=True,
text=True,
timeout=60
)
if result.returncode == 0:
favicon_file = tmpdir / 'favicon.ico'
if favicon_file.exists():
assert favicon_file.stat().st_size > 0
def test_handles_missing_favicon_gracefully():
"""Test that favicon plugin handles sites without favicons gracefully.
Note: The plugin falls back to Google's favicon service, which generates
a generic icon even if the site doesn't have one, so extraction usually succeeds.
"""
check_result = subprocess.run(
[sys.executable, '-c', 'import requests'],
capture_output=True
)
if check_result.returncode != 0:
pass
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
# Try a URL that likely doesn't have a favicon
result = subprocess.run(
[sys.executable, str(FAVICON_HOOK), '--url', 'https://example.com/nonexistent', '--snapshot-id', 'test404'],
cwd=tmpdir,
capture_output=True,
text=True,
timeout=60
)
# May succeed (Google fallback) or fail gracefully
assert result.returncode in (0, 1), "Should complete (may succeed or fail)"
if result.returncode != 0:
combined = result.stdout + result.stderr
assert 'No favicon found' in combined or 'ERROR=' in combined
def test_reports_missing_requests_library():
"""Test that script reports error when requests library is missing."""
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
# Run with PYTHONPATH cleared to simulate missing requests
import os
env = os.environ.copy()
# Keep only minimal PATH, clear PYTHONPATH
env['PYTHONPATH'] = '/nonexistent'
result = subprocess.run(
[sys.executable, '-S', str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
cwd=tmpdir,
capture_output=True,
text=True,
env=env
)
# Should fail and report missing requests
if result.returncode != 0:
combined = result.stdout + result.stderr
# May report missing requests or other import errors
assert 'requests' in combined.lower() or 'import' in combined.lower() or 'ERROR=' in combined
if __name__ == '__main__':
pytest.main([__file__, '-v'])

View File

@@ -1,51 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"properties": {
"FORUMDL_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_FORUMDL", "USE_FORUMDL"],
"description": "Enable forum downloading with forum-dl"
},
"FORUMDL_BINARY": {
"type": "string",
"default": "forum-dl",
"description": "Path to forum-dl binary"
},
"FORUMDL_TIMEOUT": {
"type": "integer",
"default": 3600,
"minimum": 30,
"x-fallback": "TIMEOUT",
"description": "Timeout for forum downloads in seconds"
},
"FORUMDL_OUTPUT_FORMAT": {
"type": "string",
"default": "jsonl",
"enum": ["jsonl", "warc", "mbox", "maildir", "mh", "mmdf", "babyl"],
"description": "Output format for forum downloads"
},
"FORUMDL_CHECK_SSL_VALIDITY": {
"type": "boolean",
"default": true,
"x-fallback": "CHECK_SSL_VALIDITY",
"description": "Whether to verify SSL certificates"
},
"FORUMDL_ARGS": {
"type": "array",
"items": {"type": "string"},
"default": [],
"x-aliases": ["FORUMDL_DEFAULT_ARGS"],
"description": "Default forum-dl arguments"
},
"FORUMDL_ARGS_EXTRA": {
"type": "array",
"items": {"type": "string"},
"default": [],
"x-aliases": ["FORUMDL_EXTRA_ARGS"],
"description": "Extra arguments to append to forum-dl command"
}
}
}

View File

@@ -1,31 +0,0 @@
#!/usr/bin/env python3
"""
Wrapper for forum-dl that applies Pydantic v2 compatibility patches.

forum-dl 0.3.0 serializes entries via the Pydantic v1 API
(json(models_as_dict=False)), which breaks under Pydantic v2. When a v2
install is detected, JsonlWriter is monkey-patched to use model_dump_json()
before forum-dl's entrypoint is imported.
"""
import sys

# Apply Pydantic v2 compatibility patch BEFORE importing forum_dl
try:
    from forum_dl.writers.jsonl import JsonlWriter
    from pydantic import BaseModel

    if hasattr(BaseModel, 'model_dump_json'):  # Pydantic v2 detected
        def _serialize_with_v2_api(self, entry):
            """Use Pydantic v2's model_dump_json() instead of deprecated json(models_as_dict=False)"""
            return entry.model_dump_json()

        JsonlWriter._serialize_entry = _serialize_with_v2_api
except (ImportError, AttributeError):
    # forum-dl not installed or already compatible - no patch needed
    pass

# Now import and run forum-dl's main function
from forum_dl import main

if __name__ == '__main__':
    sys.exit(main())

View File

@@ -1,81 +0,0 @@
#!/usr/bin/env python3
"""
Emit forum-dl Binary dependency for the crawl.
"""
import json
import os
import sys
def get_env(name: str, default: str = '') -> str:
return os.environ.get(name, default).strip()
def get_env_bool(name: str, default: bool = False) -> bool:
val = get_env(name, '').lower()
if val in ('true', '1', 'yes', 'on'):
return True
if val in ('false', '0', 'no', 'off'):
return False
return default
def output_binary(name: str, binproviders: str, overrides: dict | None = None):
"""Output Binary JSONL record for a dependency."""
machine_id = os.environ.get('MACHINE_ID', '')
record = {
'type': 'Binary',
'name': name,
'binproviders': binproviders,
'machine_id': machine_id,
}
if overrides:
record['overrides'] = overrides
print(json.dumps(record))
def main():
forumdl_enabled = get_env_bool('FORUMDL_ENABLED', True)
if not forumdl_enabled:
sys.exit(0)
output_binary(
name='forum-dl',
binproviders='pip,env',
overrides={
'pip': {
'packages': [
'--no-deps',
'--prefer-binary',
'forum-dl',
'chardet==5.2.0',
'pydantic',
'pydantic-core',
'typing-extensions',
'annotated-types',
'typing-inspection',
'beautifulsoup4',
'soupsieve',
'lxml',
'requests',
'urllib3',
'certifi',
'idna',
'charset-normalizer',
'tenacity',
'python-dateutil',
'six',
'html2text',
'warcio',
]
}
},
)
sys.exit(0)
if __name__ == '__main__':
main()

View File

@@ -1,266 +0,0 @@
#!/usr/bin/env python3
"""
Download forum content from a URL using forum-dl.
Usage: on_Snapshot__04_forumdl.bg.py --url=<url> --snapshot-id=<uuid>
Output: Downloads forum content to $PWD/
Environment variables:
FORUMDL_ENABLED: Enable forum downloading (default: True)
FORUMDL_BINARY: Path to forum-dl binary (default: forum-dl)
FORUMDL_TIMEOUT: Timeout in seconds (x-fallback: TIMEOUT)
FORUMDL_OUTPUT_FORMAT: Output format (default: jsonl)
FORUMDL_CHECK_SSL_VALIDITY: Whether to verify SSL certs (x-fallback: CHECK_SSL_VALIDITY)
FORUMDL_ARGS: Default forum-dl arguments (JSON array)
FORUMDL_ARGS_EXTRA: Extra arguments to append (JSON array)
"""
import json
import os
import shutil
import subprocess
import sys
import threading
from pathlib import Path
import rich_click as click
# Monkey patch forum-dl for Pydantic v2 compatibility:
# forum-dl 0.3.0 uses deprecated json(models_as_dict=False) which doesn't work in Pydantic v2
try:
    from forum_dl.writers.jsonl import JsonlWriter
    from pydantic import BaseModel
    # Check if we're using Pydantic v2 (has model_dump_json)
    if hasattr(BaseModel, 'model_dump_json'):
        # Patch JsonlWriter to use Pydantic v2 API
        # NOTE(review): the original method is saved here but never restored or
        # called again -- presumably kept for debugging; confirm before removal.
        original_serialize = JsonlWriter._serialize_entry
        def _patched_serialize_entry(self, entry):
            # Use Pydantic v2's model_dump_json() instead of deprecated json(models_as_dict=False)
            return entry.model_dump_json()
        JsonlWriter._serialize_entry = _patched_serialize_entry
except (ImportError, AttributeError):
    # forum-dl not installed or already compatible
    pass

# Extractor metadata
PLUGIN_NAME = 'forumdl'
BIN_NAME = 'forum-dl'
BIN_PROVIDERS = 'pip,env'
OUTPUT_DIR = '.'
def get_env(name: str, default: str = '') -> str:
return os.environ.get(name, default).strip()
def get_env_bool(name: str, default: bool = False) -> bool:
val = get_env(name, '').lower()
if val in ('true', '1', 'yes', 'on'):
return True
if val in ('false', '0', 'no', 'off'):
return False
return default
def get_env_int(name: str, default: int = 0) -> int:
try:
return int(get_env(name, str(default)))
except ValueError:
return default
def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
"""Parse a JSON array from environment variable."""
val = get_env(name, '')
if not val:
return default if default is not None else []
try:
result = json.loads(val)
if isinstance(result, list):
return [str(item) for item in result]
return default if default is not None else []
except json.JSONDecodeError:
return default if default is not None else []
def get_binary_shebang(binary_path: str) -> str | None:
"""Return interpreter from shebang line if present (e.g., /path/to/python)."""
try:
with open(binary_path, 'r', encoding='utf-8') as f:
first_line = f.readline().strip()
if first_line.startswith('#!'):
return first_line[2:].strip().split(' ')[0]
except Exception:
pass
return None
def resolve_binary_path(binary: str) -> str | None:
"""Resolve binary to an absolute path if possible."""
if not binary:
return None
if Path(binary).is_file():
return binary
return shutil.which(binary)
def save_forum(url: str, binary: str) -> tuple[bool, str | None, str]:
    """
    Download forum content from *url* using forum-dl.

    "Not a forum" outcomes (unsupported URL, no content, no extractor, clean
    exit without output) are treated as success with no output path.
    Returns: (success, output_path, error_message)
    """
    # Resolve config: FORUMDL_* first, generic fallbacks second
    # (x-fallback handling mirrors this plugin's config schema).
    timeout = get_env_int('FORUMDL_TIMEOUT') or get_env_int('TIMEOUT', 3600)
    if get_env('FORUMDL_CHECK_SSL_VALIDITY'):
        check_ssl = get_env_bool('FORUMDL_CHECK_SSL_VALIDITY', True)
    else:
        check_ssl = get_env_bool('CHECK_SSL_VALIDITY', True)
    base_args = get_env_array('FORUMDL_ARGS', [])
    extra_args = get_env_array('FORUMDL_ARGS_EXTRA', [])
    output_format = get_env('FORUMDL_OUTPUT_FORMAT', 'jsonl')

    # Hook already runs inside the snapshot output directory.
    output_dir = Path(OUTPUT_DIR)
    special_names = {
        'warc': 'forum.warc.gz',
        'jsonl': 'forum.jsonl',
        'maildir': 'forum',  # maildir output is a directory
    }
    output_file = output_dir / special_names.get(output_format, f'forum.{output_format}')

    # Prefer the bundled Pydantic-v2-compatible wrapper when present.
    wrapper_path = Path(__file__).parent / 'forum-dl-wrapper.py'
    resolved_binary = resolve_binary_path(binary) or binary
    if wrapper_path.exists():
        interpreter = get_binary_shebang(resolved_binary) or sys.executable
        cmd = [interpreter, str(wrapper_path), *base_args, '-f', output_format, '-o', str(output_file)]
    else:
        cmd = [resolved_binary, *base_args, '-f', output_format, '-o', str(output_file)]
    if not check_ssl:
        cmd.append('--no-check-certificate')
    if extra_args:
        cmd.extend(extra_args)
    cmd.append(url)

    try:
        print(f'[forumdl] Starting download (timeout={timeout}s)', file=sys.stderr)
        captured: list[str] = []
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )

        def _tee_output() -> None:
            # Mirror child output to our stderr while keeping a copy.
            if not process.stdout:
                return
            for line in process.stdout:
                captured.append(line)
                sys.stderr.write(line)

        reader = threading.Thread(target=_tee_output, daemon=True)
        reader.start()
        try:
            process.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            process.kill()
            reader.join(timeout=1)
            return False, None, f'Timed out after {timeout} seconds'
        reader.join(timeout=1)
        combined_output = ''.join(captured)

        # A non-empty output file is unambiguous success.
        if output_file.exists() and output_file.stat().st_size > 0:
            return True, str(output_file), ''

        stderr = combined_output
        stderr_lower = stderr.lower()
        # These are NOT errors - page simply has no downloadable forum content
        if 'unsupported url' in stderr_lower:
            return True, None, ''  # Not a forum site - success, no output
        if 'no content' in stderr_lower:
            return True, None, ''  # No forum found - success, no output
        if 'extractornotfounderror' in stderr_lower:
            return True, None, ''  # No forum extractor for this URL - success, no output
        if process.returncode == 0:
            return True, None, ''  # forum-dl exited cleanly, just no forum - success
        # These ARE errors - something went wrong
        if '404' in stderr:
            return False, None, '404 Not Found'
        if '403' in stderr:
            return False, None, '403 Forbidden'
        if 'unable to extract' in stderr_lower:
            return False, None, 'Unable to extract forum info'
        return False, None, f'forum-dl error: {stderr}'
    except subprocess.TimeoutExpired:
        return False, None, f'Timed out after {timeout} seconds'
    except Exception as e:
        return False, None, f'{type(e).__name__}: {e}'
@click.command()
@click.option('--url', required=True, help='URL to download forum from')
@click.option('--snapshot-id', required=True, help='Snapshot UUID')
def main(url: str, snapshot_id: str):
    """Download forum content from a URL using forum-dl.

    Emits an ArchiveResult JSONL record on stdout only when the attempt
    concludes (success, or graceful "no forum here"); transient failures log
    to stderr and exit 1 WITHOUT emitting JSONL so the attempt can be retried.
    """
    # NOTE: the old locals `output = None; status = 'failed'; error = ''` were
    # dead code (status was never read; output/error are always assigned by
    # save_forum before use), so they have been removed.
    try:
        # Respect the plugin kill-switch: a disabled config is a temporary
        # condition, so exit cleanly without emitting any JSONL.
        if not get_env_bool('FORUMDL_ENABLED', True):
            print('Skipping forum-dl (FORUMDL_ENABLED=False)', file=sys.stderr)
            sys.exit(0)

        # Get binary from environment
        binary = get_env('FORUMDL_BINARY', 'forum-dl')

        # Run extraction
        success, output, error = save_forum(url, binary)

        if success:
            # Success - emit ArchiveResult
            print(json.dumps({
                'type': 'ArchiveResult',
                'status': 'succeeded',
                'output_str': output or '',
            }))
            sys.exit(0)

        # Transient error - emit NO JSONL
        print(f'ERROR: {error}', file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Transient error - emit NO JSONL
        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
        sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -1,7 +0,0 @@
<!-- Forum thumbnail - shows icon placeholder -->
<!-- Static markup only: forum output has no preview image, so the emoji
     placeholder is always rendered (no JS, no template variables). -->
<div class="extractor-thumbnail forumdl-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #1a1a1a; display: flex; align-items: center; justify-content: center;">
    <div style="display: flex; flex-direction: column; align-items: center; color: #888; font-size: 12px;">
        <span style="font-size: 32px;">💬</span>
        <span>Forum</span>
    </div>
</div>

View File

@@ -1,147 +0,0 @@
<!-- Fullscreen forum view - renders JSONL forum posts -->
<!-- NOTE: "{{ output_path }}" below is a server-side template variable; it must
     resolve to the forum-dl JSONL output file for this snapshot. -->
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Forum Thread</title>
    <style>
        body {
            margin: 0;
            padding: 20px;
            background: #0d1117;
            color: #c9d1d9;
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
            line-height: 1.6;
        }
        .header {
            max-width: 1000px;
            margin: 0 auto 30px;
            text-align: center;
            padding: 20px;
            border-bottom: 1px solid #30363d;
        }
        .icon {
            font-size: 48px;
            margin-bottom: 10px;
        }
        h1 {
            margin: 0;
            font-size: 28px;
            color: #f0f6fc;
        }
        .container {
            max-width: 1000px;
            margin: 0 auto;
        }
        .post {
            background: #161b22;
            border: 1px solid #30363d;
            border-radius: 6px;
            margin-bottom: 16px;
            padding: 16px;
            transition: border-color 0.2s;
        }
        .post:hover {
            border-color: #58a6ff;
        }
        .post-header {
            display: flex;
            justify-content: space-between;
            align-items: center;
            margin-bottom: 12px;
            padding-bottom: 12px;
            border-bottom: 1px solid #21262d;
        }
        .post-author {
            font-weight: 600;
            color: #58a6ff;
            font-size: 14px;
        }
        .post-date {
            color: #8b949e;
            font-size: 12px;
        }
        .post-title {
            margin: 0 0 12px 0;
            font-size: 18px;
            font-weight: 600;
            color: #f0f6fc;
        }
        .post-content {
            color: #c9d1d9;
            word-wrap: break-word;
        }
        .post-content img {
            max-width: 100%;
            height: auto;
            border-radius: 4px;
        }
        .post-content a {
            color: #58a6ff;
            text-decoration: none;
        }
        .post-content a:hover {
            text-decoration: underline;
        }
        .loading {
            text-align: center;
            padding: 40px;
            color: #8b949e;
        }
    </style>
</head>
<body>
    <div class="header">
        <div class="icon">💬</div>
        <h1>Forum Thread</h1>
    </div>
    <div class="container">
        <div id="forum-posts" class="loading">Loading posts...</div>
    </div>
    <script>
        // Fetch the snapshot's JSONL file, parse one JSON object per line,
        // and render each record as a post card.
        (async function() {
            try {
                const response = await fetch('{{ output_path }}');
                const text = await response.text();
                const posts = text.trim().split('\n').filter(line => line).map(line => JSON.parse(line));
                const container = document.getElementById('forum-posts');
                container.innerHTML = '';
                container.className = '';
                posts.forEach(post => {
                    const postDiv = document.createElement('div');
                    postDiv.className = 'post';
                    // Field names vary between forum-dl extractors, so fall back gracefully.
                    const author = post.author || 'Anonymous';
                    const date = post.date ? new Date(post.date).toLocaleString() : '';
                    const title = post.title || '';
                    const content = post.content || post.body || '';
                    // NOTE(review): author/date/title are HTML-escaped, but `content` is
                    // inserted as raw HTML so archived markup renders. Confirm the JSONL
                    // source is trusted before reusing this template elsewhere.
                    postDiv.innerHTML = `
                        <div class="post-header">
                            <span class="post-author">${escapeHtml(author)}</span>
                            <span class="post-date">${escapeHtml(date)}</span>
                        </div>
                        ${title ? `<h2 class="post-title">${escapeHtml(title)}</h2>` : ''}
                        <div class="post-content">${content}</div>
                    `;
                    container.appendChild(postDiv);
                });
                if (posts.length === 0) {
                    container.innerHTML = '<div class="loading">No posts found</div>';
                }
            } catch(e) {
                document.getElementById('forum-posts').innerHTML = '<div class="loading">Error loading posts: ' + e.message + '</div>';
            }
        })();
        // Declared after its use above; function declarations are hoisted, so
        // the IIFE can call it safely.
        function escapeHtml(text) {
            const div = document.createElement('div');
            div.textContent = text;
            return div.innerHTML;
        }
    </script>
</body>
</html>

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--forumdl" title="Forum"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M4 5h16v10H7l-3 3V5z"/></svg></span>

View File

@@ -1,317 +0,0 @@
"""
Integration tests for forumdl plugin
Tests verify:
1. Hook script exists
2. Dependencies installed via validation hooks
3. Verify deps with abx-pkg
4. Forum extraction works on forum URLs
5. JSONL output is correct
6. Config options work
7. Handles non-forum URLs gracefully
"""
import json
import os
import subprocess
import sys
import tempfile
import time
import uuid
from pathlib import Path
import pytest
PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
FORUMDL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_forumdl.*'), None)
TEST_URL = 'https://example.com'
# Module-level cache for binary path
_forumdl_binary_path = None
_forumdl_lib_root = None
def get_forumdl_binary_path():
    """Get the installed forum-dl binary path from cache or by running installation.

    Resolution order:
      1. module-level cache ``_forumdl_binary_path`` (populated on first success)
      2. abx-pkg lookup via the pip/env binproviders
      3. a real install by invoking the pip plugin's on_Binary hook, seeded
         with any overrides emitted by this plugin's on_Crawl install hook

    Returns the absolute path as a str, or None when every strategy fails.
    """
    global _forumdl_binary_path
    if _forumdl_binary_path:
        return _forumdl_binary_path
    # Try to find forum-dl binary using abx-pkg
    from abx_pkg import Binary, PipProvider, EnvProvider, BinProviderOverrides
    try:
        binary = Binary(
            name='forum-dl',
            binproviders=[PipProvider(), EnvProvider()]
        ).load()
        if binary and binary.abspath:
            _forumdl_binary_path = str(binary.abspath)
            return _forumdl_binary_path
    except Exception:
        # Lookup failure just means we fall through to installation below.
        pass
    # If not found, try to install via pip using the crawl hook overrides
    pip_hook = PLUGINS_ROOT / 'pip' / 'on_Binary__11_pip_install.py'
    crawl_hook = PLUGIN_DIR / 'on_Crawl__25_forumdl_install.py'
    if pip_hook.exists():
        binary_id = str(uuid.uuid4())
        machine_id = str(uuid.uuid4())
        overrides = None
        if crawl_hook.exists():
            # Run the crawl hook and scrape its stdout for the Binary JSONL
            # record carrying pip overrides (e.g. a pinned package spec).
            crawl_result = subprocess.run(
                [sys.executable, str(crawl_hook)],
                capture_output=True,
                text=True,
                timeout=30,
            )
            for crawl_line in crawl_result.stdout.strip().split('\n'):
                if crawl_line.strip().startswith('{'):
                    try:
                        crawl_record = json.loads(crawl_line)
                        if crawl_record.get('type') == 'Binary' and crawl_record.get('name') == 'forum-dl':
                            overrides = crawl_record.get('overrides')
                            break
                    except json.JSONDecodeError:
                        continue
        # Create a persistent temp LIB_DIR for the pip provider
        import platform
        global _forumdl_lib_root
        if not _forumdl_lib_root:
            _forumdl_lib_root = tempfile.mkdtemp(prefix='forumdl-lib-')
        # Normalize the arch/os pair the same way the pip hook lays out lib dirs
        # (e.g. 'arm64-darwin', 'x86_64-linux').
        machine = platform.machine().lower()
        system = platform.system().lower()
        if machine in ('arm64', 'aarch64'):
            machine = 'arm64'
        elif machine in ('x86_64', 'amd64'):
            machine = 'x86_64'
        machine_type = f"{machine}-{system}"
        lib_dir = Path(_forumdl_lib_root) / 'lib' / machine_type
        lib_dir.mkdir(parents=True, exist_ok=True)
        env = os.environ.copy()
        env['LIB_DIR'] = str(lib_dir)
        env['DATA_DIR'] = str(Path(_forumdl_lib_root) / 'data')
        cmd = [
            sys.executable, str(pip_hook),
            '--binary-id', binary_id,
            '--machine-id', machine_id,
            '--name', 'forum-dl'
        ]
        if overrides:
            cmd.append(f'--overrides={json.dumps(overrides)}')
        # Installation can legitimately take minutes (pip resolve + build).
        install_result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        # Parse Binary from pip installation
        for install_line in install_result.stdout.strip().split('\n'):
            if install_line.strip():
                try:
                    install_record = json.loads(install_line)
                    if install_record.get('type') == 'Binary' and install_record.get('name') == 'forum-dl':
                        _forumdl_binary_path = install_record.get('abspath')
                        return _forumdl_binary_path
                except json.JSONDecodeError:
                    pass
    return None
def test_hook_script_exists():
    """Verify the on_Snapshot forumdl hook exists next to this test."""
    # FORUMDL_HOOK is None when the module-level glob found nothing; guard it
    # so the failure is a clean assertion instead of AttributeError on None.
    assert FORUMDL_HOOK is not None, f"No on_Snapshot__*_forumdl.* hook found in {PLUGIN_DIR}"
    assert FORUMDL_HOOK.exists(), f"Hook not found: {FORUMDL_HOOK}"
def test_verify_deps_with_abx_pkg():
    """Verify forum-dl is installed by calling the REAL installation hooks."""
    # get_forumdl_binary_path() caches, looks up via abx-pkg, and finally
    # installs through the pip plugin hook; None means all strategies failed.
    binary_path = get_forumdl_binary_path()
    if not binary_path:
        # Fail with context about the known cchardet build issue on 3.14+.
        assert False, (
            "forum-dl installation failed. Install hook should install forum-dl automatically. "
            "Note: forum-dl has a dependency on cchardet which may not compile on Python 3.14+ "
            "due to removed longintrepr.h header."
        )
    assert Path(binary_path).is_file(), f"Binary path must be a valid file: {binary_path}"
def test_handles_non_forum_url():
    """Test that forum-dl extractor handles non-forum URLs gracefully via hook."""
    binary_path = get_forumdl_binary_path()
    if not binary_path:
        # The old `if not binary_path: pass` fell through and crashed on
        # Path(None) with a TypeError; skip explicitly instead.
        pytest.skip('forum-dl binary not available')
    assert Path(binary_path).is_file(), f"Binary must be a valid file: {binary_path}"
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        env = os.environ.copy()
        env['FORUMDL_BINARY'] = binary_path
        # Run forum-dl extraction hook on non-forum URL
        result = subprocess.run(
            [sys.executable, str(FORUMDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=env,
            timeout=60,
        )
        # Should exit 0 even for non-forum URL (graceful handling)
        assert result.returncode == 0, f"Should handle non-forum URL gracefully: {result.stderr}"
        # Parse clean JSONL output for the ArchiveResult record
        result_json = None
        for line in result.stdout.strip().split('\n'):
            line = line.strip()
            if not line.startswith('{'):
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            if record.get('type') == 'ArchiveResult':
                result_json = record
                break
        assert result_json, "Should have ArchiveResult JSONL output"
        assert result_json['status'] == 'succeeded', f"Should succeed even for non-forum URL: {result_json}"
def test_config_save_forumdl_false_skips():
    """FORUMDL_ENABLED=False must exit 0 and emit no JSONL records."""
    with tempfile.TemporaryDirectory() as tmpdir:
        child_env = {**os.environ, 'FORUMDL_ENABLED': 'False'}
        proc = subprocess.run(
            [sys.executable, str(FORUMDL_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=child_env,
            timeout=30,
        )
        assert proc.returncode == 0, f"Should exit 0 when feature disabled: {proc.stderr}"
        # A disabled feature is a temporary condition: the hook logs the skip
        # reason to stderr but must not emit any ArchiveResult JSONL.
        assert 'Skipping' in proc.stderr or 'False' in proc.stderr, "Should log skip reason to stderr"
        jsonl_lines = [ln for ln in proc.stdout.strip().split('\n') if ln.strip().startswith('{')]
        assert not jsonl_lines, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
def test_config_timeout():
    """Test that FORUMDL_TIMEOUT config is respected."""
    binary_path = get_forumdl_binary_path()
    if not binary_path:
        # The old `if not binary_path: pass` fell through and crashed on
        # Path(None) with a TypeError; skip explicitly instead.
        pytest.skip('forum-dl binary not available')
    assert Path(binary_path).is_file(), f"Binary must be a valid file: {binary_path}"
    with tempfile.TemporaryDirectory() as tmpdir:
        env = os.environ.copy()
        env['FORUMDL_BINARY'] = binary_path
        env['FORUMDL_TIMEOUT'] = '5'
        start_time = time.time()
        result = subprocess.run(
            [sys.executable, str(FORUMDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=env,
            timeout=10,  # Should complete in 5s, use 10s as safety margin
        )
        elapsed_time = time.time() - start_time
        assert result.returncode == 0, f"Should complete without hanging: {result.stderr}"
        # Allow 1 second overhead for subprocess startup and Python interpreter
        assert elapsed_time <= 6.0, f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
def test_real_forum_url():
    """Test that forum-dl extracts content from a real HackerNews thread with jsonl output.

    Uses our Pydantic v2 compatible wrapper to fix forum-dl 0.3.0's incompatibility.
    NOTE(review): network-dependent test - requires news.ycombinator.com to be reachable.
    """
    import os
    binary_path = get_forumdl_binary_path()
    assert binary_path, "forum-dl binary not available"
    assert Path(binary_path).is_file(), f"Binary must be a valid file: {binary_path}"
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        # Use HackerNews - one of the most reliable forum-dl extractors
        forum_url = 'https://news.ycombinator.com/item?id=1'
        env = os.environ.copy()
        env['FORUMDL_BINARY'] = binary_path
        env['FORUMDL_TIMEOUT'] = '60'
        env['FORUMDL_OUTPUT_FORMAT'] = 'jsonl'  # Use jsonl format
        # HTML output could be added via: env['FORUMDL_ARGS_EXTRA'] = json.dumps(['--files-output', './files'])
        start_time = time.time()
        # The hook runs in cwd=tmpdir, so all output lands inside the temp dir.
        result = subprocess.run(
            [sys.executable, str(FORUMDL_HOOK), '--url', forum_url, '--snapshot-id', 'testforum'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=env,
            timeout=90
        )
        elapsed_time = time.time() - start_time
        # Should succeed with our Pydantic v2 wrapper
        assert result.returncode == 0, f"Should extract forum successfully: {result.stderr}"
        # Parse JSONL output
        result_json = None
        for line in result.stdout.strip().split('\n'):
            line = line.strip()
            if line.startswith('{'):
                try:
                    record = json.loads(line)
                    if record.get('type') == 'ArchiveResult':
                        result_json = record
                        break
                except json.JSONDecodeError:
                    pass
        assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
        # Check that forum files were downloaded
        output_files = list(tmpdir.glob('**/*'))
        forum_files = [f for f in output_files if f.is_file()]
        assert len(forum_files) > 0, f"Should have downloaded at least one forum file. Files: {output_files}"
        # Verify the JSONL file has content
        jsonl_file = tmpdir / 'forum.jsonl'
        assert jsonl_file.exists(), "Should have created forum.jsonl"
        assert jsonl_file.stat().st_size > 0, "forum.jsonl should not be empty"
        print(f"Successfully extracted {len(forum_files)} file(s) in {elapsed_time:.2f}s")
if __name__ == '__main__':
pytest.main([__file__, '-v'])

View File

@@ -1,54 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"properties": {
"GALLERYDL_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_GALLERYDL", "USE_GALLERYDL"],
"description": "Enable gallery downloading with gallery-dl"
},
"GALLERYDL_BINARY": {
"type": "string",
"default": "gallery-dl",
"description": "Path to gallery-dl binary"
},
"GALLERYDL_TIMEOUT": {
"type": "integer",
"default": 3600,
"minimum": 30,
"x-fallback": "TIMEOUT",
"description": "Timeout for gallery downloads in seconds"
},
"GALLERYDL_COOKIES_FILE": {
"type": "string",
"default": "",
"x-fallback": "COOKIES_FILE",
"description": "Path to cookies file"
},
"GALLERYDL_CHECK_SSL_VALIDITY": {
"type": "boolean",
"default": true,
"x-fallback": "CHECK_SSL_VALIDITY",
"description": "Whether to verify SSL certificates"
},
"GALLERYDL_ARGS": {
"type": "array",
"items": {"type": "string"},
"default": [
"--write-metadata",
"--write-info-json"
],
"x-aliases": ["GALLERYDL_DEFAULT_ARGS"],
"description": "Default gallery-dl arguments"
},
"GALLERYDL_ARGS_EXTRA": {
"type": "array",
"items": {"type": "string"},
"default": [],
"x-aliases": ["GALLERYDL_EXTRA_ARGS"],
"description": "Extra arguments to append to gallery-dl command"
}
}
}

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env python3
"""
Emit gallery-dl Binary dependency for the crawl.
"""
import json
import os
import sys
def get_env(name: str, default: str = '') -> str:
    """Return the named environment variable (or *default*), whitespace-stripped."""
    value = os.environ.get(name, default)
    return value.strip()
def get_env_bool(name: str, default: bool = False) -> bool:
    """Parse a boolean environment variable; unrecognized values yield *default*."""
    # Inline the env read (get_env(name, '') == os.environ.get(name, '').strip()).
    raw = os.environ.get(name, '').strip().lower()
    if raw in {'true', '1', 'yes', 'on'}:
        return True
    if raw in {'false', '0', 'no', 'off'}:
        return False
    return default
def output_binary(name: str, binproviders: str):
    """Output Binary JSONL record for a dependency on stdout."""
    # Key order matches the original record so the emitted JSON is identical.
    print(json.dumps({
        'type': 'Binary',
        'name': name,
        'binproviders': binproviders,
        'machine_id': os.environ.get('MACHINE_ID', ''),
    }))
def main():
    """Emit the gallery-dl Binary dependency record unless the plugin is disabled."""
    # Both paths exit 0: a disabled plugin is not an error, it just emits nothing.
    if get_env_bool('GALLERYDL_ENABLED', True):
        output_binary(name='gallery-dl', binproviders='pip,brew,apt,env')
    sys.exit(0)
if __name__ == '__main__':
main()

View File

@@ -1,261 +0,0 @@
#!/usr/bin/env python3
"""
Download image galleries from a URL using gallery-dl.
Usage: on_Snapshot__03_gallerydl.bg.py --url=<url> --snapshot-id=<uuid>
Output: Downloads gallery images to $PWD/gallerydl/
Environment variables:
GALLERYDL_ENABLED: Enable gallery-dl gallery extraction (default: True)
GALLERYDL_BINARY: Path to gallery-dl binary (default: gallery-dl)
GALLERYDL_TIMEOUT: Timeout in seconds (x-fallback: TIMEOUT)
GALLERYDL_COOKIES_FILE: Path to cookies file (x-fallback: COOKIES_FILE)
GALLERYDL_CHECK_SSL_VALIDITY: Whether to verify SSL certs (x-fallback: CHECK_SSL_VALIDITY)
GALLERYDL_ARGS: Default gallery-dl arguments (JSON array)
GALLERYDL_ARGS_EXTRA: Extra arguments to append (JSON array)
"""
import json
import os
import subprocess
import sys
import threading
from pathlib import Path
import rich_click as click
# Extractor metadata
PLUGIN_NAME = 'gallerydl'
BIN_NAME = 'gallery-dl'
BIN_PROVIDERS = 'pip,env'
OUTPUT_DIR = '.'
def get_env(name: str, default: str = '') -> str:
    """Look up *name* in the environment, defaulting and stripping whitespace."""
    raw = os.environ.get(name)
    if raw is None:
        raw = default
    return raw.strip()
def get_env_bool(name: str, default: bool = False) -> bool:
    """Interpret an environment variable as a boolean flag.

    Accepts true/1/yes/on and false/0/no/off (case-insensitive); anything
    else - including an unset variable - returns *default*.
    """
    # Inline env read: equivalent to get_env(name, '') followed by .lower().
    flag = os.environ.get(name, '').strip().lower()
    truthy = ('true', '1', 'yes', 'on')
    falsy = ('false', '0', 'no', 'off')
    if flag in truthy:
        return True
    if flag in falsy:
        return False
    return default
def get_env_int(name: str, default: int = 0) -> int:
    """Parse an integer environment variable; missing or invalid yields *default*."""
    raw = os.environ.get(name, str(default)).strip()
    try:
        return int(raw)
    except ValueError:
        return default
def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
"""Parse a JSON array from environment variable."""
val = get_env(name, '')
if not val:
return default if default is not None else []
try:
result = json.loads(val)
if isinstance(result, list):
return [str(item) for item in result]
return default if default is not None else []
except json.JSONDecodeError:
return default if default is not None else []
STATICFILE_DIR = '../staticfile'
def has_staticfile_output() -> bool:
    """Check if the staticfile extractor already downloaded this URL.

    Returns True only when ../staticfile/stdout.log contains an ArchiveResult
    JSONL record with status 'succeeded'.
    """
    log_file = Path(STATICFILE_DIR) / 'stdout.log'
    if not log_file.exists():
        return False
    for raw_line in log_file.read_text(errors='ignore').splitlines():
        candidate = raw_line.strip()
        if not candidate.startswith('{'):
            continue
        try:
            record = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded':
            return True
    return False
def save_gallery(url: str, binary: str) -> tuple[bool, str | None, str]:
    """
    Download gallery using gallery-dl.

    Returns: (success, output_path, error_message)
      - (True, path, '')   at least one gallery file was downloaded
      - (True, None, '')   nothing to download here (NOT an error)
      - (False, None, msg) transient or real failure
    """
    # Get config from env (with GALLERYDL_ prefix, x-fallback handled by config loader)
    timeout = get_env_int('GALLERYDL_TIMEOUT') or get_env_int('TIMEOUT', 3600)
    check_ssl = get_env_bool('GALLERYDL_CHECK_SSL_VALIDITY', True) if get_env('GALLERYDL_CHECK_SSL_VALIDITY') else get_env_bool('CHECK_SSL_VALIDITY', True)
    gallerydl_args = get_env_array('GALLERYDL_ARGS', [])
    gallerydl_args_extra = get_env_array('GALLERYDL_ARGS_EXTRA', [])
    cookies_file = get_env('GALLERYDL_COOKIES_FILE') or get_env('COOKIES_FILE', '')
    # Output directory is current directory (hook already runs in output dir)
    output_dir = Path(OUTPUT_DIR)
    # Build command
    # Use -D for exact directory (flat structure) instead of -d (nested structure)
    cmd = [
        binary,
        *gallerydl_args,
        '-D', str(output_dir),
    ]
    if not check_ssl:
        cmd.append('--no-check-certificate')
    if cookies_file and Path(cookies_file).exists():
        cmd.extend(['-C', cookies_file])
    if gallerydl_args_extra:
        cmd.extend(gallerydl_args_extra)
    cmd.append(url)
    try:
        print(f'[gallerydl] Starting download (timeout={timeout}s)', file=sys.stderr)
        output_lines: list[str] = []
        # Merge stderr into stdout so a single reader can both stream progress
        # to our stderr and buffer it for error classification below.
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )
        def _read_output() -> None:
            # Runs in a daemon thread so a stalled pipe cannot block the
            # timeout handling in process.wait() below.
            if not process.stdout:
                return
            for line in process.stdout:
                output_lines.append(line)
                sys.stderr.write(line)
        reader = threading.Thread(target=_read_output, daemon=True)
        reader.start()
        try:
            process.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            process.kill()
            reader.join(timeout=1)
            return False, None, f'Timed out after {timeout} seconds'
        reader.join(timeout=1)
        combined_output = ''.join(output_lines)
        # Check if any gallery files were downloaded (search recursively)
        gallery_extensions = (
            '.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg',
            '.mp4', '.webm', '.mkv', '.avi', '.mov', '.flv',
            '.json', '.txt', '.zip',
        )
        downloaded_files = [
            f for f in output_dir.rglob('*')
            if f.is_file() and f.suffix.lower() in gallery_extensions
        ]
        if downloaded_files:
            # Return first image file, or first file if no images
            image_files = [
                f for f in downloaded_files
                if f.suffix.lower() in ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp')
            ]
            output = str(image_files[0]) if image_files else str(downloaded_files[0])
            return True, output, ''
        else:
            stderr = combined_output
            # These are NOT errors - page simply has no downloadable gallery
            # Return success with no output (legitimate "nothing to download")
            stderr_lower = stderr.lower()
            if 'unsupported url' in stderr_lower:
                return True, None, ''  # Not a gallery site - success, no output
            if 'no results' in stderr_lower:
                return True, None, ''  # No gallery found - success, no output
            if process.returncode == 0:
                return True, None, ''  # gallery-dl exited cleanly, just no gallery - success
            # These ARE errors - something went wrong
            if '404' in stderr:
                return False, None, '404 Not Found'
            if '403' in stderr:
                return False, None, '403 Forbidden'
            if 'unable to extract' in stderr_lower:
                return False, None, 'Unable to extract gallery info'
            return False, None, f'gallery-dl error: {stderr}'
    except subprocess.TimeoutExpired:
        return False, None, f'Timed out after {timeout} seconds'
    except Exception as e:
        return False, None, f'{type(e).__name__}: {e}'
@click.command()
@click.option('--url', required=True, help='URL to download gallery from')
@click.option('--snapshot-id', required=True, help='Snapshot UUID')
def main(url: str, snapshot_id: str):
    """Download image gallery from a URL using gallery-dl.

    Emits an ArchiveResult JSONL record on stdout for success or a permanent
    skip; transient failures log to stderr and exit 1 WITHOUT emitting JSONL
    so the attempt can be retried.
    """
    # NOTE: the old locals `output = None; status = 'failed'; error = ''` were
    # dead code (status was never read; output/error are always assigned by
    # save_gallery before use), so they have been removed.
    try:
        # Disabled config is a temporary condition: no JSONL, clean exit.
        if not get_env_bool('GALLERYDL_ENABLED', True):
            print('Skipping gallery-dl (GALLERYDL_ENABLED=False)', file=sys.stderr)
            sys.exit(0)

        # Permanent skip: the staticfile extractor already downloaded this URL.
        if has_staticfile_output():
            print('Skipping gallery-dl - staticfile extractor already downloaded this', file=sys.stderr)
            print(json.dumps({
                'type': 'ArchiveResult',
                'status': 'skipped',
                'output_str': 'staticfile already handled',
            }))
            sys.exit(0)

        # Get binary from environment
        binary = get_env('GALLERYDL_BINARY', 'gallery-dl')

        # Run extraction
        success, output, error = save_gallery(url, binary)

        if success:
            # Success - emit ArchiveResult
            print(json.dumps({
                'type': 'ArchiveResult',
                'status': 'succeeded',
                'output_str': output or '',
            }))
            sys.exit(0)

        # Transient error - emit NO JSONL
        print(f'ERROR: {error}', file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Transient error - emit NO JSONL
        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
        sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -1,11 +0,0 @@
<!-- Gallery thumbnail - shows first image or placeholder -->
<!-- "{{ output_path }}" is a server-side template variable. If the image fails
     to load, the onerror handler hides it and reveals the emoji placeholder. -->
<div class="extractor-thumbnail gallerydl-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #1a1a1a; display: flex; align-items: center; justify-content: center;">
    <img src="{{ output_path }}"
         style="width: 100%; height: 100px; object-fit: contain;"
         alt="Gallery thumbnail"
         onerror="this.style.display='none'; this.nextElementSibling.style.display='flex';">
    <div style="display: none; flex-direction: column; align-items: center; color: #888; font-size: 12px;">
        <span style="font-size: 32px;">🖼️</span>
        <span>Gallery</span>
    </div>
</div>

View File

@@ -1,28 +0,0 @@
<!-- Fullscreen gallery view - shows image in full size -->
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Gallery</title>
<style>
body {
margin: 0;
padding: 0;
background: #000;
display: flex;
align-items: center;
justify-content: center;
min-height: 100vh;
}
img {
max-width: 100%;
max-height: 100vh;
object-fit: contain;
}
</style>
</head>
<body>
<img src="{{ output_path }}" alt="Gallery image">
</body>
</html>

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--gallerydl" title="Gallery"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="5" width="18" height="14" rx="2"/><circle cx="8" cy="10" r="1.5" fill="currentColor" stroke="none"/><path d="M21 17l-5-5-5 5"/></svg></span>

View File

@@ -1,190 +0,0 @@
"""
Integration tests for gallerydl plugin
Tests verify:
1. Hook script exists
2. Dependencies installed via validation hooks
3. Verify deps with abx-pkg
4. Gallery extraction works on gallery URLs
5. JSONL output is correct
6. Config options work
7. Handles non-gallery URLs gracefully
"""
import json
import subprocess
import sys
import tempfile
import time
from pathlib import Path
import pytest
PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
GALLERYDL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_gallerydl.*'), None)
TEST_URL = 'https://example.com'
def test_hook_script_exists():
    """Verify the on_Snapshot gallerydl hook exists next to this test."""
    # GALLERYDL_HOOK is None when the module-level glob found nothing; guard it
    # so the failure is a clean assertion instead of AttributeError on None.
    assert GALLERYDL_HOOK is not None, f"No on_Snapshot__*_gallerydl.* hook found in {PLUGIN_DIR}"
    assert GALLERYDL_HOOK.exists(), f"Hook not found: {GALLERYDL_HOOK}"
def test_verify_deps_with_abx_pkg():
    """Verify gallery-dl is available via abx-pkg."""
    from abx_pkg import Binary, PipProvider, EnvProvider

    gallerydl_binary = Binary(name='gallery-dl', binproviders=[PipProvider(), EnvProvider()])
    gallerydl_loaded = gallerydl_binary.load()
    # BUG FIX: the old version collected missing binaries and then did
    # `if missing_binaries: pass`, so the test passed unconditionally.
    # Fail loudly when gallery-dl cannot be resolved.
    assert gallerydl_loaded and gallerydl_loaded.abspath, (
        "gallery-dl binary not found via pip or env binproviders"
    )
def test_handles_non_gallery_url():
    """Test that gallery-dl extractor handles non-gallery URLs gracefully via hook."""
    # Prerequisites (binary availability) checked by test_verify_deps_with_abx_pkg
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        # Run gallery-dl extraction hook on non-gallery URL
        result = subprocess.run(
            [sys.executable, str(GALLERYDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            timeout=60,
        )
        # Should exit 0 even for non-gallery URL
        assert result.returncode == 0, f"Should handle non-gallery URL gracefully: {result.stderr}"
        # Parse clean JSONL output for the ArchiveResult record
        # (the stray no-op `pass` the old loop carried is gone)
        result_json = None
        for line in result.stdout.strip().split('\n'):
            line = line.strip()
            if not line.startswith('{'):
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            if record.get('type') == 'ArchiveResult':
                result_json = record
                break
        assert result_json, "Should have ArchiveResult JSONL output"
        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
def test_config_save_gallery_dl_false_skips():
    """GALLERYDL_ENABLED=False must exit 0 and emit no JSONL records."""
    import os
    with tempfile.TemporaryDirectory() as tmpdir:
        child_env = {**os.environ, 'GALLERYDL_ENABLED': 'False'}
        proc = subprocess.run(
            [sys.executable, str(GALLERYDL_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=child_env,
            timeout=30,
        )
        assert proc.returncode == 0, f"Should exit 0 when feature disabled: {proc.stderr}"
        # A disabled feature is a temporary condition: the hook logs the skip
        # reason to stderr but must not emit any ArchiveResult JSONL.
        assert 'Skipping' in proc.stderr or 'False' in proc.stderr, "Should log skip reason to stderr"
        jsonl_lines = [ln for ln in proc.stdout.strip().split('\n') if ln.strip().startswith('{')]
        assert not jsonl_lines, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
def test_config_timeout():
    """Test that GALLERYDL_TIMEOUT config is respected."""
    import os
    with tempfile.TemporaryDirectory() as tmpdir:
        env = os.environ.copy()
        # BUG FIX: the hook and config schema read GALLERYDL_TIMEOUT;
        # the previous GALLERY_DL_TIMEOUT name was silently ignored.
        env['GALLERYDL_TIMEOUT'] = '5'
        start_time = time.time()
        result = subprocess.run(
            [sys.executable, str(GALLERYDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=env,
            timeout=10,  # Should complete in 5s, use 10s as safety margin
        )
        elapsed_time = time.time() - start_time
        assert result.returncode == 0, f"Should complete without hanging: {result.stderr}"
        # Allow 1 second overhead for subprocess startup and Python interpreter
        assert elapsed_time <= 6.0, f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
def test_real_gallery_url():
    """Test that gallery-dl can extract images from a real Flickr gallery URL.

    NOTE: network-dependent - requires flickr.com to be reachable.
    """
    import os
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        # Use a real Flickr photo page
        gallery_url = 'https://www.flickr.com/photos/gregorydolivet/55002388567/in/explore-2025-12-25/'
        env = os.environ.copy()
        # BUG FIX: the hook and config schema read GALLERYDL_TIMEOUT;
        # the previous GALLERY_DL_TIMEOUT name was silently ignored.
        env['GALLERYDL_TIMEOUT'] = '60'  # Give it time to download
        start_time = time.time()
        result = subprocess.run(
            [sys.executable, str(GALLERYDL_HOOK), '--url', gallery_url, '--snapshot-id', 'testflickr'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=env,
            timeout=90,
        )
        elapsed_time = time.time() - start_time
        # Should succeed
        assert result.returncode == 0, f"Should extract gallery successfully: {result.stderr}"
        # Parse JSONL output for the ArchiveResult record
        result_json = None
        for line in result.stdout.strip().split('\n'):
            line = line.strip()
            if not line.startswith('{'):
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            if record.get('type') == 'ArchiveResult':
                result_json = record
                break
        assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
        # Check that some files were downloaded
        output_files = list(tmpdir.glob('**/*'))
        image_files = [f for f in output_files if f.is_file() and f.suffix.lower() in ('.jpg', '.jpeg', '.png', '.gif', '.webp')]
        assert len(image_files) > 0, f"Should have downloaded at least one image. Files: {output_files}"
        print(f"Successfully extracted {len(image_files)} image(s) in {elapsed_time:.2f}s")
if __name__ == '__main__':
pytest.main([__file__, '-v'])

View File

@@ -1,44 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"properties": {
"GIT_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_GIT", "USE_GIT"],
"description": "Enable git repository cloning"
},
"GIT_BINARY": {
"type": "string",
"default": "git",
"description": "Path to git binary"
},
"GIT_TIMEOUT": {
"type": "integer",
"default": 120,
"minimum": 10,
"x-fallback": "TIMEOUT",
"description": "Timeout for git operations in seconds"
},
"GIT_DOMAINS": {
"type": "string",
"default": "github.com,gitlab.com,bitbucket.org,gist.github.com,codeberg.org,gitea.com,git.sr.ht",
"description": "Comma-separated list of domains to treat as git repositories"
},
"GIT_ARGS": {
"type": "array",
"items": {"type": "string"},
"default": ["clone", "--depth=1", "--recursive"],
"x-aliases": ["GIT_DEFAULT_ARGS"],
"description": "Default git arguments"
},
"GIT_ARGS_EXTRA": {
"type": "array",
"items": {"type": "string"},
"default": [],
"x-aliases": ["GIT_EXTRA_ARGS"],
"description": "Extra arguments to append to git command"
}
}
}

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env python3
"""
Emit git Binary dependency for the crawl.
"""
import json
import os
import sys
def get_env(name: str, default: str = '') -> str:
    """Fetch an environment variable's value, stripped; *default* if unset."""
    value = os.environ.get(name)
    if value is None:
        value = default
    return value.strip()
def get_env_bool(name: str, default: bool = False) -> bool:
    """Parse a boolean env var; unset or unrecognized values yield *default*."""
    # Inlined env read (same semantics as get_env: missing -> '', then strip).
    val = os.environ.get(name, '').strip().lower()
    if val in ('true', '1', 'yes', 'on'):
        return True
    if val in ('false', '0', 'no', 'off'):
        return False
    return default
def output_binary(name: str, binproviders: str):
    """Print a single Binary JSONL record for a dependency to stdout."""
    record = {
        'type': 'Binary',
        'name': name,
        'binproviders': binproviders,
        # MACHINE_ID is injected by the hook runner; empty when run standalone.
        'machine_id': os.environ.get('MACHINE_ID', ''),
    }
    print(json.dumps(record))
def main():
    """Emit the git Binary dependency record unless GIT_ENABLED is false."""
    if not get_env_bool('GIT_ENABLED', True):
        sys.exit(0)
    output_binary(name='git', binproviders='apt,brew,env')
    sys.exit(0)


if __name__ == '__main__':
    main()

View File

@@ -1,145 +0,0 @@
#!/usr/bin/env python3
"""
Clone a git repository from a URL.
Usage: on_Snapshot__05_git.bg.py --url=<url> --snapshot-id=<uuid>
Output: Clones repository to $PWD/repo
Environment variables:
GIT_BINARY: Path to git binary
GIT_TIMEOUT: Timeout in seconds (default: 120)
GIT_ARGS: Default git arguments (JSON array, default: ["clone", "--depth=1", "--recursive"])
GIT_ARGS_EXTRA: Extra arguments to append (JSON array, default: [])
# Fallback to ARCHIVING_CONFIG values if GIT_* not set:
TIMEOUT: Fallback timeout
"""
import json
import os
import subprocess
import sys
from pathlib import Path
import rich_click as click
# Extractor metadata
PLUGIN_NAME = 'git'
BIN_NAME = 'git'
BIN_PROVIDERS = 'apt,brew,env'
OUTPUT_DIR = '.'
def get_env(name: str, default: str = '') -> str:
    """Return the named environment variable with whitespace trimmed."""
    raw = os.environ.get(name, default)
    return raw.strip()
def get_env_int(name: str, default: int = 0) -> int:
    """Parse an integer env var; missing or invalid values yield *default*."""
    raw = os.environ.get(name, str(default)).strip()
    try:
        return int(raw)
    except ValueError:
        return default
def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
"""Parse a JSON array from environment variable."""
val = get_env(name, '')
if not val:
return default if default is not None else []
try:
result = json.loads(val)
if isinstance(result, list):
return [str(item) for item in result]
return default if default is not None else []
except json.JSONDecodeError:
return default if default is not None else []
def is_git_url(url: str) -> bool:
    """Check if URL looks like a git repository.

    Honors the plugin's GIT_DOMAINS config (comma-separated hostnames,
    matching the default declared in config.schema.json) instead of the
    previous hard-coded substring match, which both ignored the config and
    false-positived on URLs merely *containing* e.g. 'github.com' in a path.
    """
    from urllib.parse import urlsplit  # local import: stdlib, used only here

    lowered = url.lower()
    # Explicit git transports are always treated as repos.
    if lowered.startswith(('git://', 'ssh://git@')):
        return True
    parsed = urlsplit(lowered)
    # Repo URLs conventionally end in .git regardless of host.
    if parsed.path.endswith('.git'):
        return True
    default_domains = (
        'github.com,gitlab.com,bitbucket.org,gist.github.com,'
        'codeberg.org,gitea.com,git.sr.ht'
    )
    domains = [
        d.strip()
        for d in os.environ.get('GIT_DOMAINS', default_domains).split(',')
        if d.strip()
    ]
    return (parsed.hostname or '') in domains
def clone_git(url: str, binary: str) -> tuple[bool, str | None, str]:
    """Clone a git repository into the current working directory.

    Args:
        url: repository URL to clone.
        binary: path to the git executable.

    Returns:
        (success, output_path, error_message) — output_path is OUTPUT_DIR
        ('.') on success, None on failure.
    """
    # GIT_TIMEOUT wins when set (and nonzero); otherwise fall back to TIMEOUT.
    timeout = get_env_int('GIT_TIMEOUT') or get_env_int('TIMEOUT', 120)
    git_args = get_env_array('GIT_ARGS', ["clone", "--depth=1", "--recursive"])
    git_args_extra = get_env_array('GIT_ARGS_EXTRA', [])
    cmd = [binary, *git_args, *git_args_extra, url, OUTPUT_DIR]
    try:
        result = subprocess.run(cmd, timeout=timeout)
        # BUGFIX: OUTPUT_DIR is '.', which always exists, so the old
        # `Path(OUTPUT_DIR).is_dir()` check was vacuous. A successful clone
        # into '.' creates a .git directory — check for that instead.
        if result.returncode == 0 and (Path(OUTPUT_DIR) / '.git').is_dir():
            return True, OUTPUT_DIR, ''
        return False, None, f'git clone failed (exit={result.returncode})'
    except subprocess.TimeoutExpired:
        return False, None, f'Timed out after {timeout} seconds'
    except Exception as e:
        return False, None, f'{type(e).__name__}: {e}'
@click.command()
@click.option('--url', required=True, help='Git repository URL')
@click.option('--snapshot-id', required=True, help='Snapshot UUID')
def main(url: str, snapshot_id: str):
    """Clone a git repository from a URL."""
    status, output, error = 'failed', None, ''
    try:
        if not is_git_url(url):
            # Non-repo URL: emit a 'skipped' ArchiveResult and exit cleanly.
            print(f'Skipping git clone for non-git URL: {url}', file=sys.stderr)
            print(json.dumps({
                'type': 'ArchiveResult',
                'status': 'skipped',
                'output_str': 'Not a git URL',
            }))
            sys.exit(0)
        success, output, error = clone_git(url, get_env('GIT_BINARY', 'git'))
        status = 'succeeded' if success else 'failed'
    except Exception as e:
        status, error = 'failed', f'{type(e).__name__}: {e}'
    if error:
        print(f'ERROR: {error}', file=sys.stderr)
    # Emit clean JSONL (no RESULT_JSON= prefix) for the hook runner.
    print(json.dumps({
        'type': 'ArchiveResult',
        'status': status,
        'output_str': output or error or '',
    }))
    sys.exit(0 if status == 'succeeded' else 1)


if __name__ == '__main__':
    main()

View File

@@ -1,5 +0,0 @@
<!-- Git thumbnail - shows git repository icon and info -->
<div class="extractor-thumbnail git-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #f6f8fa; display: flex; flex-direction: column; align-items: center; justify-content: center; padding: 10px;">
<span style="font-size: 32px;">📂</span>
<span style="font-size: 11px; color: #586069; margin-top: 4px;">Git Repository</span>
</div>

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--git" title="Git"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="6" cy="6" r="2"/><circle cx="6" cy="18" r="2"/><circle cx="18" cy="12" r="2"/><path d="M8 6h5a3 3 0 0 1 3 3v1"/><path d="M8 18h5a3 3 0 0 0 3-3v-1"/></svg></span>

View File

@@ -1,130 +0,0 @@
"""
Integration tests for git plugin
Tests verify:
pass
1. Validate hook checks for git binary
2. Verify deps with abx-pkg
3. Standalone git extractor execution
"""
import json
import shutil
import subprocess
import sys
import tempfile
import time
from pathlib import Path
import pytest
PLUGIN_DIR = Path(__file__).parent.parent
GIT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_git.*'), None)
TEST_URL = 'https://github.com/ArchiveBox/abx-pkg.git'
def test_hook_script_exists():
    """The on_Snapshot git hook must be discoverable in the plugin dir."""
    # GIT_HOOK is produced by next(glob, None) at module level, so it is None
    # when no hook file matched — assert that explicitly to get a clear
    # failure message instead of AttributeError on None.exists().
    assert GIT_HOOK is not None, f"No on_Snapshot__*_git.* hook found in {PLUGIN_DIR}"
    assert GIT_HOOK.exists()
def test_verify_deps_with_abx_pkg():
    """Verify git is available via abx-pkg."""
    # NOTE(review): BinProviderOverrides is imported but unused — confirm it can be dropped.
    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
    # Resolve the git binary through the same provider chain the plugin declares.
    git_binary = Binary(name='git', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
    git_loaded = git_binary.load()
    assert git_loaded and git_loaded.abspath, "git is required for git plugin tests"
def test_reports_missing_git():
    """With PATH emptied, a failing hook run should mention the missing git dependency."""
    with tempfile.TemporaryDirectory() as tmpdir:
        # Point PATH at a nonexistent dir so the hook cannot find git.
        env = {'PATH': '/nonexistent'}
        result = subprocess.run(
            [sys.executable, str(GIT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
            cwd=tmpdir, capture_output=True, text=True, env=env
        )
        # Only assert on the error text when the hook actually failed.
        if result.returncode != 0:
            combined = result.stdout + result.stderr
            assert 'DEPENDENCY_NEEDED' in combined or 'git' in combined.lower() or 'ERROR=' in combined
def test_handles_non_git_url():
    """A non-git URL should produce a failed/skipped ArchiveResult, not a crash."""
    assert shutil.which('git'), "git binary not available"
    with tempfile.TemporaryDirectory() as tmpdir:
        result = subprocess.run(
            [sys.executable, str(GIT_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
            cwd=tmpdir, capture_output=True, text=True, timeout=30
        )
        # Should fail or skip for non-git URL
        assert result.returncode in (0, 1)
        # Parse clean JSONL output
        result_json = None
        for line in result.stdout.strip().split('\n'):
            line = line.strip()
            if line.startswith('{'):
                pass  # NOTE(review): vestigial no-op — confirm it can be removed
                try:
                    record = json.loads(line)
                    if record.get('type') == 'ArchiveResult':
                        result_json = record
                        break
                except json.JSONDecodeError:
                    # Non-JSON line starting with '{' — ignore and keep scanning.
                    pass
        if result_json:
            # Should report failure or skip for non-git URL
            assert result_json['status'] in ['failed', 'skipped'], f"Should fail or skip: {result_json}"
def test_real_git_repo():
    """Test that git can clone a real GitHub repository."""
    import os
    assert shutil.which('git'), "git binary not available"
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        # Use a real but small GitHub repository
        git_url = 'https://github.com/ArchiveBox/abx-pkg'
        env = os.environ.copy()
        env['GIT_TIMEOUT'] = '120'  # Give it time to clone
        start_time = time.time()
        result = subprocess.run(
            [sys.executable, str(GIT_HOOK), '--url', git_url, '--snapshot-id', 'testgit'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=env,
            timeout=180
        )
        elapsed_time = time.time() - start_time
        # Should succeed
        assert result.returncode == 0, f"Should clone repository successfully: {result.stderr}"
        # Parse JSONL output
        result_json = None
        for line in result.stdout.strip().split('\n'):
            line = line.strip()
            if line.startswith('{'):
                try:
                    record = json.loads(line)
                    if record.get('type') == 'ArchiveResult':
                        result_json = record
                        break
                except json.JSONDecodeError:
                    # Ignore non-JSON lines that happen to start with '{'.
                    pass
        assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
        # Check that the git repo was cloned
        git_dirs = list(tmpdir.glob('**/.git'))
        assert len(git_dirs) > 0, f"Should have cloned a git repository. Contents: {list(tmpdir.rglob('*'))}"
        print(f"Successfully cloned repository in {elapsed_time:.2f}s")
if __name__ == '__main__':
pytest.main([__file__, '-v'])

View File

@@ -1,20 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"properties": {
"HASHES_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_HASHES", "USE_HASHES"],
"description": "Enable merkle tree hash generation"
},
"HASHES_TIMEOUT": {
"type": "integer",
"default": 30,
"minimum": 5,
"x-fallback": "TIMEOUT",
"description": "Timeout for merkle tree generation in seconds"
}
}
}

View File

@@ -1,185 +0,0 @@
#!/usr/bin/env python3
"""
Create a hashed Merkle tree of all archived outputs.
This plugin runs after all extractors complete (priority 93) and generates
a cryptographic Merkle hash tree of all files in the snapshot directory.
Output: hashes.json containing root_hash, tree structure, file list, metadata
Usage: on_Snapshot__93_hashes.py --url=<url> --snapshot-id=<uuid>
Environment variables:
SAVE_HASHES: Enable hash merkle tree generation (default: true)
DATA_DIR: ArchiveBox data directory
ARCHIVE_DIR: Archive output directory
"""
import os
import sys
import json
import hashlib
from pathlib import Path
from datetime import datetime, timezone
from typing import Dict, List, Optional, Tuple, Any
import click
def sha256_file(filepath: Path) -> str:
    """Return the hex SHA256 digest of a file, or 64 zeros if unreadable."""
    digest = hashlib.sha256()
    try:
        with open(filepath, 'rb') as fh:
            for chunk in iter(lambda: fh.read(65536), b''):
                digest.update(chunk)
    except (OSError, PermissionError):
        # Unreadable files get a sentinel all-zero hash instead of aborting
        # the whole tree build.
        return '0' * 64
    return digest.hexdigest()
def sha256_data(data: bytes) -> str:
    """Return the hex SHA256 digest of raw bytes."""
    h = hashlib.sha256()
    h.update(data)
    return h.hexdigest()
def collect_files(snapshot_dir: Path, exclude_dirs: Optional[List[str]] = None) -> List[Tuple[Path, str, int]]:
    """Recursively collect (relative_path, sha256, size) for every regular file.

    Symlinks are skipped, and directories named in *exclude_dirs* (default:
    hashes/.git/__pycache__) are pruned from the walk. Results are sorted by
    path string so the resulting Merkle tree is deterministic.
    """
    skip = exclude_dirs or ['hashes', '.git', '__pycache__']
    collected: List[Tuple[Path, str, int]] = []
    for root, dirs, filenames in os.walk(snapshot_dir):
        # Prune excluded dirs in place so os.walk never descends into them.
        dirs[:] = [d for d in dirs if d not in skip]
        for fname in filenames:
            full_path = Path(root) / fname
            rel = full_path.relative_to(snapshot_dir)
            if full_path.is_symlink():
                continue
            digest = sha256_file(full_path)
            size = full_path.stat().st_size if full_path.exists() else 0
            collected.append((rel, digest, size))
    collected.sort(key=lambda entry: str(entry[0]))
    return collected
def build_merkle_tree(file_hashes: List[str]) -> Tuple[str, List[List[str]]]:
    """Build a Merkle tree bottom-up from leaf hashes.

    Returns (root_hash, tree_levels): tree_levels[0] is the leaves and the
    last level holds the single root. An empty leaf list yields the hash of
    the empty byte string and a single empty level.
    """
    if not file_hashes:
        return sha256_data(b''), [[]]
    levels: List[List[str]] = [file_hashes.copy()]
    while len(levels[-1]) > 1:
        prev = levels[-1]
        parents = []
        for idx in range(0, len(prev), 2):
            left = prev[idx]
            # An odd node at the end of a level is paired with itself.
            right = prev[idx + 1] if idx + 1 < len(prev) else left
            parents.append(sha256_data((left + right).encode('utf-8')))
        levels.append(parents)
    return levels[-1][0], levels
def create_hashes(snapshot_dir: Path) -> Dict[str, Any]:
    """Hash every file under *snapshot_dir* and assemble the Merkle record."""
    entries = collect_files(snapshot_dir)
    root_hash, tree_levels = build_merkle_tree([digest for _, digest, _ in entries])
    return {
        'root_hash': root_hash,
        'tree_levels': tree_levels,
        'files': [
            {'path': str(rel), 'hash': digest, 'size': size}
            for rel, digest, size in entries
        ],
        'metadata': {
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'file_count': len(entries),
            'total_size': sum(size for _, _, size in entries),
            'tree_depth': len(tree_levels),
        },
    }
@click.command()
@click.option('--url', required=True, help='URL being archived')
@click.option('--snapshot-id', required=True, help='Snapshot UUID')
def main(url: str, snapshot_id: str):
    """Generate Merkle tree of all archived outputs."""
    status, output, error = 'failed', None, ''
    root_hash = None
    file_count = 0
    try:
        # Honor the HASHES_ENABLED config flag (default: enabled).
        enabled = os.getenv('HASHES_ENABLED', 'true').lower() in ('true', '1', 'yes', 'on')
        if not enabled:
            status = 'skipped'
            click.echo(json.dumps({'status': status, 'output': 'HASHES_ENABLED=false'}))
            sys.exit(0)
        # CWD is the extractor output dir (<snapshot>/hashes/); its parent is
        # the snapshot directory whose contents we hash.
        output_dir = Path.cwd()
        snapshot_dir = output_dir.parent
        if not snapshot_dir.exists():
            raise FileNotFoundError(f'Snapshot directory not found: {snapshot_dir}')
        output_dir.mkdir(exist_ok=True)
        merkle_data = create_hashes(snapshot_dir)
        with open(output_dir / 'hashes.json', 'w', encoding='utf-8') as f:
            json.dump(merkle_data, f, indent=2)
        status, output = 'succeeded', 'hashes.json'
        root_hash = merkle_data['root_hash']
        file_count = merkle_data['metadata']['file_count']
    except Exception as e:
        status, error = 'failed', f'{type(e).__name__}: {e}'
        click.echo(f'Error: {error}', err=True)
    # Emit one JSON result line for the hook runner.
    click.echo(json.dumps({
        'status': status,
        'output': output,
        'error': error or None,
        'root_hash': root_hash,
        'file_count': file_count,
    }))
    sys.exit(0 if status in ('succeeded', 'skipped') else 1)


if __name__ == '__main__':
    main()

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--hashes" title="Authenticity Hashes"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="5" r="2"/><circle cx="6" cy="18" r="2"/><circle cx="18" cy="18" r="2"/><path d="M12 7v6"/><path d="M12 13l-4 3"/><path d="M12 13l4 3"/></svg></span>

View File

@@ -1,157 +0,0 @@
"""
Tests for the hashes plugin.
Tests the real merkle tree generation with actual files.
"""
import json
import os
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
from django.test import TestCase
# Get the path to the hashes hook
PLUGIN_DIR = Path(__file__).parent.parent
HASHES_HOOK = PLUGIN_DIR / 'on_Snapshot__93_hashes.py'
class TestHashesPlugin(TestCase):
    """Test the hashes plugin."""

    def test_hashes_hook_exists(self):
        """Hashes hook script should exist."""
        self.assertTrue(HASHES_HOOK.exists(), f"Hook not found: {HASHES_HOOK}")

    def test_hashes_generates_tree_for_files(self):
        """Hashes hook should generate merkle tree for files in snapshot directory."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create a mock snapshot directory structure
            snapshot_dir = Path(temp_dir) / 'snapshot'
            snapshot_dir.mkdir()
            # Create output directory for hashes
            output_dir = snapshot_dir / 'hashes'
            output_dir.mkdir()
            # Create some test files
            (snapshot_dir / 'index.html').write_text('<html><body>Test</body></html>')
            (snapshot_dir / 'screenshot.png').write_bytes(b'\x89PNG\r\n\x1a\n' + b'\x00' * 100)
            subdir = snapshot_dir / 'media'
            subdir.mkdir()
            (subdir / 'video.mp4').write_bytes(b'\x00\x00\x00\x18ftypmp42')
            # Run the hook from the output directory
            env = os.environ.copy()
            env['HASHES_ENABLED'] = 'true'
            result = subprocess.run(
                [
                    sys.executable, str(HASHES_HOOK),
                    '--url=https://example.com',
                    '--snapshot-id=test-snapshot',
                ],
                capture_output=True,
                text=True,
                cwd=str(output_dir),  # Hook expects to run from output dir
                env=env,
                timeout=30
            )
            # Should succeed
            self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
            # Check output file exists
            output_file = output_dir / 'hashes.json'
            self.assertTrue(output_file.exists(), "hashes.json not created")
            # Parse and verify output
            with open(output_file) as f:
                data = json.load(f)
            self.assertIn('root_hash', data)
            self.assertIn('files', data)
            self.assertIn('metadata', data)
            # Should have indexed our test files
            file_paths = [f['path'] for f in data['files']]
            self.assertIn('index.html', file_paths)
            self.assertIn('screenshot.png', file_paths)
            # Verify metadata
            self.assertGreater(data['metadata']['file_count'], 0)
            self.assertGreater(data['metadata']['total_size'], 0)

    def test_hashes_skips_when_disabled(self):
        """Hashes hook should skip when HASHES_ENABLED=false."""
        with tempfile.TemporaryDirectory() as temp_dir:
            snapshot_dir = Path(temp_dir) / 'snapshot'
            snapshot_dir.mkdir()
            output_dir = snapshot_dir / 'hashes'
            output_dir.mkdir()
            env = os.environ.copy()
            env['HASHES_ENABLED'] = 'false'
            result = subprocess.run(
                [
                    sys.executable, str(HASHES_HOOK),
                    '--url=https://example.com',
                    '--snapshot-id=test-snapshot',
                ],
                capture_output=True,
                text=True,
                cwd=str(output_dir),
                env=env,
                timeout=30
            )
            # Should succeed (exit 0) but skip
            self.assertEqual(result.returncode, 0)
            self.assertIn('skipped', result.stdout)

    def test_hashes_handles_empty_directory(self):
        """Hashes hook should handle empty snapshot directory."""
        with tempfile.TemporaryDirectory() as temp_dir:
            snapshot_dir = Path(temp_dir) / 'snapshot'
            snapshot_dir.mkdir()
            output_dir = snapshot_dir / 'hashes'
            output_dir.mkdir()
            env = os.environ.copy()
            env['HASHES_ENABLED'] = 'true'
            result = subprocess.run(
                [
                    sys.executable, str(HASHES_HOOK),
                    '--url=https://example.com',
                    '--snapshot-id=test-snapshot',
                ],
                capture_output=True,
                text=True,
                cwd=str(output_dir),
                env=env,
                timeout=30
            )
            # Should succeed even with empty directory
            self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
            # Check output file exists
            output_file = output_dir / 'hashes.json'
            self.assertTrue(output_file.exists())
            with open(output_file) as f:
                data = json.load(f)
            # Should have empty file list
            self.assertEqual(data['metadata']['file_count'], 0)
if __name__ == '__main__':
pytest.main([__file__, '-v'])

View File

@@ -1,21 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"required_plugins": ["chrome"],
"properties": {
"HEADERS_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_HEADERS", "USE_HEADERS"],
"description": "Enable HTTP headers capture"
},
"HEADERS_TIMEOUT": {
"type": "integer",
"default": 30,
"minimum": 5,
"x-fallback": "TIMEOUT",
"description": "Timeout for headers capture in seconds"
}
}
}

View File

@@ -1,247 +0,0 @@
#!/usr/bin/env node
/**
* Capture original request + response headers for the main navigation.
*
* This hook sets up CDP listeners BEFORE chrome_navigate loads the page,
* then waits for navigation to complete. It records the first top-level
* request headers and the corresponding response headers (with :status).
*
* Usage: on_Snapshot__27_headers.bg.js --url=<url> --snapshot-id=<uuid>
* Output: Writes headers.json
*/
const fs = require('fs');
const path = require('path');
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
const puppeteer = require('puppeteer-core');
// Import shared utilities from chrome_utils.js
const {
getEnvBool,
getEnvInt,
parseArgs,
connectToPage,
waitForPageLoaded,
} = require('../chrome/chrome_utils.js');
const PLUGIN_NAME = 'headers';
const OUTPUT_DIR = '.';
const OUTPUT_FILE = 'headers.json';
const CHROME_SESSION_DIR = '../chrome';
const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
let browser = null;
let page = null;
let client = null;
let shuttingDown = false;
let headersWritten = false;
let requestId = null;
let requestUrl = null;
let requestHeaders = null;
let responseHeaders = null;
let responseStatus = null;
let responseStatusText = null;
let responseUrl = null;
let originalUrl = null;
// Return the final (post-redirect) URL: prefer the chrome plugin's
// final_url.txt artifact, falling back to the connected page's current URL
// (or null before a page connection exists).
function getFinalUrl() {
  const finalUrlFile = path.join(CHROME_SESSION_DIR, 'final_url.txt');
  if (fs.existsSync(finalUrlFile)) {
    return fs.readFileSync(finalUrlFile, 'utf8').trim();
  }
  return page ? page.url() : null;
}
// Write headers.json exactly once. No-op until response headers have been
// captured; mirrors them under both `response_headers` and the legacy
// `headers` key, and injects an HTTP/2-style `:status` pseudo header when
// the response did not already carry one.
function writeHeadersFile() {
  if (headersWritten) return;
  if (!responseHeaders) return;
  const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
  const responseHeadersWithStatus = {
    ...(responseHeaders || {}),
  };
  if (responseStatus !== null && responseStatus !== undefined &&
      responseHeadersWithStatus[':status'] === undefined) {
    responseHeadersWithStatus[':status'] = String(responseStatus);
  }
  const record = {
    url: requestUrl || originalUrl,
    final_url: getFinalUrl(),
    status: responseStatus !== undefined ? responseStatus : null,
    request_headers: requestHeaders || {},
    response_headers: responseHeadersWithStatus,
    headers: responseHeadersWithStatus, // backwards compatibility
  };
  if (responseStatusText) {
    record.statusText = responseStatusText;
  }
  if (responseUrl) {
    record.response_url = responseUrl;
  }
  fs.writeFileSync(outputPath, JSON.stringify(record, null, 2));
  headersWritten = true;
}
// Attach CDP Network listeners to the shared Chrome tab BEFORE chrome_navigate
// loads the page, so the very first top-level document request is observed.
// Throws CHROME_SESSION_REQUIRED_ERROR if the chrome plugin's session
// artifacts are missing or its recorded pid is no longer alive.
async function setupListener(url) {
  const timeout = getEnvInt('HEADERS_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
  const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
  const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
  const pidFile = path.join(CHROME_SESSION_DIR, 'chrome.pid');
  if (!fs.existsSync(cdpFile) || !fs.existsSync(targetIdFile) || !fs.existsSync(pidFile)) {
    throw new Error(CHROME_SESSION_REQUIRED_ERROR);
  }
  try {
    // process.kill(pid, 0) only probes liveness; it sends no signal.
    const pid = parseInt(fs.readFileSync(pidFile, 'utf8').trim(), 10);
    if (!pid || Number.isNaN(pid)) throw new Error('Invalid pid');
    process.kill(pid, 0);
  } catch (e) {
    throw new Error(CHROME_SESSION_REQUIRED_ERROR);
  }
  const { browser, page } = await connectToPage({
    chromeSessionDir: CHROME_SESSION_DIR,
    timeoutMs: timeout,
    puppeteer,
  });
  client = await page.target().createCDPSession();
  await client.send('Network.enable');
  client.on('Network.requestWillBeSent', (params) => {
    try {
      // A redirect surfaces the previous response on the follow-up request
      // event; capture those headers if none have been recorded yet.
      if (requestId && !responseHeaders && params.redirectResponse && params.requestId === requestId) {
        responseHeaders = params.redirectResponse.headers || {};
        responseStatus = params.redirectResponse.status || null;
        responseStatusText = params.redirectResponse.statusText || null;
        responseUrl = params.redirectResponse.url || null;
        writeHeadersFile();
      }
      // Only latch onto the FIRST top-level http(s) Document request.
      if (requestId) return;
      if (params.type && params.type !== 'Document') return;
      if (!params.request || !params.request.url) return;
      if (!params.request.url.startsWith('http')) return;
      requestId = params.requestId;
      requestUrl = params.request.url;
      requestHeaders = params.request.headers || {};
    } catch (e) {
      // Ignore errors
    }
  });
  client.on('Network.responseReceived', (params) => {
    try {
      // Record only the response matching the latched navigation request.
      if (!requestId || params.requestId !== requestId || responseHeaders) return;
      const response = params.response || {};
      responseHeaders = response.headers || {};
      responseStatus = response.status || null;
      responseStatusText = response.statusText || null;
      responseUrl = response.url || null;
      writeHeadersFile();
    } catch (e) {
      // Ignore errors
    }
  });
  return { browser, page };
}
// Print the final ArchiveResult JSONL record at most once; `shuttingDown`
// doubles as the "result already emitted" latch.
function emitResult(status = 'succeeded', outputStr = OUTPUT_FILE) {
  if (shuttingDown) return;
  shuttingDown = true;
  console.log(JSON.stringify({
    type: 'ArchiveResult',
    status,
    output_str: outputStr,
  }));
}
// Signal handler: flush any captured headers to disk, emit the final
// ArchiveResult, disconnect (not close — the browser is shared) and exit
// 0/1 depending on whether headers.json was written.
async function handleShutdown(signal) {
  console.error(`\nReceived ${signal}, emitting final results...`);
  if (!headersWritten) {
    writeHeadersFile();
  }
  if (headersWritten) {
    emitResult('succeeded', OUTPUT_FILE);
  } else {
    emitResult('failed', 'No headers captured');
  }
  if (browser) {
    try {
      browser.disconnect();
    } catch (e) {}
  }
  process.exit(headersWritten ? 0 : 1);
}
// Entry point: parse CLI args, attach CDP listeners, then park until the
// hook runner sends SIGTERM/SIGINT (this is a .bg.js background hook).
async function main() {
  const args = parseArgs();
  const url = args.url;
  const snapshotId = args.snapshot_id;
  if (!url || !snapshotId) {
    console.error('Usage: on_Snapshot__27_headers.bg.js --url=<url> --snapshot-id=<uuid>');
    process.exit(1);
  }
  originalUrl = url;
  if (!getEnvBool('HEADERS_ENABLED', true)) {
    console.error('Skipping (HEADERS_ENABLED=False)');
    console.log(JSON.stringify({type: 'ArchiveResult', status: 'skipped', output_str: 'HEADERS_ENABLED=False'}));
    process.exit(0);
  }
  try {
    // Set up listeners BEFORE navigation
    const connection = await setupListener(url);
    browser = connection.browser;
    page = connection.page;
    // Register signal handlers for graceful shutdown
    process.on('SIGTERM', () => handleShutdown('SIGTERM'));
    process.on('SIGINT', () => handleShutdown('SIGINT'));
    // Wait for chrome_navigate to complete (non-fatal)
    try {
      const timeout = getEnvInt('HEADERS_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
      // NOTE(review): `timeout` is already in ms here, so `timeout * 4`
      // waits 4x the configured window — confirm the multiplier is intended.
      await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 200);
    } catch (e) {
      console.error(`WARN: ${e.message}`);
    }
    // Keep alive until SIGTERM
    await new Promise(() => {});
    return;
  } catch (e) {
    const errorMessage = (e && e.message)
      ? `${e.name || 'Error'}: ${e.message}`
      : String(e || 'Unknown error');
    console.error(`ERROR: ${errorMessage}`);
    console.log(JSON.stringify({
      type: 'ArchiveResult',
      status: 'failed',
      output_str: errorMessage,
    }));
    process.exit(1);
  }
}

main().catch(e => {
  console.error(`Fatal error: ${e.message}`);
  process.exit(1);
});

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--headers" title="Headers"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="4" cy="7" r="1" fill="currentColor" stroke="none"/><circle cx="4" cy="12" r="1" fill="currentColor" stroke="none"/><circle cx="4" cy="17" r="1" fill="currentColor" stroke="none"/><path d="M7 7h13"/><path d="M7 12h13"/><path d="M7 17h13"/></svg></span>

View File

@@ -1,409 +0,0 @@
"""
Integration tests for headers plugin
Tests verify:
pass
1. Plugin script exists and is executable
2. Node.js is available
3. Headers extraction works for real example.com
4. Output JSON contains actual HTTP headers
5. Config options work (TIMEOUT, USER_AGENT)
"""
import json
import shutil
import subprocess
import tempfile
import time
from pathlib import Path
import pytest
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
CHROME_NAVIGATE_HOOK,
get_test_env,
chrome_session,
)
PLUGIN_DIR = Path(__file__).parent.parent
HEADERS_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_headers.*'), None)
TEST_URL = 'https://example.com'
def normalize_root_url(url: str) -> str:
    """Strip all trailing slashes so root URLs compare equal."""
    trimmed = url
    while trimmed.endswith('/'):
        trimmed = trimmed[:-1]
    return trimmed
def run_headers_capture(headers_dir, snapshot_chrome_dir, env, url, snapshot_id):
    """Start the headers hook in the background, drive chrome_navigate, then
    poll up to ~60s for headers.json before terminating the hook.

    Returns (hook_returncode, hook_stdout, hook_stderr, nav_result, headers_file).
    """
    # The headers hook must attach its CDP listeners BEFORE navigation starts.
    hook_proc = subprocess.Popen(
        ['node', str(HEADERS_HOOK), f'--url={url}', f'--snapshot-id={snapshot_id}'],
        cwd=headers_dir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        env=env,
    )
    nav_result = subprocess.run(
        ['node', str(CHROME_NAVIGATE_HOOK), f'--url={url}', f'--snapshot-id={snapshot_id}'],
        cwd=snapshot_chrome_dir,
        capture_output=True,
        text=True,
        timeout=120,
        env=env,
    )
    headers_file = headers_dir / 'headers.json'
    # Poll for up to 60s for the hook to flush headers.json.
    for _ in range(60):
        if headers_file.exists() and headers_file.stat().st_size > 0:
            break
        time.sleep(1)
    if hook_proc.poll() is None:
        # Background hook is still parked waiting for SIGTERM — terminate it
        # and collect its output.
        hook_proc.terminate()
        try:
            stdout, stderr = hook_proc.communicate(timeout=5)
        except subprocess.TimeoutExpired:
            hook_proc.kill()
            stdout, stderr = hook_proc.communicate()
    else:
        stdout, stderr = hook_proc.communicate()
    return hook_proc.returncode, stdout, stderr, nav_result, headers_file
def test_hook_script_exists():
    """Verify hook script exists."""
    # HEADERS_HOOK is produced by next(glob, None) at module level, so it is
    # None when no hook matched — assert that explicitly to get a clear
    # failure message instead of AttributeError on None.exists().
    assert HEADERS_HOOK is not None, f"No on_Snapshot__*_headers.* hook found in {PLUGIN_DIR}"
    assert HEADERS_HOOK.exists(), f"Hook script not found: {HEADERS_HOOK}"
def test_node_is_available():
    """Test that Node.js is available on the system."""
    result = subprocess.run(
        ['which', 'node'],
        capture_output=True,
        text=True
    )
    if result.returncode != 0:
        # BUGFIX: this branch was a bare no-op, so a missing node binary fell
        # through to Path('').exists() and failed confusingly — skip instead.
        pytest.skip('node binary not found on PATH')
    binary_path = result.stdout.strip()
    assert Path(binary_path).exists(), f"Binary should exist at {binary_path}"
    # Test that node is executable and get version
    result = subprocess.run(
        ['node', '--version'],
        capture_output=True,
        text=True,
        timeout=10,
        env=get_test_env())
    assert result.returncode == 0, f"node not executable: {result.stderr}"
    assert result.stdout.startswith('v'), f"Unexpected node version format: {result.stdout}"
def test_extracts_headers_from_example_com():
    """Test full workflow: extract headers from real example.com."""
    # Check node is available
    if not shutil.which('node'):
        pass  # NOTE(review): no-op guard — looks like a mangled pytest.skip; confirm
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
            headers_dir = snapshot_chrome_dir.parent / 'headers'
            headers_dir.mkdir(exist_ok=True)
            result = run_headers_capture(
                headers_dir,
                snapshot_chrome_dir,
                env,
                TEST_URL,
                'test789',
            )
            hook_code, stdout, stderr, nav_result, headers_file = result
            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
            assert hook_code == 0, f"Extraction failed: {stderr}"
            # Parse clean JSONL output
            result_json = None
            for line in stdout.strip().split('\n'):
                line = line.strip()
                if line.startswith('{'):
                    pass  # NOTE(review): vestigial no-op — confirm it can be removed
                    try:
                        record = json.loads(line)
                        if record.get('type') == 'ArchiveResult':
                            result_json = record
                            break
                    except json.JSONDecodeError:
                        pass
            assert result_json, "Should have ArchiveResult JSONL output"
            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
            # Verify output file exists (hook writes to current directory)
            assert headers_file.exists(), "headers.json not created"
            # Verify headers JSON contains REAL example.com response
            headers_data = json.loads(headers_file.read_text())
            assert 'url' in headers_data, "Should have url field"
            assert normalize_root_url(headers_data['url']) == normalize_root_url(TEST_URL), f"URL should be {TEST_URL}"
            assert 'status' in headers_data, "Should have status field"
            assert headers_data['status'] in [200, 301, 302], \
                f"Should have valid HTTP status, got {headers_data['status']}"
            assert 'request_headers' in headers_data, "Should have request_headers field"
            assert isinstance(headers_data['request_headers'], dict), "Request headers should be a dict"
            assert 'response_headers' in headers_data, "Should have response_headers field"
            assert isinstance(headers_data['response_headers'], dict), "Response headers should be a dict"
            assert len(headers_data['response_headers']) > 0, "Response headers dict should not be empty"
            assert 'headers' in headers_data, "Should have headers field"
            assert isinstance(headers_data['headers'], dict), "Headers should be a dict"
            # Verify common HTTP headers are present
            headers_lower = {k.lower(): v for k, v in headers_data['response_headers'].items()}
            assert 'content-type' in headers_lower or 'content-length' in headers_lower, \
                "Should have at least one common HTTP header"
            assert headers_data['response_headers'].get(':status') == str(headers_data['status']), \
                "Response headers should include :status pseudo header"
def test_headers_output_structure():
    """Test that headers plugin produces correctly structured output."""
    if not shutil.which('node'):
        pass  # NOTE(review): no-op guard — looks like a mangled pytest.skip; confirm
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
            headers_dir = snapshot_chrome_dir.parent / 'headers'
            headers_dir.mkdir(exist_ok=True)
            result = run_headers_capture(
                headers_dir,
                snapshot_chrome_dir,
                env,
                TEST_URL,
                'testformat',
            )
            hook_code, stdout, stderr, nav_result, headers_file = result
            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
            assert hook_code == 0, f"Extraction failed: {stderr}"
            # Parse clean JSONL output
            result_json = None
            for line in stdout.strip().split('\n'):
                line = line.strip()
                if line.startswith('{'):
                    pass  # NOTE(review): vestigial no-op — confirm it can be removed
                    try:
                        record = json.loads(line)
                        if record.get('type') == 'ArchiveResult':
                            result_json = record
                            break
                    except json.JSONDecodeError:
                        pass
            assert result_json, "Should have ArchiveResult JSONL output"
            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
            # Verify output structure
            assert headers_file.exists(), "Output headers.json not created"
            output_data = json.loads(headers_file.read_text())
            # Verify all required fields are present
            assert 'url' in output_data, "Output should have url field"
            assert 'status' in output_data, "Output should have status field"
            assert 'request_headers' in output_data, "Output should have request_headers field"
            assert 'response_headers' in output_data, "Output should have response_headers field"
            assert 'headers' in output_data, "Output should have headers field"
            # Verify data types
            assert isinstance(output_data['status'], int), "Status should be integer"
            assert isinstance(output_data['request_headers'], dict), "Request headers should be dict"
            assert isinstance(output_data['response_headers'], dict), "Response headers should be dict"
            assert isinstance(output_data['headers'], dict), "Headers should be dict"
            # Verify example.com returns expected headers
            assert normalize_root_url(output_data['url']) == normalize_root_url(TEST_URL)
            assert output_data['status'] in [200, 301, 302]
def test_fails_without_chrome_session():
    """Test that headers plugin fails when chrome session is missing."""
    if not shutil.which('node'):
        # Previously this guard did nothing (`pass`); skip explicitly so the
        # test doesn't fail with a confusing subprocess error on hosts without node.
        pytest.skip('node binary not found on PATH (required to run the headers hook)')
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        # Run headers extraction directly, without launching a chrome session first.
        result = subprocess.run(
            ['node', str(HEADERS_HOOK), f'--url={TEST_URL}', '--snapshot-id=testhttp'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            timeout=60,
            env=get_test_env(),
        )
        assert result.returncode != 0, "Should fail without chrome session"
        assert 'No Chrome session found (chrome plugin must run first)' in (result.stdout + result.stderr)
def test_config_timeout_honored():
    """Test that TIMEOUT config is respected."""
    if not shutil.which('node'):
        # Skip cleanly instead of silently falling through to a doomed subprocess call.
        pytest.skip('node binary not found on PATH (required to run the headers hook)')
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        # Set very short timeout (but example.com should still succeed)
        import os
        env_override = os.environ.copy()
        env_override['TIMEOUT'] = '5'
        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
            headers_dir = snapshot_chrome_dir.parent / 'headers'
            headers_dir.mkdir(exist_ok=True)
            env.update(env_override)
            result = run_headers_capture(
                headers_dir,
                snapshot_chrome_dir,
                env,
                TEST_URL,
                'testtimeout',
            )
            # Should complete (success or fail, but not hang)
            hook_code, _stdout, _stderr, nav_result, _headers_file = result
            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
            assert hook_code in (0, 1), "Should complete without hanging"
def test_config_user_agent():
    """Test that USER_AGENT config is used."""
    if not shutil.which('node'):
        # Skip cleanly instead of silently falling through to a doomed subprocess call.
        pytest.skip('node binary not found on PATH (required to run the headers hook)')
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        # Set custom user agent
        import os
        env_override = os.environ.copy()
        env_override['USER_AGENT'] = 'TestBot/1.0'
        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
            headers_dir = snapshot_chrome_dir.parent / 'headers'
            headers_dir.mkdir(exist_ok=True)
            env.update(env_override)
            result = run_headers_capture(
                headers_dir,
                snapshot_chrome_dir,
                env,
                TEST_URL,
                'testua',
            )
            # Should succeed (example.com doesn't block)
            hook_code, stdout, _stderr, nav_result, _headers_file = result
            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
            if hook_code == 0:
                # Parse clean JSONL output: only attempt json.loads on lines that
                # actually look like JSON objects (the old loop parsed every line).
                result_json = None
                for line in stdout.strip().split('\n'):
                    line = line.strip()
                    if not line.startswith('{'):
                        continue
                    try:
                        record = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if record.get('type') == 'ArchiveResult':
                        result_json = record
                        break
                assert result_json, "Should have ArchiveResult JSONL output"
                assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
def test_handles_https_urls():
    """Test that HTTPS URLs work correctly."""
    if not shutil.which('node'):
        # Skip cleanly instead of silently falling through to a doomed subprocess call.
        pytest.skip('node binary not found on PATH (required to run the headers hook)')
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        with chrome_session(tmpdir, test_url='https://example.org', navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
            headers_dir = snapshot_chrome_dir.parent / 'headers'
            headers_dir.mkdir(exist_ok=True)
            result = run_headers_capture(
                headers_dir,
                snapshot_chrome_dir,
                env,
                'https://example.org',
                'testhttps',
            )
            hook_code, _stdout, _stderr, nav_result, headers_file = result
            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
            # Only validate output contents when the hook succeeded and wrote the file.
            if hook_code == 0 and headers_file.exists():
                output_data = json.loads(headers_file.read_text())
                assert normalize_root_url(output_data['url']) == normalize_root_url('https://example.org')
                assert output_data['status'] in [200, 301, 302]
def test_handles_404_gracefully():
    """Test that headers plugin handles 404s gracefully."""
    if not shutil.which('node'):
        # Skip cleanly instead of silently falling through to a doomed subprocess call.
        pytest.skip('node binary not found on PATH (required to run the headers hook)')
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        with chrome_session(tmpdir, test_url='https://example.com/nonexistent-page-404', navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
            headers_dir = snapshot_chrome_dir.parent / 'headers'
            headers_dir.mkdir(exist_ok=True)
            result = run_headers_capture(
                headers_dir,
                snapshot_chrome_dir,
                env,
                'https://example.com/nonexistent-page-404',
                'test404',
            )
            # May succeed or fail depending on server behavior
            # If it succeeds, verify 404 status is captured
            hook_code, _stdout, _stderr, nav_result, headers_file = result
            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
            if hook_code == 0 and headers_file.exists():
                output_data = json.loads(headers_file.read_text())
                assert output_data['status'] == 404, "Should capture 404 status"
# Allow running this test module directly (without invoking pytest externally).
if __name__ == '__main__':
    pytest.main([__file__, '-v'])

View File

@@ -1,20 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"properties": {
"HTMLTOTEXT_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_HTMLTOTEXT", "USE_HTMLTOTEXT"],
"description": "Enable HTML to text conversion"
},
"HTMLTOTEXT_TIMEOUT": {
"type": "integer",
"default": 30,
"minimum": 5,
"x-fallback": "TIMEOUT",
"description": "Timeout for HTML to text conversion in seconds"
}
}
}

View File

@@ -1,161 +0,0 @@
#!/usr/bin/env python3
"""
Convert HTML to plain text for search indexing.
This extractor reads HTML from other extractors (wget, singlefile, dom)
and converts it to plain text for full-text search.
Usage: on_Snapshot__htmltotext.py --url=<url> --snapshot-id=<uuid>
Output: Writes htmltotext.txt to $PWD
Environment variables:
TIMEOUT: Timeout in seconds (not used, but kept for consistency)
Note: This extractor does not require any external binaries.
It uses Python's built-in html.parser module.
"""
import json
import os
import re
import sys
from html.parser import HTMLParser
from pathlib import Path
import rich_click as click
# Extractor metadata
PLUGIN_NAME = 'htmltotext'
OUTPUT_DIR = '.'
OUTPUT_FILE = 'htmltotext.txt'
class HTMLTextExtractor(HTMLParser):
    """Extract text content from HTML, ignoring scripts/styles/head/noscript.

    Tracks how deeply parsing is nested inside "skip" containers so that text
    inside nested elements (e.g. ``<head><title>…</title></head>`` or
    ``<noscript><span>…</span></noscript>``) is also excluded.  The previous
    implementation only remembered the single most recent tag, so nested
    content inside skipped containers leaked into the output, and text
    immediately following a void element like ``<meta>`` was wrongly dropped
    (void elements produce a start tag but never a matching end tag).
    """

    # Void elements never wrap content, so they must not affect skip depth.
    _VOID_TAGS = {'meta', 'link', 'br', 'hr', 'img', 'input',
                  'area', 'base', 'col', 'embed', 'source', 'track', 'wbr'}

    def __init__(self):
        super().__init__()
        self.result = []  # collected text fragments, joined by get_text()
        self.skip_tags = {'script', 'style', 'head', 'meta', 'link', 'noscript'}
        self.current_tag = None  # kept for backwards compatibility with old callers
        self._skip_depth = 0  # number of currently-open skip-tag containers

    def handle_starttag(self, tag, attrs):
        tag = tag.lower()
        self.current_tag = tag
        if tag in self.skip_tags and tag not in self._VOID_TAGS:
            self._skip_depth += 1

    def handle_endtag(self, tag):
        tag = tag.lower()
        self.current_tag = None
        if tag in self.skip_tags and tag not in self._VOID_TAGS and self._skip_depth > 0:
            self._skip_depth -= 1

    def handle_data(self, data):
        # Drop any text that appears while inside a skipped container.
        if self._skip_depth:
            return
        text = data.strip()
        if text:
            self.result.append(text)

    def get_text(self) -> str:
        """Return all collected text fragments joined by single spaces."""
        return ' '.join(self.result)
def html_to_text(html: str) -> str:
    """Convert an HTML document into a single plain-text string.

    Uses HTMLTextExtractor; if the parser blows up on malformed markup,
    falls back to crude regex-based tag stripping.
    """
    extractor = HTMLTextExtractor()
    try:
        extractor.feed(html)
        return extractor.get_text()
    except Exception:
        # Fallback: strip <script>/<style> blocks, then all tags, then squash whitespace.
        stripped = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)
        stripped = re.sub(r'<style[^>]*>.*?</style>', '', stripped, flags=re.DOTALL | re.IGNORECASE)
        stripped = re.sub(r'<[^>]+>', ' ', stripped)
        stripped = re.sub(r'\s+', ' ', stripped)
        return stripped.strip()
def find_html_source() -> str | None:
"""Find HTML content from other extractors in the snapshot directory."""
# Hooks run in snapshot_dir, sibling extractor outputs are in subdirectories
search_patterns = [
'singlefile/singlefile.html',
'*_singlefile/singlefile.html',
'singlefile/*.html',
'*_singlefile/*.html',
'dom/output.html',
'*_dom/output.html',
'dom/*.html',
'*_dom/*.html',
'wget/**/*.html',
'*_wget/**/*.html',
'wget/**/*.htm',
'*_wget/**/*.htm',
]
for base in (Path.cwd(), Path.cwd().parent):
for pattern in search_patterns:
matches = list(base.glob(pattern))
for match in matches:
if match.is_file() and match.stat().st_size > 0:
try:
return match.read_text(errors='ignore')
except Exception:
continue
return None
def extract_htmltotext(url: str) -> tuple[bool, str | None, str]:
    """Extract plain text from HTML produced by sibling extractors.

    Returns a ``(success, output_path, error_message)`` tuple.  ``url`` is
    part of the extractor interface; the HTML itself is located on disk
    via find_html_source().
    """
    html_content = find_html_source()
    if not html_content:
        return False, None, 'No HTML source found (run singlefile, dom, or wget first)'
    text = html_to_text(html_content)
    # Require a minimum amount of text so we don't index empty/boilerplate pages.
    if not text or len(text) < 10:
        return False, None, 'No meaningful text extracted from HTML'
    # The hook already runs inside its output directory, so write relative to cwd.
    destination = Path(OUTPUT_DIR) / OUTPUT_FILE
    destination.write_text(text, encoding='utf-8')
    return True, str(destination), ''
@click.command()
@click.option('--url', required=True, help='URL that was archived')
@click.option('--snapshot-id', required=True, help='Snapshot UUID')
def main(url: str, snapshot_id: str):
    """Convert HTML to plain text for search indexing.

    On success, prints a single ArchiveResult JSONL record to stdout and
    exits 0.  On any failure (missing HTML source, write error, etc.) it
    prints an error to stderr and exits 1 WITHOUT emitting JSONL, which the
    orchestrator treats as a transient error.
    """
    # NOTE(review): snapshot_id is accepted for hook-interface consistency but
    # is not used by the extraction itself.
    try:
        # Run extraction (reads sibling extractor output, writes htmltotext.txt)
        success, output, error = extract_htmltotext(url)
        if success:
            # Success - emit ArchiveResult
            result = {
                'type': 'ArchiveResult',
                'status': 'succeeded',
                'output_str': output or ''
            }
            print(json.dumps(result))
            sys.exit(0)
        else:
            # Transient error - emit NO JSONL
            print(f'ERROR: {error}', file=sys.stderr)
            sys.exit(1)
    except Exception as e:
        # Transient error - emit NO JSONL
        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
        sys.exit(1)

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--htmltotext" title="HTML to Text"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M4 7h16"/><path d="M4 12h12"/><path d="M4 17h14"/></svg></span>

View File

@@ -1,84 +0,0 @@
"""
Integration tests for htmltotext plugin
Tests verify standalone htmltotext extractor execution.
"""
import json
import subprocess
import sys
import tempfile
from pathlib import Path
import pytest
PLUGIN_DIR = Path(__file__).parent.parent
HTMLTOTEXT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_htmltotext.*'), None)
TEST_URL = 'https://example.com'
def test_hook_script_exists():
    """The on_Snapshot htmltotext hook script must exist in the plugin dir.

    HTMLTOTEXT_HOOK comes from ``next(PLUGIN_DIR.glob(...), None)`` and can be
    None; check that first so a missing hook fails with a clear assertion
    instead of an AttributeError (matches the infiniscroll test's pattern).
    """
    assert HTMLTOTEXT_HOOK is not None, f"No on_Snapshot__*_htmltotext.* hook found in {PLUGIN_DIR}"
    assert HTMLTOTEXT_HOOK.exists(), f"Hook not found: {HTMLTOTEXT_HOOK}"
def test_extracts_text_from_html():
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        # Provide a fake singlefile output for the extractor to read.
        singlefile_dir = tmpdir / 'singlefile'
        singlefile_dir.mkdir()
        (singlefile_dir / 'singlefile.html').write_text(
            '<html><body><h1>Example Domain</h1><p>This domain is for examples.</p></body></html>'
        )
        result = subprocess.run(
            [sys.executable, str(HTMLTOTEXT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
            cwd=tmpdir, capture_output=True, text=True, timeout=30
        )
        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
        # The hook prints JSONL records on stdout; find the ArchiveResult one.
        result_json = None
        for raw_line in result.stdout.strip().split('\n'):
            raw_line = raw_line.strip()
            if not raw_line.startswith('{'):
                continue
            try:
                record = json.loads(raw_line)
            except json.JSONDecodeError:
                continue
            if record.get('type') == 'ArchiveResult':
                result_json = record
                break
        assert result_json, "Should have ArchiveResult JSONL output"
        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
        # Verify output file (hook writes to current directory)
        output_file = tmpdir / 'htmltotext.txt'
        assert output_file.exists(), f"htmltotext.txt not created. Files: {list(tmpdir.iterdir())}"
        content = output_file.read_text()
        assert len(content) > 0, "Content should not be empty"
        assert 'Example Domain' in content, "Should contain text from HTML"
def test_fails_gracefully_without_html():
    with tempfile.TemporaryDirectory() as tmpdir:
        # No sibling extractor output exists, so extraction must not succeed.
        result = subprocess.run(
            [sys.executable, str(HTMLTOTEXT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
            cwd=tmpdir, capture_output=True, text=True, timeout=30
        )
        # Should exit with non-zero or emit failure JSONL
        result_json = None
        for raw_line in result.stdout.strip().split('\n'):
            raw_line = raw_line.strip()
            if not raw_line.startswith('{'):
                continue
            try:
                record = json.loads(raw_line)
            except json.JSONDecodeError:
                continue
            if record.get('type') == 'ArchiveResult':
                result_json = record
                break
        if result_json:
            # Should report failure or skip since no HTML source
            assert result_json['status'] in ['failed', 'skipped'], f"Should fail or skip without HTML: {result_json}"
# Allow running this test module directly (without invoking pytest externally).
if __name__ == '__main__':
    pytest.main([__file__, '-v'])

View File

@@ -1,51 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"required_plugins": ["chrome"],
"properties": {
"INFINISCROLL_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["SAVE_INFINISCROLL", "USE_INFINISCROLL"],
"description": "Enable infinite scroll page expansion"
},
"INFINISCROLL_TIMEOUT": {
"type": "integer",
"default": 120,
"minimum": 10,
"x-fallback": "TIMEOUT",
"description": "Maximum timeout for scrolling in seconds"
},
"INFINISCROLL_SCROLL_DELAY": {
"type": "integer",
"default": 2000,
"minimum": 500,
"description": "Delay between scrolls in milliseconds"
},
"INFINISCROLL_SCROLL_DISTANCE": {
"type": "integer",
"default": 1600,
"minimum": 100,
"description": "Distance to scroll per step in pixels"
},
"INFINISCROLL_SCROLL_LIMIT": {
"type": "integer",
"default": 10,
"minimum": 1,
"maximum": 100,
"description": "Maximum number of scroll steps"
},
"INFINISCROLL_MIN_HEIGHT": {
"type": "integer",
"default": 16000,
"minimum": 1000,
"description": "Minimum page height to scroll to in pixels"
},
"INFINISCROLL_EXPAND_DETAILS": {
"type": "boolean",
"default": true,
"description": "Expand <details> elements and click 'load more' buttons for comments"
}
}
}

View File

@@ -1,427 +0,0 @@
#!/usr/bin/env node
/**
* Scroll the page down to trigger infinite scroll / lazy loading.
*
* Scrolls down 1 page at a time, up to INFINISCROLL_SCROLL_LIMIT times,
* ensuring at least INFINISCROLL_MIN_HEIGHT (default 16,000px) is reached.
* Stops early if no new content loads after a scroll.
*
* Optionally expands <details> elements and clicks "load more" buttons.
*
* Usage: on_Snapshot__45_infiniscroll.js --url=<url> --snapshot-id=<uuid>
* Output: JSONL with scroll stats (no files created)
*
* Environment variables:
* INFINISCROLL_ENABLED: Enable/disable (default: true)
* INFINISCROLL_TIMEOUT: Max timeout in seconds (default: 120)
* INFINISCROLL_SCROLL_DELAY: Delay between scrolls in ms (default: 2000)
* INFINISCROLL_SCROLL_DISTANCE: Pixels per scroll (default: 1600)
* INFINISCROLL_SCROLL_LIMIT: Max scroll iterations (default: 10)
* INFINISCROLL_MIN_HEIGHT: Min page height to reach in px (default: 16000)
* INFINISCROLL_EXPAND_DETAILS: Expand <details> and comments (default: true)
*/
const fs = require('fs');
const path = require('path');
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
const {
getEnv,
getEnvBool,
getEnvInt,
} = require('../chrome/chrome_utils.js');
// Check if infiniscroll is enabled BEFORE requiring puppeteer
// (lets the hook exit quickly even on hosts where puppeteer-core is missing).
if (!getEnvBool('INFINISCROLL_ENABLED', true)) {
  console.error('Skipping infiniscroll (INFINISCROLL_ENABLED=False)');
  process.exit(0);
}
const puppeteer = require('puppeteer-core');
const PLUGIN_NAME = 'infiniscroll';
// Chrome session state (cdp_url.txt, target_id.txt, navigation.json) is written
// by the chrome plugin into a sibling directory of this hook's working dir.
const CHROME_SESSION_DIR = '../chrome';
const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
function parseArgs() {
const args = {};
process.argv.slice(2).forEach(arg => {
if (arg.startsWith('--')) {
const [key, ...valueParts] = arg.slice(2).split('=');
args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
}
});
return args;
}
function getCdpUrl() {
  // The chrome plugin writes its DevTools websocket URL to ../chrome/cdp_url.txt.
  // Returns the trimmed URL, or null when no chrome session has been started.
  const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
  return fs.existsSync(cdpFile) ? fs.readFileSync(cdpFile, 'utf8').trim() : null;
}
function getPageId() {
  // The chrome plugin records the DevTools target id of the tab it opened.
  // Returns the trimmed id, or null when the file is absent.
  const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
  return fs.existsSync(targetIdFile) ? fs.readFileSync(targetIdFile, 'utf8').trim() : null;
}
async function waitForChromeTabLoaded(timeoutMs = 60000) {
  // Poll for ../chrome/navigation.json (written once chrome_navigate finishes).
  // Resolves true as soon as the file appears, false after timeoutMs.
  const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    if (fs.existsSync(navigationFile)) return true;
    await new Promise(resolve => setTimeout(resolve, 100));
  }
  return false;
}
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
/**
 * Expand <details> elements and click "load more" buttons for comments.
 * Based on archivebox.ts expandComments function.
 *
 * @param {object} page - connected puppeteer page handle
 * @param {{timeout?: number, limit?: number, delay?: number}} options
 *   timeout: max ms to spend clicking "load more" links,
 *   limit: max number of clicks, delay: ms pause between clicks
 * @returns {Promise<{detailsExpanded: number, commentsExpanded: number, total: number}>}
 */
async function expandDetails(page, options = {}) {
  const {
    timeout = 30000,
    limit = 500,
    delay = 500,
  } = options;
  // NOTE(review): this outer startTime appears unused — the click loop below
  // tracks its own startTime inside page.evaluate; confirm before removing.
  const startTime = Date.now();
  // First, expand all <details> elements (runs in the page context).
  const detailsExpanded = await page.evaluate(() => {
    let count = 0;
    // Generic <details> elements
    document.querySelectorAll('details:not([open])').forEach(el => {
      el.open = true;
      count++;
    });
    // Github README details sections
    document.querySelectorAll('article details:not([open])').forEach(el => {
      el.open = true;
      count++;
    });
    // Github issue discussion hidden comments
    document.querySelectorAll('div.js-discussion details:not(.details-overlay):not([open])').forEach(el => {
      el.open = true;
      count++;
    });
    // HedgeDoc/Markdown details sections
    document.querySelectorAll('.markdown-body details:not([open])').forEach(el => {
      el.open = true;
      count++;
    });
    return count;
  });
  if (detailsExpanded > 0) {
    console.error(`Expanded ${detailsExpanded} <details> elements`);
  }
  // Then click "load more" buttons for comments (also runs in the page context;
  // timeout/limit/delay are serialized across to the browser).
  const numExpanded = await page.evaluate(async ({ timeout, limit, delay }) => {
    // Helper to find elements by XPath
    function getElementsByXPath(xpath) {
      const results = [];
      const xpathResult = document.evaluate(
        xpath,
        document,
        null,
        XPathResult.ORDERED_NODE_ITERATOR_TYPE,
        null
      );
      let node;
      while ((node = xpathResult.iterateNext()) != null) {
        results.push(node);
      }
      return results;
    }
    const wait = (ms) => new Promise(res => setTimeout(res, ms));
    // Find all "load more" type buttons/links (site-specific selectors).
    const getLoadMoreLinks = () => [
      // Reddit (new)
      ...document.querySelectorAll('faceplate-partial[loading=action]'),
      // Reddit (old) - show more replies
      ...document.querySelectorAll('a[onclick^="return morechildren"]'),
      // Reddit (old) - show hidden replies
      ...document.querySelectorAll('a[onclick^="return togglecomment"]'),
      // Twitter/X - show more replies
      ...getElementsByXPath("//*[text()='Show more replies']"),
      ...getElementsByXPath("//*[text()='Show replies']"),
      // Generic "load more" / "show more" buttons
      ...getElementsByXPath("//*[contains(text(),'Load more')]"),
      ...getElementsByXPath("//*[contains(text(),'Show more')]"),
      // Hacker News
      ...document.querySelectorAll('a.morelink'),
    ];
    let expanded = 0;
    let loadMoreLinks = getLoadMoreLinks();
    const startTime = Date.now();
    // Keep clicking until no more links appear, or limit/timeout is reached.
    while (loadMoreLinks.length > 0) {
      for (const link of loadMoreLinks) {
        // Skip certain elements
        if (link.slot === 'children') continue;
        try {
          link.scrollIntoView({ behavior: 'smooth' });
          link.click();
          expanded++;
          await wait(delay);
        } catch (e) {
          // Ignore click errors
        }
        // Check limits
        if (expanded >= limit) return expanded;
        if (Date.now() - startTime >= timeout) return expanded;
      }
      // Check for new load more links after clicking
      await wait(delay);
      loadMoreLinks = getLoadMoreLinks();
    }
    return expanded;
  }, { timeout, limit, delay });
  if (numExpanded > 0) {
    console.error(`Clicked ${numExpanded} "load more" buttons`);
  }
  return {
    detailsExpanded,
    commentsExpanded: numExpanded,
    total: detailsExpanded + numExpanded,
  };
}
/**
 * Scroll the page down in fixed steps to trigger infinite scroll / lazy loading.
 *
 * Stops when: the scroll limit is hit, the overall timeout elapses, the bottom
 * of the page is reached (after at least 3 scrolls), or minHeight has been
 * reached and fully scrolled past.  Finally scrolls back to the top so later
 * extractors capture the page from the start.
 *
 * @param {object} page - connected puppeteer page handle
 * @param {{timeout?: number, scrollDelay?: number, scrollDistance?: number,
 *          scrollLimit?: number, minHeight?: number}} options
 * @returns {Promise<{scrollCount: number, finalHeight: number,
 *                    startingHeight: number, elapsedMs: number}>}
 */
async function scrollDown(page, options = {}) {
  const {
    timeout = 120000,
    scrollDelay = 2000,
    scrollDistance = 1600,
    scrollLimit = 10,
    minHeight = 16000,
  } = options;
  const startTime = Date.now();
  // Get page height using multiple methods (some pages use different scroll containers)
  const getPageHeight = () => page.evaluate(() => {
    return Math.max(
      document.body.scrollHeight || 0,
      document.body.offsetHeight || 0,
      document.documentElement.scrollHeight || 0,
      document.documentElement.offsetHeight || 0
    );
  });
  const startingHeight = await getPageHeight();
  let lastHeight = startingHeight;
  let scrollCount = 0;
  let scrollPosition = 0;  // absolute y-offset targeted by the latest scroll
  console.error(`Initial page height: ${startingHeight}px`);
  // Scroll to top first
  await page.evaluate(() => {
    window.scrollTo({ top: 0, left: 0, behavior: 'smooth' });
  });
  await sleep(500);
  while (scrollCount < scrollLimit) {
    // Check timeout
    const elapsed = Date.now() - startTime;
    if (elapsed >= timeout) {
      console.error(`Timeout reached after ${scrollCount} scrolls`);
      break;
    }
    scrollPosition = (scrollCount + 1) * scrollDistance;
    console.error(`Scrolling down ${scrollCount + 1}x ${scrollDistance}px... (${scrollPosition}/${lastHeight})`);
    await page.evaluate((yOffset) => {
      window.scrollTo({ top: yOffset, left: 0, behavior: 'smooth' });
    }, scrollPosition);
    scrollCount++;
    await sleep(scrollDelay);
    // Check if new content was added (infinite scroll detection)
    const newHeight = await getPageHeight();
    const addedPx = newHeight - lastHeight;
    if (addedPx > 0) {
      console.error(`Detected infini-scrolling: ${lastHeight}+${addedPx} => ${newHeight}`);
    } else if (scrollPosition >= newHeight + scrollDistance) {
      // Reached the bottom; only stop after a couple of scrolls so very short
      // pages still get a chance to lazy-load content.
      if (scrollCount > 2) {
        console.error(`Reached bottom of page at ${newHeight}px`);
        break;
      }
    }
    lastHeight = newHeight;
    // Check if we've reached minimum height and can stop
    if (lastHeight >= minHeight && scrollPosition >= lastHeight) {
      console.error(`Reached minimum height target (${minHeight}px)`);
      break;
    }
  }
  // Scroll to absolute bottom (in case the step loop stopped short of it)
  if (scrollPosition < lastHeight) {
    await page.evaluate(() => {
      window.scrollTo({ top: document.documentElement.scrollHeight, left: 0, behavior: 'smooth' });
    });
    await sleep(scrollDelay);
  }
  // Scroll back to top
  console.error(`Reached bottom of page at ${lastHeight}px, scrolling back to top...`);
  await page.evaluate(() => {
    window.scrollTo({ top: 0, left: 0, behavior: 'smooth' });
  });
  await sleep(scrollDelay);
  const totalElapsed = Date.now() - startTime;
  return {
    scrollCount,
    finalHeight: lastHeight,
    startingHeight,
    elapsedMs: totalElapsed,
  };
}
/**
 * Hook entrypoint: connect to the already-running Chrome session started by
 * the chrome plugin, expand collapsed content, scroll to trigger lazy loading,
 * then emit a single ArchiveResult JSONL record on stdout.  Exits non-zero
 * without JSONL on any failure (treated as transient by the orchestrator).
 */
async function main() {
  const args = parseArgs();
  const url = args.url;
  const snapshotId = args.snapshot_id;
  if (!url || !snapshotId) {
    console.error('Usage: on_Snapshot__45_infiniscroll.js --url=<url> --snapshot-id=<uuid>');
    process.exit(1);
  }
  // Tunables come from the environment (see config schema); timeout is seconds -> ms.
  const timeout = getEnvInt('INFINISCROLL_TIMEOUT', 120) * 1000;
  const scrollDelay = getEnvInt('INFINISCROLL_SCROLL_DELAY', 2000);
  const scrollDistance = getEnvInt('INFINISCROLL_SCROLL_DISTANCE', 1600);
  const scrollLimit = getEnvInt('INFINISCROLL_SCROLL_LIMIT', 10);
  const minHeight = getEnvInt('INFINISCROLL_MIN_HEIGHT', 16000);
  const expandDetailsEnabled = getEnvBool('INFINISCROLL_EXPAND_DETAILS', true);
  const cdpUrl = getCdpUrl();
  if (!cdpUrl) {
    console.error(CHROME_SESSION_REQUIRED_ERROR);
    process.exit(1);
  }
  // Wait for page to be loaded (chrome_navigate writes navigation.json when done)
  const pageLoaded = await waitForChromeTabLoaded(60000);
  if (!pageLoaded) {
    console.error('ERROR: Page not loaded after 60s (chrome_navigate must complete first)');
    process.exit(1);
  }
  let browser = null;
  try {
    browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
    const pages = await browser.pages();
    if (pages.length === 0) {
      throw new Error('No pages found in browser');
    }
    // Find the right page by target ID
    // NOTE(review): reads puppeteer's private target._targetId — may break
    // across puppeteer-core versions; confirm on upgrade.
    const targetId = getPageId();
    let page = null;
    if (targetId) {
      page = pages.find(p => {
        const target = p.target();
        return target && target._targetId === targetId;
      });
    }
    if (!page) {
      // Fall back to the most recently opened tab.
      page = pages[pages.length - 1];
    }
    console.error(`Starting infinite scroll on ${url}`);
    // Expand <details> and comments before scrolling (if enabled)
    let expandResult = { total: 0, detailsExpanded: 0, commentsExpanded: 0 };
    if (expandDetailsEnabled) {
      console.error('Expanding <details> and comments...');
      expandResult = await expandDetails(page, {
        timeout: Math.min(timeout / 4, 30000),
        limit: 500,
        delay: scrollDelay / 4,
      });
    }
    const result = await scrollDown(page, {
      timeout,
      scrollDelay,
      scrollDistance,
      scrollLimit,
      minHeight,
    });
    // Expand again after scrolling (new content may have loaded)
    if (expandDetailsEnabled) {
      const expandResult2 = await expandDetails(page, {
        timeout: Math.min(timeout / 4, 30000),
        limit: 500,
        delay: scrollDelay / 4,
      });
      expandResult.total += expandResult2.total;
      expandResult.detailsExpanded += expandResult2.detailsExpanded;
      expandResult.commentsExpanded += expandResult2.commentsExpanded;
    }
    // Disconnect (not close) so the shared Chrome session stays alive for other hooks.
    browser.disconnect();
    // Build the human-readable summary used as ArchiveResult.output_str.
    const elapsedSec = (result.elapsedMs / 1000).toFixed(1);
    const finalHeightStr = result.finalHeight.toLocaleString();
    const addedHeight = result.finalHeight - result.startingHeight;
    const addedStr = addedHeight > 0 ? `+${addedHeight.toLocaleString()}px new content` : 'no new content';
    const expandStr = expandResult.total > 0 ? `, expanded ${expandResult.total}` : '';
    const outputStr = `scrolled to ${finalHeightStr}px (${addedStr}${expandStr}) over ${elapsedSec}s`;
    console.error(`Success: ${outputStr}`);
    console.log(JSON.stringify({
      type: 'ArchiveResult',
      status: 'succeeded',
      output_str: outputStr,
    }));
    process.exit(0);
  } catch (e) {
    // Transient failure: report to stderr, exit non-zero, emit no JSONL.
    if (browser) browser.disconnect();
    console.error(`ERROR: ${e.name}: ${e.message}`);
    process.exit(1);
  }
}
// Top-level entry: any unhandled rejection is reported and treated as a
// transient (non-zero) failure with no JSONL output.
main().catch(e => {
  console.error(`Fatal error: ${e.message}`);
  process.exit(1);
});

View File

@@ -1 +0,0 @@
<span class="abx-output-icon abx-output-icon--infiniscroll" title="Infinite Scroll"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M12 5v9"/><path d="M8 10l4 4 4-4"/><circle cx="6" cy="19" r="1" fill="currentColor" stroke="none"/><circle cx="12" cy="19" r="1" fill="currentColor" stroke="none"/><circle cx="18" cy="19" r="1" fill="currentColor" stroke="none"/></svg></span>

View File

@@ -1,245 +0,0 @@
"""
Integration tests for infiniscroll plugin
Tests verify:
1. Hook script exists
2. Dependencies installed via chrome validation hooks
3. Verify deps with abx-pkg
4. INFINISCROLL_ENABLED=False skips without JSONL
5. Fails gracefully when no chrome session exists
6. Full integration test: scrolls page and outputs stats
7. Config options work (scroll limit, min height)
"""
import json
import os
import re
import subprocess
import time
import tempfile
from pathlib import Path
import pytest
# Import shared Chrome test helpers
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
get_test_env,
chrome_session,
)
PLUGIN_DIR = Path(__file__).parent.parent
INFINISCROLL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_infiniscroll.*'), None)
TEST_URL = 'https://www.singsing.movie/'
def test_hook_script_exists():
    """Verify on_Snapshot hook exists."""
    # INFINISCROLL_HOOK is resolved via next(glob, None), so it may be None.
    hook = INFINISCROLL_HOOK
    assert hook is not None, "Infiniscroll hook not found"
    assert hook.exists(), f"Hook not found: {hook}"
def test_verify_deps_with_abx_pkg():
    """Verify dependencies are available via abx-pkg after hook installation."""
    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
    EnvProvider.model_rebuild()
    # Node.js must resolve to a real binary on PATH for the infiniscroll hook to run.
    node_binary = Binary(name='node', binproviders=[EnvProvider()])
    resolved = node_binary.load()
    assert resolved and resolved.abspath, "Node.js required for infiniscroll plugin"
def test_config_infiniscroll_disabled_skips():
    """Test that INFINISCROLL_ENABLED=False exits without emitting JSONL."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        env = get_test_env()
        env['INFINISCROLL_ENABLED'] = 'False'
        result = subprocess.run(
            ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-disabled'],
            cwd=tmpdir,
            capture_output=True,
            text=True,
            env=env,
            timeout=30,
        )
        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
        # A disabled feature must stay completely silent on stdout (no JSONL records).
        jsonl_lines = [ln for ln in result.stdout.strip().split('\n') if ln.strip().startswith('{')]
        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, got: {jsonl_lines}"
def test_fails_gracefully_without_chrome_session():
    """Test that hook fails gracefully when no chrome session exists."""
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir) / 'snapshot' / 'infiniscroll'
        workdir.mkdir(parents=True, exist_ok=True)
        result = subprocess.run(
            ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-no-chrome'],
            cwd=workdir,
            capture_output=True,
            text=True,
            env=get_test_env(),
            timeout=30,
        )
        # Should fail (exit 1) when no chrome session
        assert result.returncode != 0, "Should fail when no chrome session exists"
        # Error could be about chrome/CDP not found, or puppeteer module missing
        err_lower = result.stderr.lower()
        expected_markers = ('chrome', 'cdp', 'puppeteer', 'module')
        assert any(marker in err_lower for marker in expected_markers), \
            f"Should mention chrome/CDP/puppeteer in error: {result.stderr}"
def test_scrolls_page_and_outputs_stats():
    """Integration test: scroll page and verify JSONL output format."""

    def _first_archive_result(stdout: str):
        # Return the first ArchiveResult JSONL record printed on stdout, if any.
        for raw in stdout.strip().split('\n'):
            raw = raw.strip()
            if not raw.startswith('{'):
                continue
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if record.get('type') == 'ArchiveResult':
                return record
        return None

    with tempfile.TemporaryDirectory() as tmpdir:
        with chrome_session(
            Path(tmpdir),
            crawl_id='test-infiniscroll',
            snapshot_id='snap-infiniscroll',
            test_url=TEST_URL,
        ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
            # Create infiniscroll output directory (sibling to chrome)
            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
            infiniscroll_dir.mkdir()
            # Keep the test fast: few scrolls, short delays, low height target.
            env['INFINISCROLL_SCROLL_LIMIT'] = '3'
            env['INFINISCROLL_SCROLL_DELAY'] = '500'
            env['INFINISCROLL_MIN_HEIGHT'] = '1000'
            result = subprocess.run(
                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-infiniscroll'],
                cwd=str(infiniscroll_dir),
                capture_output=True,
                text=True,
                timeout=60,
                env=env,
            )
            assert result.returncode == 0, f"Infiniscroll failed: {result.stderr}\nStdout: {result.stdout}"
            result_json = _first_archive_result(result.stdout)
            assert result_json is not None, f"Should have ArchiveResult JSONL output. Stdout: {result.stdout}"
            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
            # Verify output_str format: "scrolled to X,XXXpx (+Y,YYYpx new content) over Z.Zs"
            output_str = result_json.get('output_str', '')
            assert output_str.startswith('scrolled to'), f"output_str should start with 'scrolled to': {output_str}"
            assert 'px' in output_str, f"output_str should contain pixel count: {output_str}"
            assert re.search(r'over \d+(\.\d+)?s', output_str), f"output_str should contain duration: {output_str}"
            # Verify no files created in output directory
            output_files = list(infiniscroll_dir.iterdir())
            assert len(output_files) == 0, f"Should not create any files, but found: {output_files}"
def test_config_scroll_limit_honored():
    """Test that INFINISCROLL_SCROLL_LIMIT config is respected."""
    with tempfile.TemporaryDirectory() as tmpdir:
        with chrome_session(
            Path(tmpdir),
            crawl_id='test-scroll-limit',
            snapshot_id='snap-limit',
            test_url=TEST_URL,
        ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
            infiniscroll_dir.mkdir()
            # Cap at 2 scrolls with an unreachable height target so the limit,
            # not the page height, is what terminates the scroll loop.
            env['INFINISCROLL_SCROLL_LIMIT'] = '2'
            env['INFINISCROLL_SCROLL_DELAY'] = '500'
            env['INFINISCROLL_MIN_HEIGHT'] = '100000'
            result = subprocess.run(
                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-limit'],
                cwd=str(infiniscroll_dir),
                capture_output=True,
                text=True,
                timeout=60,
                env=env,
            )
            assert result.returncode == 0, f"Infiniscroll failed: {result.stderr}"
            # Find the ArchiveResult record in the JSONL stdout stream.
            result_json = None
            for raw in result.stdout.strip().split('\n'):
                raw = raw.strip()
                if not raw.startswith('{'):
                    continue
                try:
                    record = json.loads(raw)
                except json.JSONDecodeError:
                    continue
                if record.get('type') == 'ArchiveResult':
                    result_json = record
                    break
            assert result_json is not None, "Should have JSONL output"
            output_str = result_json.get('output_str', '')
            # Verify output format and that it completed (scroll limit enforced internally)
            assert output_str.startswith('scrolled to'), f"Should have valid output_str: {output_str}"
            assert result_json['status'] == 'succeeded', f"Should succeed with scroll limit: {result_json}"
def test_config_timeout_honored():
    """Test that INFINISCROLL_TIMEOUT config is respected."""
    with tempfile.TemporaryDirectory() as tmpdir:
        with chrome_session(
            Path(tmpdir),
            crawl_id='test-timeout',
            snapshot_id='snap-timeout',
            test_url=TEST_URL,
        ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
            infiniscroll_dir.mkdir()
            # 3s budget with 2s per-scroll delay and a huge scroll allowance:
            # the timeout (not the scroll limit) should be what stops the hook.
            env['INFINISCROLL_TIMEOUT'] = '3'
            env['INFINISCROLL_SCROLL_DELAY'] = '2000'
            env['INFINISCROLL_SCROLL_LIMIT'] = '100'
            env['INFINISCROLL_MIN_HEIGHT'] = '100000'
            started = time.time()
            result = subprocess.run(
                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-timeout'],
                cwd=str(infiniscroll_dir),
                capture_output=True,
                text=True,
                timeout=30,
                env=env,
            )
            elapsed = time.time() - started
            # Should complete within reasonable time (timeout + buffer)
            assert elapsed < 15, f"Should respect timeout, took {elapsed:.1f}s"
            assert result.returncode == 0, f"Should complete even with timeout: {result.stderr}"
# Allow running this test module directly (without invoking pytest externally).
if __name__ == '__main__':
    pytest.main([__file__, '-v'])

View File

@@ -1,14 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"additionalProperties": false,
"required_plugins": ["chrome"],
"properties": {
"ISTILLDONTCAREABOUTCOOKIES_ENABLED": {
"type": "boolean",
"default": true,
"x-aliases": ["USE_ISTILLDONTCAREABOUTCOOKIES"],
"description": "Enable I Still Don't Care About Cookies browser extension"
}
}
}

Some files were not shown because too many files have changed in this diff Show More