diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index abce917c..77ce73ec 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -30,7 +30,8 @@
       "WebFetch(domain:python-statemachine.readthedocs.io)",
       "Bash(./bin/run_plugin_tests.sh:*)",
       "Bash(done)",
-      "Bash(coverage erase:*)"
+      "Bash(coverage erase:*)",
+      "Bash(gh api:*)"
     ]
   },
   "hooks": {
diff --git a/README.md b/README.md
index 9a74338e..40598258 100644
--- a/README.md
+++ b/README.md
@@ -491,6 +491,7 @@ docker run -it -v $PWD:/data archivebox/archivebox help
 # optional: import your browser cookies into a persona for logged-in archiving
 archivebox persona create --import=chrome personal
 # supported: chrome/chromium/brave/edge (Chromium-based only)
+# use --profile to pick a specific browser profile directory (e.g. --profile=Default or --profile="Profile 1")
 # re-running import merges/dedupes cookies.txt (by domain/path/name) but replaces chrome_user_data
 ```
 
diff --git a/archivebox/__init__.py b/archivebox/__init__.py
index 7d471b40..40eb6692 100755
--- a/archivebox/__init__.py
+++ b/archivebox/__init__.py
@@ -18,6 +18,7 @@ from pathlib import Path
 # Import uuid_compat early to monkey-patch uuid.uuid7 before Django loads migrations
 # This fixes migrations generated on Python 3.14+ that reference uuid.uuid7 directly
 from archivebox import uuid_compat  # noqa: F401
+from abx_plugins import get_plugins_dir
 
 # Force unbuffered output for real-time logs
 if hasattr(sys.stdout, 'reconfigure'):
@@ -56,9 +57,13 @@ check_io_encoding()
 # Install monkey patches for third-party libraries
 from .misc.monkey_patches import *                    # noqa
 
-# Built-in plugin directories
-BUILTIN_PLUGINS_DIR = PACKAGE_DIR / 'plugins'
-USER_PLUGINS_DIR = Path(os.getcwd()) / 'plugins'
+# Plugin directories (ARCHIVEBOX_USER_PLUGINS_DIR / USER_PLUGINS_DIR, when set, are used as-is)
+BUILTIN_PLUGINS_DIR = Path(get_plugins_dir()).resolve()
+USER_PLUGINS_DIR = Path(
+    os.environ.get('ARCHIVEBOX_USER_PLUGINS_DIR')
+    or os.environ.get('USER_PLUGINS_DIR')
+    or Path(os.environ.get('DATA_DIR', os.getcwd())) / 'custom_plugins'
+)
 
 # These are kept for backwards compatibility with existing code
 # that checks for plugins. The new hook system uses discover_hooks()
diff --git a/archivebox/cli/archivebox_persona.py b/archivebox/cli/archivebox_persona.py
index 4a53e513..1e1d4e60 100644
--- a/archivebox/cli/archivebox_persona.py
+++ b/archivebox/cli/archivebox_persona.py
@@ -33,6 +33,7 @@ import shutil
 import platform
 import subprocess
 import tempfile
+import json
 from pathlib import Path
 from typing import Optional, Iterable
 from collections import OrderedDict
@@ -138,6 +139,55 @@ def get_edge_user_data_dir() -> Optional[Path]:
     return None
 
 
+def get_browser_binary(browser: str) -> Optional[str]:
+    """Return the first existing known install path for `browser` (case-insensitive) on this OS, else None."""
+    system = platform.system()
+    home = Path.home()
+    browser = browser.lower()
+    if system == 'Darwin':
+        candidates = {
+            'chrome': ['/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'],
+            'chromium': ['/Applications/Chromium.app/Contents/MacOS/Chromium'],
+            'brave': ['/Applications/Brave Browser.app/Contents/MacOS/Brave Browser'],
+            'edge': ['/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge'],
+        }.get(browser, [])
+    elif system == 'Linux':
+        candidates = {
+            'chrome': ['/usr/bin/google-chrome', '/usr/bin/google-chrome-stable', '/usr/bin/google-chrome-beta', '/usr/bin/google-chrome-unstable'],
+            'chromium': ['/usr/bin/chromium', '/usr/bin/chromium-browser'],
+            'brave': ['/usr/bin/brave-browser', '/usr/bin/brave-browser-beta', '/usr/bin/brave-browser-nightly'],
+            'edge': ['/usr/bin/microsoft-edge', '/usr/bin/microsoft-edge-stable', '/usr/bin/microsoft-edge-beta', '/usr/bin/microsoft-edge-dev'],
+        }.get(browser, [])
+    elif system == 'Windows':
+        local_app_data = Path(os.environ.get('LOCALAPPDATA', home / 'AppData' / 'Local'))
+        candidates = {
+            'chrome': [
+                str(local_app_data / 'Google' / 'Chrome' / 'Application' / 'chrome.exe'),
+                'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
+                'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
+            ],
+            'chromium': [str(local_app_data / 'Chromium' / 'Application' / 'chrome.exe')],
+            'brave': [
+                str(local_app_data / 'BraveSoftware' / 'Brave-Browser' / 'Application' / 'brave.exe'),
+                'C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe',
+                'C:\\Program Files (x86)\\BraveSoftware\\Brave-Browser\\Application\\brave.exe',
+            ],
+            'edge': [
+                str(local_app_data / 'Microsoft' / 'Edge' / 'Application' / 'msedge.exe'),
+                'C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe',
+                'C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe',
+            ],
+        }.get(browser, [])
+    else:
+        candidates = []
+    # Candidates are checked in listed order; the first path that exists on disk wins.
+    for candidate in candidates:
+        if candidate and Path(candidate).exists():
+            return candidate
+
+    return None
+
+
 BROWSER_PROFILE_FINDERS = {
     'chrome': get_chrome_user_data_dir,
     'chromium': get_chrome_user_data_dir,  # Same locations
@@ -194,7 +244,12 @@ def _merge_netscape_cookies(existing_file: Path, new_file: Path) -> None:
     _write_netscape_cookies(existing_file, existing)
 
 
-def extract_cookies_via_cdp(user_data_dir: Path, output_file: Path) -> bool:
+def extract_cookies_via_cdp(
+    user_data_dir: Path,
+    output_file: Path,
+    profile_dir: str | None = None,
+    chrome_binary: str | None = None,
+) -> bool:
     """
     Launch Chrome with the given user data dir and extract cookies via CDP.
 
@@ -218,6 +273,8 @@ def extract_cookies_via_cdp(user_data_dir: Path, output_file: Path) -> bool:
     env['NODE_MODULES_DIR'] = str(node_modules_dir)
     env['CHROME_USER_DATA_DIR'] = str(user_data_dir)
     env['CHROME_HEADLESS'] = 'true'
+    if chrome_binary:
+        env['CHROME_BINARY'] = str(chrome_binary)
     output_path = output_file
     temp_output = None
     temp_dir = None
@@ -225,6 +282,23 @@ def extract_cookies_via_cdp(user_data_dir: Path, output_file: Path) -> bool:
         temp_dir = Path(tempfile.mkdtemp(prefix='ab_cookies_'))
         temp_output = temp_dir / 'cookies.txt'
         output_path = temp_output
+    if profile_dir:
+        extra_arg = f'--profile-directory={profile_dir}'
+        existing_extra = env.get('CHROME_ARGS_EXTRA', '').strip()
+        args_list = []
+        if existing_extra:
+            if existing_extra.startswith('['):
+                try:
+                    parsed = json.loads(existing_extra)
+                    if isinstance(parsed, list):
+                        args_list.extend(str(x) for x in parsed)
+                except Exception:
+                    args_list.extend([s.strip() for s in existing_extra.split(',') if s.strip()])
+            else:
+                args_list.extend([s.strip() for s in existing_extra.split(',') if s.strip()])
+        args_list.append(extra_arg)
+        env['CHROME_ARGS_EXTRA'] = json.dumps(args_list)
+
     env['COOKIES_OUTPUT_FILE'] = str(output_path)
 
     try:
@@ -322,6 +396,7 @@ def ensure_path_within_personas_dir(persona_path: Path) -> bool:
 def create_personas(
     names: Iterable[str],
     import_from: Optional[str] = None,
+    profile: Optional[str] = None,
 ) -> int:
     """
     Create Personas from names.
@@ -360,6 +435,15 @@ def create_personas(
 
         rprint(f'[dim]Found {import_from} profile: {source_profile_dir}[/dim]', file=sys.stderr)
 
+        if profile is None and (source_profile_dir / 'Default').exists():
+            profile = 'Default'
+
+        browser_binary = get_browser_binary(import_from)
+        if browser_binary:
+            rprint(f'[dim]Using {import_from} binary: {browser_binary}[/dim]', file=sys.stderr)
+    else:
+        browser_binary = None
+
     created_count = 0
     for name in name_list:
         name = name.strip()
@@ -414,7 +498,12 @@ def create_personas(
                 # Extract cookies via CDP
                 rprint(f'[dim]Extracting cookies via CDP...[/dim]', file=sys.stderr)
 
-                if extract_cookies_via_cdp(persona_chrome_dir, cookies_file):
+                if extract_cookies_via_cdp(
+                    persona_chrome_dir,
+                    cookies_file,
+                    profile_dir=profile,
+                    chrome_binary=browser_binary,
+                ):
                     rprint(f'[green]Extracted cookies to {cookies_file}[/green]', file=sys.stderr)
                 else:
                     rprint(f'[yellow]Could not extract cookies automatically.[/yellow]', file=sys.stderr)
@@ -652,9 +741,10 @@ def main():
 @main.command('create')
 @click.argument('names', nargs=-1)
 @click.option('--import', 'import_from', help='Import profile from browser (chrome, chromium, brave, edge)')
-def create_cmd(names: tuple, import_from: Optional[str]):
+@click.option('--profile', help='Profile directory name under the user data dir (e.g. Default, Profile 1)')
+def create_cmd(names: tuple, import_from: Optional[str], profile: Optional[str]):
     """Create Personas, optionally importing from a browser profile."""
-    sys.exit(create_personas(names, import_from=import_from))
+    sys.exit(create_personas(names, import_from=import_from, profile=profile))
 
 
 @main.command('list')
diff --git a/archivebox/config/views.py b/archivebox/config/views.py
index 67805c7d..316e1aa3 100644
--- a/archivebox/config/views.py
+++ b/archivebox/config/views.py
@@ -277,7 +277,7 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
         # Show a helpful message when no plugins found
         rows['Name'].append('(no plugins found)')
         rows['Source'].append('-')
-        rows['Path'].append(mark_safe('<code>archivebox/plugins/</code> or <code>data/plugins/</code>'))
+        rows['Path'].append(mark_safe('<code>abx_plugins/plugins/</code> or <code>data/custom_plugins/</code>'))
         rows['Hooks'].append('-')
         rows['Config'].append('-')
 
diff --git a/archivebox/core/admin_snapshots.py b/archivebox/core/admin_snapshots.py
index 25c89e15..6d01c25b 100644
--- a/archivebox/core/admin_snapshots.py
+++ b/archivebox/core/admin_snapshots.py
@@ -140,6 +140,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
     list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'crawl__created_by', TagNameListFilter)
 
     fieldsets = (
+        ('Actions', {
+            'fields': ('admin_actions',),
+            'classes': ('card', 'wide', 'actions-card'),
+        }),
         ('URL', {
             'fields': ('url', 'title'),
             'classes': ('card', 'wide'),
@@ -168,10 +172,6 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
             'fields': ('output_dir',),
             'classes': ('card',),
         }),
-        ('Actions', {
-            'fields': ('admin_actions',),
-            'classes': ('card', 'wide'),
-        }),
         ('Archive Results', {
             'fields': ('archiveresults_list',),
             'classes': ('card', 'wide'),
@@ -179,7 +179,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
     )
 
     ordering = ['-created_at']
-    actions = ['add_tags', 'remove_tags', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
+    actions = ['add_tags', 'remove_tags', 'resnapshot_snapshot', 'update_snapshots', 'overwrite_snapshots', 'delete_snapshots']
     inlines = []  # Removed TagInline, using TagEditorWidget instead
     list_per_page = min(max(5, SERVER_CONFIG.SNAPSHOTS_PER_PAGE), 5000)
 
@@ -301,6 +301,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
     #         obj.pk,
     #     )
 
+    @admin.display(description='')
     def admin_actions(self, obj):
         summary_url = build_web_url(f'/{obj.archive_path}')
         results_url = build_web_url(f'/{obj.archive_path}/index.html#all')
@@ -311,13 +312,13 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
                    href="{}"
                    onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
                    onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
-                    📄 Summary Page
+                    📄 View Snapshot
                 </a>
                 <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
                    href="{}"
                    onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
                    onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
-                    📁 Result Files
+                    📁 All files
                 </a>
                 <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
                    href="{}"
@@ -329,19 +330,19 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
 
                 <span style="border-left: 1px solid #e2e8f0; height: 24px; margin: 0 4px;"></span>
 
-                <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #ecfdf5; border: 1px solid #a7f3d0; border-radius: 8px; color: #065f46; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
-                   href="/admin/core/snapshot/?id__exact={}"
-                   title="Get missing extractors"
-                   onmouseover="this.style.background='#d1fae5';"
-                   onmouseout="this.style.background='#ecfdf5';">
-                    ⬇️ Finish
-                </a>
                 <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #eff6ff; border: 1px solid #bfdbfe; border-radius: 8px; color: #1e40af; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
                    href="/admin/core/snapshot/?id__exact={}"
                    title="Create a fresh new snapshot of this URL"
                    onmouseover="this.style.background='#dbeafe';"
                    onmouseout="this.style.background='#eff6ff';">
-                    🆕 Archive Again
+                    🆕 Archive Now
+                </a>
+                <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #ecfdf5; border: 1px solid #a7f3d0; border-radius: 8px; color: #065f46; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
+                   href="/admin/core/snapshot/?id__exact={}"
+                   title="Redo failed extractors (missing outputs)"
+                   onmouseover="this.style.background='#d1fae5';"
+                   onmouseout="this.style.background='#ecfdf5';">
+                    🔁 Redo Failed
                 </a>
                 <a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #fffbeb; border: 1px solid #fde68a; border-radius: 8px; color: #92400e; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
                    href="/admin/core/snapshot/?id__exact={}"
@@ -707,7 +708,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
     #     return super().changelist_view(request, extra_context=None)
 
     @admin.action(
-        description="⏯️ Finish"
+        description="🔁 Redo Failed"
     )
     def update_snapshots(self, request, queryset):
         count = queryset.count()
@@ -721,7 +722,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
 
 
     @admin.action(
-        description="⬇️ Fresh"
+        description="🆕 Archive Now"
     )
     def resnapshot_snapshot(self, request, queryset):
         for snapshot in queryset:
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index 10c44c2a..79807277 100755
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -1704,8 +1704,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
         Create ArchiveResult records for all enabled hooks.
 
         Uses the hooks system to discover available hooks from:
-        - archivebox/plugins/*/on_Snapshot__*.{py,sh,js}
-        - data/plugins/*/on_Snapshot__*.{py,sh,js}
+        - abx_plugins/plugins/*/on_Snapshot__*.{py,sh,js}
+        - data/custom_plugins/*/on_Snapshot__*.{py,sh,js}
 
         Creates one ArchiveResult per hook (not per plugin), with hook_name set.
         This enables step-based execution where all hooks in a step can run in parallel.
@@ -2486,7 +2486,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
     @property
     def plugin_module(self) -> Any | None:
         # Hook scripts are now used instead of Python plugin modules
-        # The plugin name maps to hooks in archivebox/plugins/{plugin}/
+        # The plugin name maps to hooks in abx_plugins/plugins/{plugin}/
         return None
 
     def output_exists(self) -> bool:
diff --git a/archivebox/core/templatetags/core_tags.py b/archivebox/core/templatetags/core_tags.py
index e9a38023..859a4c6f 100644
--- a/archivebox/core/templatetags/core_tags.py
+++ b/archivebox/core/templatetags/core_tags.py
@@ -349,15 +349,6 @@ def plugin_name(value: str) -> str:
     return get_plugin_name(value)
 
 
-@register.filter
-def plugin_display_name(value: str) -> str:
-    """
-    Human-friendly plugin name overrides for UI display.
-    """
-    name = get_plugin_name(value)
-    if name == 'merkletree':
-        return 'hashes'
-    return name
 
 
 @register.simple_tag(takes_context=True)
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index 7225cd8e..fb7fabe7 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -1145,13 +1145,31 @@ def live_progress_view(request):
         for proc in running_workers:
             env = proc.env or {}
             if not isinstance(env, dict):
-                continue
+                env = {}
+
+            cmd = proc.cmd or []
             if proc.worker_type == 'crawl':
                 crawl_id = env.get('CRAWL_ID')
+                if not crawl_id:
+                    for i, part in enumerate(cmd):
+                        if part == '--crawl-id' and i + 1 < len(cmd):
+                            crawl_id = cmd[i + 1]
+                            break
+                        if part.startswith('--crawl-id='):
+                            crawl_id = part.split('=', 1)[1]
+                            break
                 if crawl_id:
                     crawl_worker_pids[str(crawl_id)] = proc.pid
             elif proc.worker_type == 'snapshot':
                 snapshot_id = env.get('SNAPSHOT_ID')
+                if not snapshot_id:
+                    for i, part in enumerate(cmd):
+                        if part == '--snapshot-id' and i + 1 < len(cmd):
+                            snapshot_id = cmd[i + 1]
+                            break
+                        if part.startswith('--snapshot-id='):
+                            snapshot_id = part.split('=', 1)[1]
+                            break
                 if snapshot_id:
                     snapshot_worker_pids[str(snapshot_id)] = proc.pid
 
@@ -1243,7 +1261,7 @@ def live_progress_view(request):
                         'plugin': ar.plugin,
                         'status': status,
                     }
-                    if ar.process_id and ar.process and ar.process.status == Process.StatusChoices.RUNNING:
+                    if status == ArchiveResult.StatusChoices.STARTED and ar.process_id and ar.process:
                         plugin_payload['pid'] = ar.process.pid
                     if status == ArchiveResult.StatusChoices.STARTED:
                         plugin_payload['progress'] = progress_value
diff --git a/archivebox/hooks.py b/archivebox/hooks.py
index b8429c11..1fab24af 100644
--- a/archivebox/hooks.py
+++ b/archivebox/hooks.py
@@ -6,8 +6,8 @@ with ArchiveBox via CLI arguments and stdout JSON output. This keeps the plugin
 system simple and language-agnostic.
 
 Directory structure:
-    archivebox/plugins/<plugin_name>/on_<Event>__<hook_name>.<ext>  (built-in)
-    data/plugins/<plugin_name>/on_<Event>__<hook_name>.<ext>        (user)
+    abx_plugins/plugins/<plugin_name>/on_<Event>__<hook_name>.<ext>     (built-in package)
+    data/custom_plugins/<plugin_name>/on_<Event>__<hook_name>.<ext>     (user)
 
 Hook contract:
     Input:  --url=<url> (and other --key=value args)
@@ -66,14 +66,20 @@ from functools import lru_cache
 from pathlib import Path
 from typing import List, Dict, Any, Optional, TypedDict
 
+from abx_plugins import get_plugins_dir
 from django.conf import settings
 from django.utils import timezone
 from django.utils.safestring import mark_safe
+from archivebox.config.constants import CONSTANTS
 
 
 # Plugin directories
-BUILTIN_PLUGINS_DIR = Path(__file__).parent / 'plugins'
-USER_PLUGINS_DIR = Path(getattr(settings, 'DATA_DIR', Path.cwd())) / 'plugins'
+BUILTIN_PLUGINS_DIR = Path(get_plugins_dir()).resolve()
+USER_PLUGINS_DIR = Path(
+    os.environ.get('ARCHIVEBOX_USER_PLUGINS_DIR')
+    or getattr(settings, 'USER_PLUGINS_DIR', '')
+    or str(CONSTANTS.USER_PLUGINS_DIR)
+).expanduser()
 
 
 # =============================================================================
@@ -197,11 +203,11 @@ def discover_hooks(
 
         for hook in hooks:
             # Get plugin name from parent directory
-            # e.g., archivebox/plugins/wget/on_Snapshot__50_wget.py -> 'wget'
+            # e.g., abx_plugins/plugins/wget/on_Snapshot__50_wget.py -> 'wget'
             plugin_name = hook.parent.name
 
             # Check if this is a plugin directory (not the root plugins dir)
-            if plugin_name in ('plugins', '.'):
+            if hook.parent.resolve() in (BUILTIN_PLUGINS_DIR.resolve(), USER_PLUGINS_DIR.resolve()):
                 # Hook is in root plugins directory, not a plugin subdir
                 # Include it by default (no filtering for non-plugin hooks)
                 enabled_hooks.append(hook)
@@ -581,7 +587,7 @@ def get_plugins() -> List[str]:
     The plugin name is the plugin directory name, not the hook script name.
 
     Example:
-    archivebox/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js
+    abx_plugins/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js
     -> plugin = 'chrome'
 
     Sorted alphabetically (plugins control their hook order via numeric prefixes in hook names).
@@ -728,7 +734,7 @@ def discover_plugins_that_provide_interface(
             try:
                 # Import the module dynamically
                 spec = importlib.util.spec_from_file_location(
-                    f'archivebox.plugins.{plugin_name}.{module_name}',
+                    f'archivebox.dynamic_plugins.{plugin_name}.{module_name}',
                     module_path
                 )
                 if spec is None or spec.loader is None:
@@ -942,7 +948,7 @@ def get_plugin_special_config(plugin_name: str, config: Dict[str, Any]) -> Dict[
 # Plugins can provide custom templates for rendering their output in the UI.
 # Templates are discovered by filename convention inside each plugin's templates/ dir:
 #
-#     archivebox/plugins/<plugin_name>/
+#     abx_plugins/plugins/<plugin_name>/
 #         templates/
 #             icon.html          # Icon for admin table view (small inline HTML)
 #             card.html          # Preview card for snapshot header
diff --git a/archivebox/plugins/search_backend_ripgrep/__init__.py b/archivebox/ideas/__init__.py
similarity index 100%
rename from archivebox/plugins/search_backend_ripgrep/__init__.py
rename to archivebox/ideas/__init__.py
diff --git a/archivebox/ideas/process_plugin.py b/archivebox/ideas/process_plugin.py
new file mode 100644
index 00000000..cca7e743
--- /dev/null
+++ b/archivebox/ideas/process_plugin.py
@@ -0,0 +1,318 @@
+__package__ = 'archivebox.ideas'
+
+import asyncio
+import json
+import os
+import shlex
+import signal
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Callable, Mapping, MutableMapping, Optional
+
+from pydantic import BaseModel, Field
+
+try:
+    from bubus import BaseEvent, EventBus
+except Exception as exc:  # pragma: no cover - optional dependency
+    raise ImportError('ProcessPlugin requires bubus to be installed') from exc
+
+try:
+    from bubus.service import uuid7str
+except Exception:  # pragma: no cover - optional dependency
+    from uuid import uuid4 as _uuid4
+
+    def uuid7str() -> str:
+        return str(_uuid4())
+
+
+def _utcnow() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+class ProcessRecord(BaseModel):
+    id: str = Field(default_factory=uuid7str)
+    cmd: list[str]
+    cwd: str | None = None
+    env: dict[str, str] = Field(default_factory=dict)
+    pid: int | None = None
+    started_at: datetime | None = None
+    ended_at: datetime | None = None
+    exit_code: int | None = None
+    stdout_path: str | None = None
+    stderr_path: str | None = None
+    cmd_path: str | None = None
+    pid_path: str | None = None
+    is_background: bool = False
+    parent_process_id: str | None = None
+
+
+class ProcessLaunch(BaseEvent[ProcessRecord]):
+    cmd: list[str]
+    cwd: str | None = None
+    env: dict[str, str] | None = None
+    timeout: float | None = None
+    output_dir: str | None = None
+    log_prefix: str | None = None
+    is_background: bool = False
+    parent_process_id: str | None = None
+    parse_stdout_events: bool = True
+
+
+class ProcessStarted(BaseEvent[None]):
+    process: ProcessRecord
+
+
+class ProcessExited(BaseEvent[None]):
+    process: ProcessRecord
+
+
+class ProcessKill(BaseEvent[ProcessRecord]):
+    process_id: str
+    signal: int = signal.SIGTERM
+    timeout: float | None = 10.0
+
+
+@dataclass
+class _RunningProcess:
+    process: asyncio.subprocess.Process
+    record: ProcessRecord
+    stdout_task: asyncio.Task[None] | None
+    stderr_task: asyncio.Task[None] | None
+    watcher_task: asyncio.Task[None] | None
+    parent_event_id: str | None
+
+
+JsonEventAdapter = Callable[[dict[str, Any], str | None], Optional[BaseEvent[Any]]]
+
+
+class ProcessPlugin:
+    """Spawn and monitor processes using events (no Django required)."""
+
+    def __init__(
+        self,
+        bus: EventBus,
+        *,
+        env: Mapping[str, str] | None = None,
+        json_event_adapter: JsonEventAdapter | None = None,
+    ) -> None:
+        self.bus = bus
+        self.env = dict(env or os.environ)
+        self.json_event_adapter = json_event_adapter
+        self._running: MutableMapping[str, _RunningProcess] = {}
+
+    def register_event_handlers(self) -> None:
+        self.bus.on(ProcessLaunch, self.on_ProcessLaunch)
+        self.bus.on(ProcessKill, self.on_ProcessKill)
+
+    async def on_ProcessLaunch(self, event: ProcessLaunch) -> ProcessRecord:
+        parent_event_id = event.event_id
+        proc_id = uuid7str()
+        cwd = event.cwd or event.output_dir or os.getcwd()
+        output_dir = Path(event.output_dir or cwd)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        env = {**self.env, **(event.env or {})}
+
+        log_prefix = event.log_prefix or proc_id
+        stdout_path = output_dir / f'{log_prefix}.stdout.log'
+        stderr_path = output_dir / f'{log_prefix}.stderr.log'
+        cmd_path = output_dir / f'{log_prefix}.sh'
+        pid_path = output_dir / f'{log_prefix}.pid'
+
+        self._write_cmd_file(cmd_path, event.cmd)
+
+        proc = await asyncio.create_subprocess_exec(
+            *event.cmd,
+            cwd=str(cwd),
+            env=env,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+            start_new_session=True,
+        )
+
+        self._write_pid_file(pid_path, proc.pid)
+
+        record = ProcessRecord(
+            id=proc_id,
+            cmd=event.cmd,
+            cwd=str(cwd),
+            env=env,
+            pid=proc.pid,
+            started_at=_utcnow(),
+            stdout_path=str(stdout_path),
+            stderr_path=str(stderr_path),
+            cmd_path=str(cmd_path),
+            pid_path=str(pid_path),
+            is_background=event.is_background,
+            parent_process_id=event.parent_process_id,
+        )
+
+        await event.event_bus.dispatch(
+            ProcessStarted(process=record, event_parent_id=parent_event_id)
+        )
+
+        stdout_task = asyncio.create_task(
+            self._consume_stream(
+                proc.stdout, stdout_path, parent_event_id, event.parse_stdout_events
+            )
+        )
+        stderr_task = asyncio.create_task(
+            self._consume_stream(proc.stderr, stderr_path, parent_event_id, False)
+        )
+
+        running = _RunningProcess(
+            process=proc,
+            record=record,
+            stdout_task=stdout_task,
+            stderr_task=stderr_task,
+            watcher_task=None,
+            parent_event_id=parent_event_id,
+        )
+        self._running[proc_id] = running
+
+        if event.is_background:
+            running.watcher_task = asyncio.create_task(
+                self._watch_process(proc_id, event.timeout)
+            )
+            return record
+
+        await self._watch_process(proc_id, event.timeout)
+        return self._running.get(proc_id, running).record
+
+    async def on_ProcessKill(self, event: ProcessKill) -> ProcessRecord:
+        """Signal a tracked process, wait for it to exit, and return its finalized record."""
+        running = self._running.get(event.process_id)
+        if not running:
+            raise RuntimeError(f'Process not found: {event.process_id}')
+        proc = running.process
+        self._terminate_process(proc, event.signal)
+        if event.timeout is not None:
+            try:
+                await asyncio.wait_for(proc.wait(), timeout=event.timeout)
+            except asyncio.TimeoutError:
+                # Escalate, then reap: without this wait returncode may still be None at finalize.
+                self._terminate_process(proc, signal.SIGKILL)
+                await proc.wait()
+        else:
+            await proc.wait()
+        await self._finalize_process(event.process_id)
+        return self._running.get(event.process_id, running).record
+
+    async def _watch_process(self, process_id: str, timeout: float | None) -> None:
+        running = self._running.get(process_id)
+        if not running:
+            return
+        proc = running.process
+        try:
+            if timeout is not None:
+                await asyncio.wait_for(proc.wait(), timeout=timeout)
+            else:
+                await proc.wait()
+        except asyncio.TimeoutError:
+            self._terminate_process(proc, signal.SIGTERM)
+            await asyncio.sleep(2)
+            if proc.returncode is None:
+                self._terminate_process(proc, signal.SIGKILL)
+                await proc.wait()
+        await self._finalize_process(process_id)
+
+    async def _finalize_process(self, process_id: str) -> None:
+        """Record exit code/end time for a tracked process, dispatch ProcessExited, and untrack it."""
+        running = self._running.get(process_id)
+        if not running:
+            return
+        proc = running.process
+        record = running.record
+        # Let both stream consumer tasks finish before the exit is recorded.
+        if running.stdout_task:
+            await running.stdout_task
+        if running.stderr_task:
+            await running.stderr_task
+
+        record.exit_code = proc.returncode
+        record.ended_at = _utcnow()
+
+        await self.bus.dispatch(
+            ProcessExited(process=record, event_parent_id=running.parent_event_id)
+        )
+        # NOTE(review): untracked only after the awaits above, so overlapping callers may each dispatch ProcessExited - confirm.
+        self._running.pop(process_id, None)
+
+    async def _consume_stream(
+        self, stream: asyncio.StreamReader | None, path: Path, parent_event_id: str | None, parse_events: bool,
+    ) -> None:
+        """Copy `stream` line-by-line into `path`, optionally dispatching JSON lines as events."""
+        if stream is None:
+            return
+        with path.open('w', encoding='utf-8') as fh:
+            while True:
+                try:
+                    line = await stream.readline()
+                except ValueError:  # line exceeded the reader's buffer limit; readline() already dropped it
+                    continue        # keep draining so the child never blocks on a full pipe
+                if not line:
+                    break
+                text = line.decode('utf-8', errors='replace')
+                fh.write(text)
+                fh.flush()
+                if parse_events:
+                    await self._maybe_dispatch_json_event(text, parent_event_id)
+
+    async def _maybe_dispatch_json_event(self, line: str, parent_event_id: str | None) -> None:
+        text = line.strip()
+        if not text.startswith('{') or not text.endswith('}'):
+            return
+        try:
+            data = json.loads(text)
+        except json.JSONDecodeError:
+            return
+
+        event = None
+        if self.json_event_adapter:
+            event = self.json_event_adapter(data, parent_event_id)
+        elif isinstance(data, dict) and 'event_type' in data:
+            try:
+                event = BaseEvent.model_validate(data)
+            except Exception:
+                event = None
+
+        if event is None:
+            return
+
+        if not getattr(event, 'event_parent_id', None) and parent_event_id:
+            event.event_parent_id = parent_event_id
+        await self.bus.dispatch(event)
+
+    @staticmethod
+    def _write_cmd_file(path: Path, cmd: list[str]) -> None:
+        cmd_line = ' '.join(shlex.quote(part) for part in cmd)
+        path.write_text(cmd_line + '\n', encoding='utf-8')
+
+    @staticmethod
+    def _write_pid_file(path: Path, pid: int) -> None:
+        path.write_text(str(pid), encoding='utf-8')
+        ts = datetime.now().timestamp()
+        os.utime(path, (ts, ts))
+
+    @staticmethod
+    def _terminate_process(proc: asyncio.subprocess.Process, sig: int) -> None:
+        if proc.returncode is not None:
+            return
+        try:
+            os.killpg(proc.pid, sig)
+        except Exception:
+            try:
+                os.kill(proc.pid, sig)
+            except Exception:
+                pass
+
+
+__all__ = [
+    'ProcessRecord',
+    'ProcessLaunch',
+    'ProcessStarted',
+    'ProcessExited',
+    'ProcessKill',
+    'ProcessPlugin',
+]
diff --git a/archivebox/plugins/accessibility/config.json b/archivebox/plugins/accessibility/config.json
deleted file mode 100644
index 208d2332..00000000
--- a/archivebox/plugins/accessibility/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "ACCESSIBILITY_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_ACCESSIBILITY", "USE_ACCESSIBILITY"],
-      "description": "Enable accessibility tree capture"
-    },
-    "ACCESSIBILITY_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for accessibility capture in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/accessibility/on_Snapshot__39_accessibility.js b/archivebox/plugins/accessibility/on_Snapshot__39_accessibility.js
deleted file mode 100755
index 7b73a422..00000000
--- a/archivebox/plugins/accessibility/on_Snapshot__39_accessibility.js
+++ /dev/null
@@ -1,288 +0,0 @@
-#!/usr/bin/env node
-/**
- * Extract accessibility tree and page outline from a URL.
- *
- * Extracts:
- * - Page outline (headings h1-h6, sections, articles)
- * - Iframe tree
- * - Accessibility snapshot
- * - ARIA labels and roles
- *
- * Usage: on_Snapshot__39_accessibility.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes accessibility/accessibility.json
- *
- * Environment variables:
- *     SAVE_ACCESSIBILITY: Enable accessibility extraction (default: true)
- */
-
-const fs = require('fs');
-const path = require('path');
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer-core');
-
-// Extractor metadata
-const PLUGIN_NAME = 'accessibility';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'accessibility.json';
-const CHROME_SESSION_DIR = '../chrome';
-const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
-
-// Parse command line arguments
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach(arg => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-// Get environment variable with default
-function getEnv(name, defaultValue = '') {
-    return (process.env[name] || defaultValue).trim();
-}
-
-function getEnvBool(name, defaultValue = false) {
-    const val = getEnv(name, '').toLowerCase();
-    if (['true', '1', 'yes', 'on'].includes(val)) return true;
-    if (['false', '0', 'no', 'off'].includes(val)) return false;
-    return defaultValue;
-}
-
-// Wait for chrome tab to be fully loaded
-async function waitForChromeTabLoaded(timeoutMs = 60000) {
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(navigationFile)) {
-            return true;
-        }
-        // Wait 100ms before checking again
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-
-    return false;
-}
-
-// Get CDP URL from chrome plugin
-function getCdpUrl() {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    if (fs.existsSync(cdpFile)) {
-        return fs.readFileSync(cdpFile, 'utf8').trim();
-    }
-    return null;
-}
-
-function assertChromeSession() {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
-    const pidFile = path.join(CHROME_SESSION_DIR, 'chrome.pid');
-    if (!fs.existsSync(cdpFile) || !fs.existsSync(targetIdFile) || !fs.existsSync(pidFile)) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-    try {
-        const pid = parseInt(fs.readFileSync(pidFile, 'utf8').trim(), 10);
-        if (!pid || Number.isNaN(pid)) throw new Error('Invalid pid');
-        process.kill(pid, 0);
-    } catch (e) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-    const cdpUrl = getCdpUrl();
-    if (!cdpUrl) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-    return cdpUrl;
-}
-
-// Extract accessibility info
-async function extractAccessibility(url) {
-    // Output directory is current directory (hook already runs in output dir)
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-
-    let browser = null;
-
-    try {
-        // Connect to existing Chrome session
-        const cdpUrl = assertChromeSession();
-
-        browser = await puppeteer.connect({
-            browserWSEndpoint: cdpUrl,
-        });
-
-        // Get the page
-        const pages = await browser.pages();
-        const page = pages.find(p => p.url().startsWith('http')) || pages[0];
-
-        if (!page) {
-            return { success: false, error: 'No page found in Chrome session' };
-        }
-
-        // Get accessibility snapshot
-        const accessibilityTree = await page.accessibility.snapshot({ interestingOnly: true });
-
-        // Extract page outline (headings, sections, etc.)
-        const outline = await page.evaluate(() => {
-            const headings = [];
-            const elements = document.querySelectorAll(
-                'h1, h2, h3, h4, h5, h6, a[name], header, footer, article, main, aside, nav, section, figure, summary, table, form, iframe'
-            );
-
-            elements.forEach(elem => {
-                // Skip unnamed anchors
-                if (elem.tagName.toLowerCase() === 'a' && !elem.name) return;
-
-                const tagName = elem.tagName.toLowerCase();
-                const elemId = elem.id || elem.name || elem.getAttribute('aria-label') || elem.role || '';
-                const elemClasses = (elem.className || '').toString().trim().split(/\s+/).slice(0, 3).join(' .');
-                const action = elem.action?.split('/').pop() || '';
-
-                let summary = (elem.innerText || '').slice(0, 128);
-                if (summary.length >= 128) summary += '...';
-
-                let prefix = '';
-                let title = '';
-
-                // Format headings with # prefix
-                const level = parseInt(tagName.replace('h', ''));
-                if (!isNaN(level)) {
-                    prefix = '#'.repeat(level);
-                    title = elem.innerText || elemId || elemClasses;
-                } else {
-                    // For other elements, create breadcrumb path
-                    const parents = [tagName];
-                    let node = elem.parentNode;
-                    while (node && parents.length < 5) {
-                        if (node.tagName) {
-                            const tag = node.tagName.toLowerCase();
-                            if (!['div', 'span', 'p', 'body', 'html'].includes(tag)) {
-                                parents.unshift(tag);
-                            } else {
-                                parents.unshift('');
-                            }
-                        }
-                        node = node.parentNode;
-                    }
-                    prefix = parents.join('>');
-
-                    title = elemId ? `#${elemId}` : '';
-                    if (!title && elemClasses) title = `.${elemClasses}`;
-                    if (action) title += ` /${action}`;
-                    if (summary && !title.includes(summary)) title += `: ${summary}`;
-                }
-
-                // Clean up title
-                title = title.replace(/\s+/g, ' ').trim();
-
-                if (prefix) {
-                    headings.push(`${prefix} ${title}`);
-                }
-            });
-
-            return headings;
-        });
-
-        // Get iframe tree
-        const iframes = [];
-        function dumpFrameTree(frame, indent = '>') {
-            iframes.push(indent + frame.url());
-            for (const child of frame.childFrames()) {
-                dumpFrameTree(child, indent + '>');
-            }
-        }
-        dumpFrameTree(page.mainFrame(), '');
-
-        const accessibilityData = {
-            url,
-            headings: outline,
-            iframes,
-            tree: accessibilityTree,
-        };
-
-        // Write output
-        fs.writeFileSync(outputPath, JSON.stringify(accessibilityData, null, 2));
-
-        return { success: true, output: outputPath, accessibilityData };
-
-    } catch (e) {
-        return { success: false, error: `${e.name}: ${e.message}` };
-    } finally {
-        if (browser) {
-            browser.disconnect();
-        }
-    }
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__39_accessibility.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    const startTs = new Date();
-    let status = 'failed';
-    let output = null;
-    let error = '';
-
-    try {
-        // Check if enabled
-        if (!getEnvBool('ACCESSIBILITY_ENABLED', true)) {
-            console.log('Skipping accessibility (ACCESSIBILITY_ENABLED=False)');
-            // Output clean JSONL (no RESULT_JSON= prefix)
-            console.log(JSON.stringify({
-                type: 'ArchiveResult',
-                status: 'skipped',
-                output_str: 'ACCESSIBILITY_ENABLED=False',
-            }));
-            process.exit(0);
-        }
-
-        // Check if Chrome session exists, then wait for page load
-        assertChromeSession();
-        const pageLoaded = await waitForChromeTabLoaded(60000);
-        if (!pageLoaded) {
-            throw new Error('Page not loaded after 60s (chrome_navigate must complete first)');
-        }
-
-        const result = await extractAccessibility(url);
-
-        if (result.success) {
-            status = 'succeeded';
-            output = result.output;
-            const headingCount = result.accessibilityData.headings.length;
-            const iframeCount = result.accessibilityData.iframes.length;
-            console.log(`Accessibility extracted: ${headingCount} headings, ${iframeCount} iframes`);
-        } else {
-            status = 'failed';
-            error = result.error;
-        }
-    } catch (e) {
-        error = `${e.name}: ${e.message}`;
-        status = 'failed';
-    }
-
-    const endTs = new Date();
-
-    if (error) console.error(`ERROR: ${error}`);
-
-    // Output clean JSONL (no RESULT_JSON= prefix)
-    console.log(JSON.stringify({
-        type: 'ArchiveResult',
-        status,
-        output_str: output || error || '',
-    }));
-
-    process.exit(status === 'succeeded' ? 0 : 1);
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/accessibility/templates/icon.html b/archivebox/plugins/accessibility/templates/icon.html
deleted file mode 100644
index e1c30fa0..00000000
--- a/archivebox/plugins/accessibility/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--accessibility" title="Accessibility"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="4.5" r="2" fill="currentColor" stroke="none"/><path d="M4 7.5h16"/><path d="M12 7.5v12"/><path d="M7 20l5-6 5 6"/></svg></span>
diff --git a/archivebox/plugins/accessibility/tests/test_accessibility.py b/archivebox/plugins/accessibility/tests/test_accessibility.py
deleted file mode 100644
index cccfa215..00000000
--- a/archivebox/plugins/accessibility/tests/test_accessibility.py
+++ /dev/null
@@ -1,195 +0,0 @@
-"""
-Tests for the accessibility plugin.
-
-Tests the real accessibility hook with an actual URL to verify
-accessibility tree and page outline extraction.
-"""
-
-import json
-import shutil
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-from django.test import TestCase
-
-# Import chrome test helpers
-sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
-from chrome_test_helpers import (
-    chrome_session,
-    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
-)
-
-
-def chrome_available() -> bool:
-    """Check if Chrome/Chromium is available."""
-    for name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']:
-        if shutil.which(name):
-            return True
-    return False
-
-
-# Get the path to the accessibility hook
-PLUGIN_DIR = get_plugin_dir(__file__)
-ACCESSIBILITY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_accessibility.*')
-
-
-class TestAccessibilityPlugin(TestCase):
-    """Test the accessibility plugin."""
-
-    def test_accessibility_hook_exists(self):
-        """Accessibility hook script should exist."""
-        self.assertIsNotNone(ACCESSIBILITY_HOOK, "Accessibility hook not found in plugin directory")
-        self.assertTrue(ACCESSIBILITY_HOOK.exists(), f"Hook not found: {ACCESSIBILITY_HOOK}")
-
-
-class TestAccessibilityWithChrome(TestCase):
-    """Integration tests for accessibility plugin with Chrome."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = Path(tempfile.mkdtemp())
-
-    def tearDown(self):
-        """Clean up."""
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_accessibility_extracts_page_outline(self):
-        """Accessibility hook should extract headings and accessibility tree."""
-        test_url = 'https://example.com'
-        snapshot_id = 'test-accessibility-snapshot'
-
-        try:
-            with chrome_session(
-                self.temp_dir,
-                crawl_id='test-accessibility-crawl',
-                snapshot_id=snapshot_id,
-                test_url=test_url,
-                navigate=True,
-                timeout=30,
-            ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-                # Use the environment from chrome_session (already has CHROME_HEADLESS=true)
-
-                # Run accessibility hook with the active Chrome session
-                result = subprocess.run(
-                    ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                    cwd=str(snapshot_chrome_dir),
-                    capture_output=True,
-                    text=True,
-                    timeout=60,
-                    env=env
-                )
-
-                # Check for output file
-                accessibility_output = snapshot_chrome_dir / 'accessibility.json'
-
-                accessibility_data = None
-
-                # Try parsing from file first
-                if accessibility_output.exists():
-                    with open(accessibility_output) as f:
-                        try:
-                            accessibility_data = json.load(f)
-                        except json.JSONDecodeError:
-                            pass
-
-                # Verify hook ran successfully
-                self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
-                self.assertNotIn('Traceback', result.stderr)
-
-                # example.com has headings, so we should get accessibility data
-                self.assertIsNotNone(accessibility_data, "No accessibility data was generated")
-
-                # Verify we got page outline data
-                self.assertIn('headings', accessibility_data, f"Missing headings: {accessibility_data}")
-                self.assertIn('url', accessibility_data, f"Missing url: {accessibility_data}")
-
-        except RuntimeError:
-            raise
-
-    def test_accessibility_disabled_skips(self):
-        """Test that ACCESSIBILITY_ENABLED=False skips without error."""
-        test_url = 'https://example.com'
-        snapshot_id = 'test-disabled'
-
-        env = get_test_env()
-        env['ACCESSIBILITY_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-            cwd=str(self.temp_dir),
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        # Should exit 0 even when disabled
-        self.assertEqual(result.returncode, 0, f"Should succeed when disabled: {result.stderr}")
-
-        # Should NOT create output file when disabled
-        accessibility_output = self.temp_dir / 'accessibility.json'
-        self.assertFalse(accessibility_output.exists(), "Should not create file when disabled")
-
-    def test_accessibility_missing_url_argument(self):
-        """Test that missing --url argument causes error."""
-        snapshot_id = 'test-missing-url'
-
-        result = subprocess.run(
-            ['node', str(ACCESSIBILITY_HOOK), f'--snapshot-id={snapshot_id}'],
-            cwd=str(self.temp_dir),
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=get_test_env()
-        )
-
-        # Should fail with non-zero exit code
-        self.assertNotEqual(result.returncode, 0, "Should fail when URL missing")
-
-    def test_accessibility_missing_snapshot_id_argument(self):
-        """Test that missing --snapshot-id argument causes error."""
-        test_url = 'https://example.com'
-
-        result = subprocess.run(
-            ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}'],
-            cwd=str(self.temp_dir),
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=get_test_env()
-        )
-
-        # Should fail with non-zero exit code
-        self.assertNotEqual(result.returncode, 0, "Should fail when snapshot-id missing")
-
-    def test_accessibility_with_no_chrome_session(self):
-        """Test that hook fails gracefully when no Chrome session exists."""
-        test_url = 'https://example.com'
-        snapshot_id = 'test-no-chrome'
-
-        result = subprocess.run(
-            ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-            cwd=str(self.temp_dir),
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=get_test_env()
-        )
-
-        # Should fail when no Chrome session
-        self.assertNotEqual(result.returncode, 0, "Should fail when no Chrome session exists")
-        # Error should mention CDP or Chrome
-        err_lower = result.stderr.lower()
-        self.assertTrue(
-            any(x in err_lower for x in ['chrome', 'cdp', 'cannot find', 'puppeteer']),
-            f"Should mention Chrome/CDP in error: {result.stderr}"
-        )
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/apt/on_Binary__13_apt_install.py b/archivebox/plugins/apt/on_Binary__13_apt_install.py
deleted file mode 100644
index 82e343ff..00000000
--- a/archivebox/plugins/apt/on_Binary__13_apt_install.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/usr/bin/env python3
-"""
-Install a binary using apt package manager.
-
-Usage: on_Binary__install_using_apt_provider.py --binary-id=<uuid> --machine-id=<uuid> --name=<name>
-Output: Binary JSONL record to stdout after installation
-"""
-
-import json
-import sys
-
-import rich_click as click
-from abx_pkg import Binary, AptProvider, BinProviderOverrides
-
-# Fix pydantic forward reference issue
-AptProvider.model_rebuild()
-
-
-@click.command()
-@click.option('--binary-id', required=True, help="Binary UUID")
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--name', required=True, help="Binary name to install")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--overrides', default=None, help="JSON-encoded overrides dict")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None):
-    """Install binary using apt package manager."""
-
-    # Check if apt provider is allowed
-    if binproviders != '*' and 'apt' not in binproviders.split(','):
-        click.echo(f"apt provider not allowed for {name}", err=True)
-        sys.exit(0)  # Not an error, just skip
-
-    # Use abx-pkg AptProvider to install binary
-    provider = AptProvider()
-    if not provider.INSTALLER_BIN:
-        click.echo("apt not available on this system", err=True)
-        sys.exit(1)
-
-    click.echo(f"Installing {name} via apt...", err=True)
-
-    try:
-        # Parse overrides if provided
-        overrides_dict = None
-        if overrides:
-            try:
-                overrides_dict = json.loads(overrides)
-                # Extract apt-specific overrides
-                overrides_dict = overrides_dict.get('apt', {})
-                click.echo(f"Using apt install overrides: {overrides_dict}", err=True)
-            except json.JSONDecodeError:
-                click.echo(f"Warning: Failed to parse overrides JSON: {overrides}", err=True)
-
-        binary = Binary(name=name, binproviders=[provider], overrides={'apt': overrides_dict} if overrides_dict else {}).install()
-    except Exception as e:
-        click.echo(f"apt install failed: {e}", err=True)
-        sys.exit(1)
-
-    if not binary.abspath:
-        click.echo(f"{name} not found after apt install", err=True)
-        sys.exit(1)
-
-    # Output Binary JSONL record to stdout
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'apt',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
-    }
-    print(json.dumps(record))
-
-    # Log human-readable info to stderr
-    click.echo(f"Installed {name} at {binary.abspath}", err=True)
-    click.echo(f"  version: {binary.version}", err=True)
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/apt/templates/icon.html b/archivebox/plugins/apt/templates/icon.html
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/apt/tests/test_apt_provider.py b/archivebox/plugins/apt/tests/test_apt_provider.py
deleted file mode 100644
index c8b7934e..00000000
--- a/archivebox/plugins/apt/tests/test_apt_provider.py
+++ /dev/null
@@ -1,154 +0,0 @@
-"""
-Tests for the apt binary provider plugin.
-
-Tests cover:
-1. Hook script execution
-2. apt package availability detection
-3. JSONL output format
-"""
-
-import json
-import os
-import shutil
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-from django.test import TestCase
-
-
-# Get the path to the apt provider hook
-PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_apt_install.py'), None)
-
-
-def apt_available() -> bool:
-    """Check if apt is installed."""
-    return shutil.which('apt') is not None or shutil.which('apt-get') is not None
-
-
-def is_linux() -> bool:
-    """Check if running on Linux."""
-    import platform
-    return platform.system().lower() == 'linux'
-
-
-class TestAptProviderHook(TestCase):
-    """Test the apt binary provider installation hook."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = tempfile.mkdtemp()
-
-    def tearDown(self):
-        """Clean up."""
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_hook_script_exists(self):
-        """Hook script should exist."""
-        self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")
-
-    def test_hook_skips_when_apt_not_allowed(self):
-        """Hook should skip when apt not in allowed binproviders."""
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=wget',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--binproviders=pip,npm',  # apt not allowed
-            ],
-            capture_output=True,
-            text=True,
-            timeout=30
-        )
-
-        # Should exit cleanly (code 0) when apt not allowed
-        self.assertIn('apt provider not allowed', result.stderr)
-        self.assertEqual(result.returncode, 0)
-
-    @pytest.mark.skipif(not is_linux(), reason="apt only available on Linux")
-    def test_hook_detects_apt(self):
-        """Hook should detect apt binary when available."""
-        assert apt_available(), "apt not installed"
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=nonexistent-pkg-xyz123',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-            ],
-            capture_output=True,
-            text=True,
-            timeout=30
-        )
-
-        # Should not say apt is not available
-        self.assertNotIn('apt not available', result.stderr)
-
-    def test_hook_handles_overrides(self):
-        """Hook should accept overrides JSON."""
-        overrides = json.dumps({
-            'apt': {'packages': ['custom-package-name']}
-        })
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=test-pkg',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                f'--overrides={overrides}',
-            ],
-            capture_output=True,
-            text=True,
-            timeout=30
-        )
-
-        # Should not crash parsing overrides
-        self.assertNotIn('Traceback', result.stderr)
-
-
-@pytest.mark.skipif(not is_linux(), reason="apt only available on Linux")
-class TestAptProviderSystemBinaries(TestCase):
-    """Test apt provider with system binaries."""
-
-    def test_detect_existing_binary(self):
-        """apt provider should detect already-installed system binaries."""
-        assert apt_available(), "apt not installed"
-        # Check for a binary that's almost certainly installed (like 'ls' or 'bash')
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=bash',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-            ],
-            capture_output=True,
-            text=True,
-            timeout=60
-        )
-
-        # Parse JSONL output
-        for line in result.stdout.split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'Binary' and record.get('name') == 'bash':
-                        # Found bash
-                        self.assertTrue(record.get('abspath'))
-                        self.assertTrue(Path(record['abspath']).exists())
-                        return
-                except json.JSONDecodeError:
-                    continue
-
-        # apt may not be able to "install" bash (already installed)
-        # Just verify no crash
-        self.assertNotIn('Traceback', result.stderr)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/archivedotorg/config.json b/archivebox/plugins/archivedotorg/config.json
deleted file mode 100644
index b517183e..00000000
--- a/archivebox/plugins/archivedotorg/config.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "ARCHIVEDOTORG_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_ARCHIVEDOTORG", "USE_ARCHIVEDOTORG", "SUBMIT_ARCHIVEDOTORG"],
-      "description": "Submit URLs to archive.org Wayback Machine"
-    },
-    "ARCHIVEDOTORG_TIMEOUT": {
-      "type": "integer",
-      "default": 60,
-      "minimum": 10,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for archive.org submission in seconds"
-    },
-    "ARCHIVEDOTORG_USER_AGENT": {
-      "type": "string",
-      "default": "",
-      "x-fallback": "USER_AGENT",
-      "description": "User agent string"
-    }
-  }
-}
diff --git a/archivebox/plugins/archivedotorg/on_Snapshot__08_archivedotorg.bg.py b/archivebox/plugins/archivedotorg/on_Snapshot__08_archivedotorg.bg.py
deleted file mode 100644
index 11642b24..00000000
--- a/archivebox/plugins/archivedotorg/on_Snapshot__08_archivedotorg.bg.py
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env python3
-"""
-Submit a URL to archive.org for archiving.
-
-Usage: on_Snapshot__archivedotorg.bg.py --url=<url> --snapshot-id=<uuid>
-Output: Writes archive.org.txt to $PWD with the archived URL
-
-Environment variables:
-    ARCHIVEDOTORG_TIMEOUT: Timeout in seconds (default: 60)
-    USER_AGENT: User agent string
-
-    # Fallback to ARCHIVING_CONFIG values if ARCHIVEDOTORG_* not set:
-    TIMEOUT: Fallback timeout
-
-Note: This extractor uses the 'requests' library which is bundled with ArchiveBox.
-      It can run standalone if requests is installed: pip install requests
-"""
-
-import json
-import os
-import sys
-from pathlib import Path
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'archivedotorg'
-OUTPUT_DIR = '.'
-OUTPUT_FILE = 'archive.org.txt'
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def submit_to_archivedotorg(url: str) -> tuple[bool, str | None, str]:
-    """
-    Submit URL to archive.org Wayback Machine.
-
-    Returns: (success, output_path, error_message)
-    """
-    def log(message: str) -> None:
-        print(f'[archivedotorg] {message}', file=sys.stderr)
-
-    try:
-        import requests
-    except ImportError:
-        return False, None, 'requests library not installed'
-
-    timeout = get_env_int('ARCHIVEDOTORG_TIMEOUT') or get_env_int('TIMEOUT', 60)
-    user_agent = get_env('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
-
-    submit_url = f'https://web.archive.org/save/{url}'
-    log(f'Submitting to Wayback Machine (timeout={timeout}s)')
-    log(f'GET {submit_url}')
-
-    try:
-        response = requests.get(
-            submit_url,
-            timeout=timeout,
-            headers={'User-Agent': user_agent},
-            allow_redirects=True,
-        )
-        log(f'HTTP {response.status_code} final_url={response.url}')
-
-        # Check for successful archive
-        content_location = response.headers.get('Content-Location', '')
-        x_archive_orig_url = response.headers.get('X-Archive-Orig-Url', '')
-        if content_location:
-            log(f'Content-Location: {content_location}')
-        if x_archive_orig_url:
-            log(f'X-Archive-Orig-Url: {x_archive_orig_url}')
-
-        # Build archive URL
-        if content_location:
-            archive_url = f'https://web.archive.org{content_location}'
-            Path(OUTPUT_FILE).write_text(archive_url, encoding='utf-8')
-            log(f'Saved archive URL -> {archive_url}')
-            return True, OUTPUT_FILE, ''
-        elif 'web.archive.org' in response.url:
-            # We were redirected to an archive page
-            Path(OUTPUT_FILE).write_text(response.url, encoding='utf-8')
-            log(f'Redirected to archive page -> {response.url}')
-            return True, OUTPUT_FILE, ''
-        else:
-            # Check for errors in response
-            if 'RobotAccessControlException' in response.text:
-                # Blocked by robots.txt - save submit URL for manual retry
-                Path(OUTPUT_FILE).write_text(submit_url, encoding='utf-8')
-                log('Blocked by robots.txt, saved submit URL for manual retry')
-                return True, OUTPUT_FILE, ''  # Consider this a soft success
-            elif response.status_code >= 400:
-                return False, None, f'HTTP {response.status_code}'
-            else:
-                # Save submit URL anyway
-                Path(OUTPUT_FILE).write_text(submit_url, encoding='utf-8')
-                log('No archive URL returned, saved submit URL for manual retry')
-                return True, OUTPUT_FILE, ''
-
-    except requests.Timeout:
-        return False, None, f'Request timed out after {timeout} seconds'
-    except requests.RequestException as e:
-        return False, None, f'{type(e).__name__}: {e}'
-    except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
-
-
-@click.command()
-@click.option('--url', required=True, help='URL to submit to archive.org')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Submit a URL to archive.org for archiving."""
-
-    # Check if feature is enabled
-    if get_env('ARCHIVEDOTORG_ENABLED', 'True').lower() in ('false', '0', 'no', 'off'):
-        print('Skipping archive.org submission (ARCHIVEDOTORG_ENABLED=False)', file=sys.stderr)
-        # Temporary failure (config disabled) - NO JSONL emission
-        sys.exit(0)
-
-    try:
-        # Run extraction
-        success, output, error = submit_to_archivedotorg(url)
-
-        if success:
-            # Success - emit ArchiveResult with output file
-            result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or '',
-            }
-            print(json.dumps(result))
-            sys.exit(0)
-        else:
-            # Transient error (network, timeout, HTTP error) - emit NO JSONL
-            # System will retry later
-            print(f'ERROR: {error}', file=sys.stderr)
-            sys.exit(1)
-
-    except Exception as e:
-        # Unexpected error - also transient, emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/archivedotorg/templates/card.html b/archivebox/plugins/archivedotorg/templates/card.html
deleted file mode 100644
index 64a3c4d1..00000000
--- a/archivebox/plugins/archivedotorg/templates/card.html
+++ /dev/null
@@ -1,12 +0,0 @@
-{% load config_tags %}
-{% get_config "ARCHIVEDOTORG_ENABLED" as enabled %}
-{% if enabled %}
-<!-- Archive.org thumbnail - iframe preview of archived page -->
-<div class="extractor-thumbnail archivedotorg-thumbnail" style="width: 100%; height: 100px; overflow: hidden;">
-    <iframe src="{{ output_path }}"
-            style="width: 100%; height: 100px; border: none; pointer-events: none;"
-            loading="lazy"
-            sandbox="allow-same-origin">
-    </iframe>
-</div>
-{% endif %}
diff --git a/archivebox/plugins/archivedotorg/templates/icon.html b/archivebox/plugins/archivedotorg/templates/icon.html
deleted file mode 100644
index e3f48634..00000000
--- a/archivebox/plugins/archivedotorg/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--archivedotorg" title="Archive.org"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M3 7h18"/><rect x="3" y="7" width="18" height="13" rx="2"/><path d="M9 12h6"/></svg></span>
diff --git a/archivebox/plugins/archivedotorg/tests/test_archivedotorg.py b/archivebox/plugins/archivedotorg/tests/test_archivedotorg.py
deleted file mode 100644
index 1e4b4a97..00000000
--- a/archivebox/plugins/archivedotorg/tests/test_archivedotorg.py
+++ /dev/null
@@ -1,93 +0,0 @@
-"""
-Integration tests for archivedotorg plugin
-
-Tests verify standalone archive.org extractor execution.
-"""
-
-import json
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-ARCHIVEDOTORG_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_archivedotorg.*'), None)
-TEST_URL = 'https://example.com'
-
-def test_hook_script_exists():
-    assert ARCHIVEDOTORG_HOOK.exists()
-
-def test_submits_to_archivedotorg():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        result = subprocess.run(
-            [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
-            cwd=tmpdir, capture_output=True, text=True, timeout=60
-        )
-
-        assert result.returncode in (0, 1)
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        if result.returncode == 0:
-            # Success - should have ArchiveResult
-            assert result_json, "Should have ArchiveResult JSONL output on success"
-            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-        else:
-            # Transient error - no JSONL output, just stderr
-            assert not result_json, "Should NOT emit JSONL on transient error"
-            assert result.stderr, "Should have error message in stderr"
-
-def test_config_save_archivedotorg_false_skips():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        import os
-        env = os.environ.copy()
-        env['ARCHIVEDOTORG_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30
-        )
-
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-
-        # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
-
-def test_handles_timeout():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        import os
-        env = os.environ.copy()
-        env['TIMEOUT'] = '1'
-
-        result = subprocess.run(
-            [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'testtimeout'],
-            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30
-        )
-
-        # Timeout is a transient error - should exit 1 with no JSONL
-        assert result.returncode in (0, 1), "Should complete without hanging"
-
-        # If it timed out (exit 1), should have no JSONL output
-        if result.returncode == 1:
-            jsonl_lines = [line for line in result.stdout.strip().split('\n')
-                          if line.strip().startswith('{')]
-            assert len(jsonl_lines) == 0, "Should not emit JSONL on timeout (transient error)"
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/brew/on_Binary__12_brew_install.py b/archivebox/plugins/brew/on_Binary__12_brew_install.py
deleted file mode 100644
index 928e1bd5..00000000
--- a/archivebox/plugins/brew/on_Binary__12_brew_install.py
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env python3
-"""
-Install a binary using Homebrew package manager.
-
-Usage: on_Binary__install_using_brew_provider.py --binary-id=<uuid> --machine-id=<uuid> --name=<name> [--custom-cmd=<cmd>]
-Output: Binary JSONL record to stdout after installation
-
-Environment variables:
-    MACHINE_ID: Machine UUID (set by orchestrator)
-"""
-
-import json
-import os
-import sys
-
-import rich_click as click
-from abx_pkg import Binary, BrewProvider, BinProviderOverrides
-
-# Fix pydantic forward reference issue
-BrewProvider.model_rebuild()
-
-
-@click.command()
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--binary-id', required=True, help="Dependency UUID")
-@click.option('--name', required=True, help="Binary name to install")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--custom-cmd', default=None, help="Custom install command")
-@click.option('--overrides', default=None, help="JSON-encoded overrides dict")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_cmd: str | None, overrides: str | None):
-    """Install binary using Homebrew."""
-
-    if binproviders != '*' and 'brew' not in binproviders.split(','):
-        click.echo(f"brew provider not allowed for {name}", err=True)
-        sys.exit(0)
-
-    # Use abx-pkg BrewProvider to install binary
-    provider = BrewProvider()
-    if not provider.INSTALLER_BIN:
-        click.echo("brew not available on this system", err=True)
-        sys.exit(1)
-
-    click.echo(f"Installing {name} via brew...", err=True)
-
-    try:
-        # Parse overrides if provided
-        overrides_dict = None
-        if overrides:
-            try:
-                overrides_dict = json.loads(overrides)
-                click.echo(f"Using custom install overrides: {overrides_dict}", err=True)
-            except json.JSONDecodeError:
-                click.echo(f"Warning: Failed to parse overrides JSON: {overrides}", err=True)
-
-        binary = Binary(name=name, binproviders=[provider], overrides=overrides_dict or {}).install()
-    except Exception as e:
-        click.echo(f"brew install failed: {e}", err=True)
-        sys.exit(1)
-
-    if not binary.abspath:
-        click.echo(f"{name} not found after brew install", err=True)
-        sys.exit(1)
-
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    # Output Binary JSONL record to stdout
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'brew',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
-    }
-    print(json.dumps(record))
-
-    # Log human-readable info to stderr
-    click.echo(f"Installed {name} at {binary.abspath}", err=True)
-    click.echo(f"  version: {binary.version}", err=True)
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/brew/templates/icon.html b/archivebox/plugins/brew/templates/icon.html
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/chrome/chrome_utils.js b/archivebox/plugins/chrome/chrome_utils.js
deleted file mode 100755
index e0e42a7e..00000000
--- a/archivebox/plugins/chrome/chrome_utils.js
+++ /dev/null
@@ -1,1997 +0,0 @@
-#!/usr/bin/env node
-/**
- * Chrome Extension Management Utilities
- *
- * Handles downloading, installing, and managing Chrome extensions for browser automation.
- * Ported from the TypeScript implementation in archivebox.ts
- */
-
-const fs = require('fs');
-const path = require('path');
-const crypto = require('crypto');
-const http = require('http');
-const net = require('net');
-const { exec, spawn } = require('child_process');
-const { promisify } = require('util');
-const { Readable } = require('stream');
-const { finished } = require('stream/promises');
-
-const execAsync = promisify(exec);
-
-const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
-
-// ============================================================================
-// Environment helpers
-// ============================================================================
-
-/**
- * Get environment variable with default value.
- * @param {string} name - Environment variable name
- * @param {string} [defaultValue=''] - Default value if not set
- * @returns {string} - Trimmed environment variable value
- */
-function getEnv(name, defaultValue = '') {
-    return (process.env[name] || defaultValue).trim();
-}
-
-/**
- * Get boolean environment variable.
- * @param {string} name - Environment variable name
- * @param {boolean} [defaultValue=false] - Default value if not set
- * @returns {boolean} - Boolean value
- */
-function getEnvBool(name, defaultValue = false) {
-    const val = getEnv(name, '').toLowerCase();
-    if (['true', '1', 'yes', 'on'].includes(val)) return true;
-    if (['false', '0', 'no', 'off'].includes(val)) return false;
-    return defaultValue;
-}
-
-/**
- * Get integer environment variable.
- * @param {string} name - Environment variable name
- * @param {number} [defaultValue=0] - Default value if not set
- * @returns {number} - Integer value
- */
-function getEnvInt(name, defaultValue = 0) {
-    const val = parseInt(getEnv(name, String(defaultValue)), 10);
-    return isNaN(val) ? defaultValue : val;
-}
-
-/**
- * Get array environment variable (JSON array or comma-separated string).
- *
- * Parsing strategy:
- * - If value starts with '[', parse as JSON array
- * - Otherwise, parse as comma-separated values
- *
- * This prevents incorrect splitting of arguments that contain internal commas.
- * For arguments with commas, use JSON format:
- *   CHROME_ARGS='["--user-data-dir=/path/with,comma", "--window-size=1440,900"]'
- *
- * @param {string} name - Environment variable name
- * @param {string[]} [defaultValue=[]] - Default value if not set
- * @returns {string[]} - Array of strings
- */
-function getEnvArray(name, defaultValue = []) {
-    const val = getEnv(name, '');
-    if (!val) return defaultValue;
-
-    // If starts with '[', parse as JSON array
-    if (val.startsWith('[')) {
-        try {
-            const parsed = JSON.parse(val);
-            if (Array.isArray(parsed)) return parsed;
-        } catch (e) {
-            console.error(`[!] Failed to parse ${name} as JSON array: ${e.message}`);
-            // Fall through to comma-separated parsing
-        }
-    }
-
-    // Parse as comma-separated values
-    return val.split(',').map(s => s.trim()).filter(Boolean);
-}
-
-/**
- * Parse resolution string into width/height.
- * @param {string} resolution - Resolution string like "1440,2000"
- * @returns {{width: number, height: number}} - Parsed dimensions
- */
-function parseResolution(resolution) {
-    const [width, height] = resolution.split(',').map(x => parseInt(x.trim(), 10));
-    return { width: width || 1440, height: height || 2000 };
-}
-
-// ============================================================================
-// PID file management
-// ============================================================================
-
-/**
- * Write PID file with specific mtime for process validation.
- * @param {string} filePath - Path to PID file
- * @param {number} pid - Process ID
- * @param {number} startTimeSeconds - Process start time in seconds
- */
-function writePidWithMtime(filePath, pid, startTimeSeconds) {
-    fs.writeFileSync(filePath, String(pid));
-    const startTimeMs = startTimeSeconds * 1000;
-    fs.utimesSync(filePath, new Date(startTimeMs), new Date(startTimeMs));
-}
-
-/**
- * Write a shell script that can re-run the Chrome command.
- * @param {string} filePath - Path to script file
- * @param {string} binary - Chrome binary path
- * @param {string[]} args - Chrome arguments
- */
-function writeCmdScript(filePath, binary, args) {
-    const escape = (arg) =>
-        arg.includes(' ') || arg.includes('"') || arg.includes('$')
-            ? `"${arg.replace(/"/g, '\\"')}"`
-            : arg;
-    fs.writeFileSync(
-        filePath,
-        `#!/bin/bash\n${binary} ${args.map(escape).join(' ')}\n`
-    );
-    fs.chmodSync(filePath, 0o755);
-}
-
-// ============================================================================
-// Port management
-// ============================================================================
-
-/**
- * Find a free port on localhost.
- * @returns {Promise<number>} - Available port number
- */
-function findFreePort() {
-    return new Promise((resolve, reject) => {
-        const server = net.createServer();
-        server.unref();
-        server.on('error', reject);
-        server.listen(0, () => {
-            const port = server.address().port;
-            server.close(() => resolve(port));
-        });
-    });
-}
-
-/**
- * Wait for Chrome's DevTools port to be ready.
- * @param {number} port - Debug port number
- * @param {number} [timeout=30000] - Timeout in milliseconds
- * @returns {Promise<Object>} - Chrome version info
- */
-function waitForDebugPort(port, timeout = 30000) {
-    const startTime = Date.now();
-
-    return new Promise((resolve, reject) => {
-        const tryConnect = () => {
-            if (Date.now() - startTime > timeout) {
-                reject(new Error(`Timeout waiting for Chrome debug port ${port}`));
-                return;
-            }
-
-            const req = http.get(`http://127.0.0.1:${port}/json/version`, (res) => {
-                let data = '';
-                res.on('data', (chunk) => (data += chunk));
-                res.on('end', () => {
-                    try {
-                        const info = JSON.parse(data);
-                        resolve(info);
-                    } catch (e) {
-                        setTimeout(tryConnect, 100);
-                    }
-                });
-            });
-
-            req.on('error', () => {
-                setTimeout(tryConnect, 100);
-            });
-
-            req.setTimeout(1000, () => {
-                req.destroy();
-                setTimeout(tryConnect, 100);
-            });
-        };
-
-        tryConnect();
-    });
-}
-
-// ============================================================================
-// Zombie process cleanup
-// ============================================================================
-
-/**
- * Kill zombie Chrome processes from stale crawls.
- * Recursively scans DATA_DIR for any .../chrome/...pid files from stale crawls.
- * Does not assume specific directory structure - works with nested paths.
- * @param {string} [dataDir] - Data directory (defaults to DATA_DIR env or '.')
- * @returns {number} - Number of zombies killed
- */
-function killZombieChrome(dataDir = null) {
-    dataDir = dataDir || getEnv('DATA_DIR', '.');
-    const now = Date.now();
-    const fiveMinutesAgo = now - 300000;
-    let killed = 0;
-
-    console.error('[*] Checking for zombie Chrome processes...');
-
-    if (!fs.existsSync(dataDir)) {
-        console.error('[+] No data directory found');
-        return 0;
-    }
-
-    /**
-     * Recursively find all chrome/.pid files in directory tree
-     * @param {string} dir - Directory to search
-     * @param {number} depth - Current recursion depth (limit to 10)
-     * @returns {Array<{pidFile: string, crawlDir: string}>} - Array of PID file info
-     */
-    function findChromePidFiles(dir, depth = 0) {
-        if (depth > 10) return [];  // Prevent infinite recursion
-
-        const results = [];
-        try {
-            const entries = fs.readdirSync(dir, { withFileTypes: true });
-
-            for (const entry of entries) {
-                if (!entry.isDirectory()) continue;
-
-                const fullPath = path.join(dir, entry.name);
-
-                // Found a chrome directory - check for .pid files
-                if (entry.name === 'chrome') {
-                    try {
-                        const pidFiles = fs.readdirSync(fullPath).filter(f => f.endsWith('.pid'));
-                        const crawlDir = dir;  // Parent of chrome/ is the crawl dir
-
-                        for (const pidFileName of pidFiles) {
-                            results.push({
-                                pidFile: path.join(fullPath, pidFileName),
-                                crawlDir: crawlDir,
-                            });
-                        }
-                    } catch (e) {
-                        // Skip if can't read chrome dir
-                    }
-                } else {
-                    // Recurse into subdirectory (skip hidden dirs and node_modules)
-                    if (!entry.name.startsWith('.') && entry.name !== 'node_modules') {
-                        results.push(...findChromePidFiles(fullPath, depth + 1));
-                    }
-                }
-            }
-        } catch (e) {
-            // Skip if can't read directory
-        }
-        return results;
-    }
-
-    try {
-        const chromePids = findChromePidFiles(dataDir);
-
-        for (const {pidFile, crawlDir} of chromePids) {
-            // Check if crawl was modified recently (still active)
-            try {
-                const crawlStats = fs.statSync(crawlDir);
-                if (crawlStats.mtimeMs > fiveMinutesAgo) {
-                    continue;  // Crawl is active, skip
-                }
-            } catch (e) {
-                continue;
-            }
-
-            // Crawl is stale, check PID
-            try {
-                const pid = parseInt(fs.readFileSync(pidFile, 'utf8').trim(), 10);
-                if (isNaN(pid) || pid <= 0) continue;
-
-                // Check if process exists
-                try {
-                    process.kill(pid, 0);
-                } catch (e) {
-                    // Process dead, remove stale PID file
-                    try { fs.unlinkSync(pidFile); } catch (e) {}
-                    continue;
-                }
-
-                // Process alive and crawl is stale - zombie!
-                console.error(`[!] Found zombie (PID ${pid}) from stale crawl ${path.basename(crawlDir)}`);
-
-                try {
-                    try { process.kill(-pid, 'SIGKILL'); } catch (e) { process.kill(pid, 'SIGKILL'); }
-                    killed++;
-                    console.error(`[+] Killed zombie (PID ${pid})`);
-                    try { fs.unlinkSync(pidFile); } catch (e) {}
-                } catch (e) {
-                    console.error(`[!] Failed to kill PID ${pid}: ${e.message}`);
-                }
-            } catch (e) {
-                // Skip invalid PID files
-            }
-        }
-    } catch (e) {
-        console.error(`[!] Error scanning for Chrome processes: ${e.message}`);
-    }
-
-    if (killed > 0) {
-        console.error(`[+] Killed ${killed} zombie process(es)`);
-    } else {
-        console.error('[+] No zombies found');
-    }
-
-    // Clean up stale SingletonLock files from persona chrome_user_data directories
-    const personasDir = path.join(dataDir, 'personas');
-    if (fs.existsSync(personasDir)) {
-        try {
-            const personas = fs.readdirSync(personasDir, { withFileTypes: true });
-            for (const persona of personas) {
-                if (!persona.isDirectory()) continue;
-
-                const userDataDir = path.join(personasDir, persona.name, 'chrome_user_data');
-                const singletonLock = path.join(userDataDir, 'SingletonLock');
-
-                if (fs.existsSync(singletonLock)) {
-                    try {
-                        fs.unlinkSync(singletonLock);
-                        console.error(`[+] Removed stale SingletonLock: ${singletonLock}`);
-                    } catch (e) {
-                        // Ignore - may be in use by active Chrome
-                    }
-                }
-            }
-        } catch (e) {
-            // Ignore errors scanning personas directory
-        }
-    }
-
-    return killed;
-}
-
-// ============================================================================
-// Chrome launching
-// ============================================================================
-
-/**
- * Launch Chromium with extensions and return connection info.
- *
- * @param {Object} options - Launch options
- * @param {string} [options.binary] - Chrome binary path (auto-detected if not provided)
- * @param {string} [options.outputDir='chrome'] - Directory for output files
- * @param {string} [options.userDataDir] - Chrome user data directory for persistent sessions
- * @param {string} [options.resolution='1440,2000'] - Window resolution
- * @param {boolean} [options.headless=true] - Run in headless mode
- * @param {boolean} [options.sandbox=true] - Enable Chrome sandbox
- * @param {boolean} [options.checkSsl=true] - Check SSL certificates
- * @param {string[]} [options.extensionPaths=[]] - Paths to unpacked extensions
- * @param {boolean} [options.killZombies=true] - Kill zombie processes first
- * @returns {Promise<Object>} - {success, cdpUrl, pid, port, process, error}
- */
-async function launchChromium(options = {}) {
-    const {
-        binary = findChromium(),
-        outputDir = 'chrome',
-        userDataDir = getEnv('CHROME_USER_DATA_DIR'),
-        resolution = getEnv('CHROME_RESOLUTION') || getEnv('RESOLUTION', '1440,2000'),
-        userAgent = getEnv('CHROME_USER_AGENT') || getEnv('USER_AGENT', ''),
-        headless = getEnvBool('CHROME_HEADLESS', true),
-        sandbox = getEnvBool('CHROME_SANDBOX', true),
-        checkSsl = getEnvBool('CHROME_CHECK_SSL_VALIDITY', getEnvBool('CHECK_SSL_VALIDITY', true)),
-        extensionPaths = [],
-        killZombies = true,
-    } = options;
-
-    if (!binary) {
-        return { success: false, error: 'Chrome binary not found' };
-    }
-
-    const downloadsDir = getEnv('CHROME_DOWNLOADS_DIR');
-
-    // Kill zombies first
-    if (killZombies) {
-        killZombieChrome();
-    }
-
-    const { width, height } = parseResolution(resolution);
-
-    // Create output directory
-    if (!fs.existsSync(outputDir)) {
-        fs.mkdirSync(outputDir, { recursive: true });
-    }
-
-    // Create user data directory if specified and doesn't exist
-    if (userDataDir) {
-        if (!fs.existsSync(userDataDir)) {
-            fs.mkdirSync(userDataDir, { recursive: true });
-            console.error(`[*] Created user data directory: ${userDataDir}`);
-        }
-        // Clean up any stale SingletonLock file from previous crashed sessions
-        const singletonLock = path.join(userDataDir, 'SingletonLock');
-        if (fs.existsSync(singletonLock)) {
-            try {
-                fs.unlinkSync(singletonLock);
-                console.error(`[*] Removed stale SingletonLock: ${singletonLock}`);
-            } catch (e) {
-                console.error(`[!] Failed to remove SingletonLock: ${e.message}`);
-            }
-        }
-        if (downloadsDir) {
-            try {
-                const defaultProfileDir = path.join(userDataDir, 'Default');
-                const prefsPath = path.join(defaultProfileDir, 'Preferences');
-                fs.mkdirSync(defaultProfileDir, { recursive: true });
-                let prefs = {};
-                if (fs.existsSync(prefsPath)) {
-                    try {
-                        prefs = JSON.parse(fs.readFileSync(prefsPath, 'utf-8'));
-                    } catch (e) {
-                        prefs = {};
-                    }
-                }
-                prefs.download = prefs.download || {};
-                prefs.download.default_directory = downloadsDir;
-                prefs.download.prompt_for_download = false;
-                fs.writeFileSync(prefsPath, JSON.stringify(prefs));
-                console.error(`[*] Set Chrome download directory: ${downloadsDir}`);
-            } catch (e) {
-                console.error(`[!] Failed to set Chrome download directory: ${e.message}`);
-            }
-        }
-    }
-
-    // Find a free port
-    const debugPort = await findFreePort();
-    console.error(`[*] Using debug port: ${debugPort}`);
-
-    // Get base Chrome args from config (static flags from CHROME_ARGS env var)
-    // These come from config.json defaults, merged by get_config() in Python
-    const baseArgs = getEnvArray('CHROME_ARGS', []);
-
-    // Get extra user-provided args
-    const extraArgs = getEnvArray('CHROME_ARGS_EXTRA', []);
-
-    // Build dynamic Chrome arguments (these must be computed at runtime)
-    const inDocker = getEnvBool('IN_DOCKER', false);
-    const dynamicArgs = [
-        // Remote debugging setup
-        `--remote-debugging-port=${debugPort}`,
-        '--remote-debugging-address=127.0.0.1',
-
-        // Sandbox settings (disable in Docker)
-        ...(sandbox ? [] : (inDocker ? ['--no-sandbox', '--disable-setuid-sandbox'] : [])),
-
-        // Docker-specific workarounds
-        '--disable-dev-shm-usage',
-
-        // Window size
-        `--window-size=${width},${height}`,
-
-        // User data directory (for persistent sessions with persona)
-        ...(userDataDir ? [`--user-data-dir=${userDataDir}`] : []),
-
-        // User agent
-        ...(userAgent ? [`--user-agent=${userAgent}`] : []),
-
-        // Headless mode
-        ...(headless ? ['--headless=new'] : []),
-
-        // SSL certificate checking
-        ...(checkSsl ? [] : ['--ignore-certificate-errors']),
-    ];
-
-    // Combine all args: base (from config) + dynamic (runtime) + extra (user overrides)
-    // Dynamic args come after base so they can override if needed
-    const chromiumArgs = [...baseArgs, ...dynamicArgs, ...extraArgs];
-
-    // Ensure keychain prompts are disabled on macOS
-    if (!chromiumArgs.includes('--use-mock-keychain')) {
-        chromiumArgs.push('--use-mock-keychain');
-    }
-
-    // Add extension loading flags
-    if (extensionPaths.length > 0) {
-        const extPathsArg = extensionPaths.join(',');
-        chromiumArgs.push(`--load-extension=${extPathsArg}`);
-        chromiumArgs.push('--enable-unsafe-extension-debugging');
-        chromiumArgs.push('--disable-features=DisableLoadExtensionCommandLineSwitch,ExtensionManifestV2Unsupported,ExtensionManifestV2Disabled');
-        console.error(`[*] Loading ${extensionPaths.length} extension(s) via --load-extension`);
-    }
-
-    chromiumArgs.push('about:blank');
-
-    // Write command script for debugging
-    writeCmdScript(path.join(outputDir, 'cmd.sh'), binary, chromiumArgs);
-
-    try {
-        console.error(`[*] Spawning Chromium (headless=${headless})...`);
-        const chromiumProcess = spawn(binary, chromiumArgs, {
-            stdio: ['ignore', 'pipe', 'pipe'],
-            detached: true,
-        });
-
-        const chromePid = chromiumProcess.pid;
-        const chromeStartTime = Date.now() / 1000;
-
-        if (chromePid) {
-            console.error(`[*] Chromium spawned (PID: ${chromePid})`);
-            writePidWithMtime(path.join(outputDir, 'chrome.pid'), chromePid, chromeStartTime);
-        }
-
-        // Pipe Chrome output to stderr
-        chromiumProcess.stdout.on('data', (data) => {
-            process.stderr.write(`[chromium:stdout] ${data}`);
-        });
-        chromiumProcess.stderr.on('data', (data) => {
-            process.stderr.write(`[chromium:stderr] ${data}`);
-        });
-
-        // Wait for debug port
-        console.error(`[*] Waiting for debug port ${debugPort}...`);
-        const versionInfo = await waitForDebugPort(debugPort, 30000);
-        const wsUrl = versionInfo.webSocketDebuggerUrl;
-        console.error(`[+] Chromium ready: ${wsUrl}`);
-
-        fs.writeFileSync(path.join(outputDir, 'cdp_url.txt'), wsUrl);
-        fs.writeFileSync(path.join(outputDir, 'port.txt'), String(debugPort));
-
-        return {
-            success: true,
-            cdpUrl: wsUrl,
-            pid: chromePid,
-            port: debugPort,
-            process: chromiumProcess,
-        };
-    } catch (e) {
-        return { success: false, error: `${e.name}: ${e.message}` };
-    }
-}
-
-/**
- * Check if a process is still running.
- * @param {number} pid - Process ID to check
- * @returns {boolean} - True if process exists
- */
-function isProcessAlive(pid) {
-    try {
-        process.kill(pid, 0);  // Signal 0 checks existence without killing
-        return true;
-    } catch (e) {
-        return false;
-    }
-}
-
-/**
- * Find all Chrome child processes for a given debug port.
- * @param {number} port - Debug port number
- * @returns {Array<number>} - Array of PIDs
- */
-function findChromeProcessesByPort(port) {
-    const { execSync } = require('child_process');
-    const pids = [];
-
-    try {
-        // Find all Chrome processes using this debug port
-        const output = execSync(
-            `ps aux | grep -i "chrome.*--remote-debugging-port=${port}" | grep -v grep | awk '{print $2}'`,
-            { encoding: 'utf8', timeout: 5000 }
-        );
-
-        for (const line of output.split('\n')) {
-            const pid = parseInt(line.trim(), 10);
-            if (!isNaN(pid) && pid > 0) {
-                pids.push(pid);
-            }
-        }
-    } catch (e) {
-        // Command failed or no processes found
-    }
-
-    return pids;
-}
-
-/**
- * Kill a Chrome process by PID.
- * Always sends SIGTERM before SIGKILL, then verifies death.
- *
- * @param {number} pid - Process ID to kill
- * @param {string} [outputDir] - Directory containing PID files to clean up
- */
-async function killChrome(pid, outputDir = null) {
-    if (!pid) return;
-
-    console.error(`[*] Killing Chrome process tree (PID ${pid})...`);
-
-    // Get debug port for finding child processes
-    let debugPort = null;
-    if (outputDir) {
-        try {
-            const portFile = path.join(outputDir, 'port.txt');
-            if (fs.existsSync(portFile)) {
-                debugPort = parseInt(fs.readFileSync(portFile, 'utf8').trim(), 10);
-            }
-        } catch (e) {}
-    }
-
-    // Step 1: SIGTERM to process group (graceful shutdown)
-    console.error(`[*] Sending SIGTERM to process group -${pid}...`);
-    try {
-        process.kill(-pid, 'SIGTERM');
-    } catch (e) {
-        try {
-            console.error(`[*] Process group kill failed, trying single process...`);
-            process.kill(pid, 'SIGTERM');
-        } catch (e2) {
-            console.error(`[!] SIGTERM failed: ${e2.message}`);
-        }
-    }
-
-    // Step 2: Wait for graceful shutdown
-    await new Promise(resolve => setTimeout(resolve, 2000));
-
-    // Step 3: Check if still alive
-    if (!isProcessAlive(pid)) {
-        console.error('[+] Chrome process terminated gracefully');
-    } else {
-        // Step 4: Force kill ENTIRE process group with SIGKILL
-        console.error(`[*] Process still alive, sending SIGKILL to process group -${pid}...`);
-        try {
-            process.kill(-pid, 'SIGKILL');  // Kill entire process group
-        } catch (e) {
-            console.error(`[!] Process group SIGKILL failed, trying single process: ${e.message}`);
-            try {
-                process.kill(pid, 'SIGKILL');
-            } catch (e2) {
-                console.error(`[!] SIGKILL failed: ${e2.message}`);
-            }
-        }
-
-        // Step 5: Wait briefly and verify death
-        await new Promise(resolve => setTimeout(resolve, 1000));
-
-        if (isProcessAlive(pid)) {
-            console.error(`[!] WARNING: Process ${pid} is unkillable (likely in UNE state)`);
-            console.error(`[!] This typically happens when Chrome crashes in kernel syscall`);
-            console.error(`[!] Process will remain as zombie until system reboot`);
-            console.error(`[!] macOS IOSurface crash creates unkillable processes in UNE state`);
-
-            // Try one more time to kill the entire process group
-            if (debugPort) {
-                const relatedPids = findChromeProcessesByPort(debugPort);
-                if (relatedPids.length > 1) {
-                    console.error(`[*] Found ${relatedPids.length} Chrome processes still running on port ${debugPort}`);
-                    console.error(`[*] Attempting final process group SIGKILL...`);
-
-                    // Try to kill each unique process group we find
-                    const processGroups = new Set();
-                    for (const relatedPid of relatedPids) {
-                        if (relatedPid !== pid) {
-                            processGroups.add(relatedPid);
-                        }
-                    }
-
-                    for (const groupPid of processGroups) {
-                        try {
-                            process.kill(-groupPid, 'SIGKILL');
-                        } catch (e) {}
-                    }
-                }
-            }
-        } else {
-            console.error('[+] Chrome process group killed successfully');
-        }
-    }
-
-    // Step 8: Clean up PID files
-    // Note: hook-specific .pid files are cleaned up by run_hook() and Snapshot.cleanup()
-    if (outputDir) {
-        try { fs.unlinkSync(path.join(outputDir, 'chrome.pid')); } catch (e) {}
-    }
-
-    console.error('[*] Chrome cleanup completed');
-}
-
-/**
- * Install Chromium using @puppeteer/browsers programmatic API.
- * Uses puppeteer's default cache location, returns the binary path.
- *
- * @param {Object} options - Install options
- * @returns {Promise<Object>} - {success, binary, version, error}
- */
-async function installChromium(options = {}) {
-    // Check if CHROME_BINARY is already set and valid
-    const configuredBinary = getEnv('CHROME_BINARY');
-    if (configuredBinary && fs.existsSync(configuredBinary)) {
-        console.error(`[+] Using configured CHROME_BINARY: ${configuredBinary}`);
-        return { success: true, binary: configuredBinary, version: null };
-    }
-
-    // Try to load @puppeteer/browsers from NODE_MODULES_DIR or system
-    let puppeteerBrowsers;
-    try {
-        if (process.env.NODE_MODULES_DIR) {
-            module.paths.unshift(process.env.NODE_MODULES_DIR);
-        }
-        puppeteerBrowsers = require('@puppeteer/browsers');
-    } catch (e) {
-        console.error(`[!] @puppeteer/browsers not found. Install it first with installPuppeteerCore.`);
-        return { success: false, error: '@puppeteer/browsers not installed' };
-    }
-
-    console.error(`[*] Installing Chromium via @puppeteer/browsers...`);
-
-    try {
-        const result = await puppeteerBrowsers.install({
-            browser: 'chromium',
-            buildId: 'latest',
-        });
-
-        const binary = result.executablePath;
-        const version = result.buildId;
-
-        if (!binary || !fs.existsSync(binary)) {
-            console.error(`[!] Chromium binary not found at: ${binary}`);
-            return { success: false, error: `Chromium binary not found at: ${binary}` };
-        }
-
-        console.error(`[+] Chromium installed: ${binary}`);
-        return { success: true, binary, version };
-    } catch (e) {
-        console.error(`[!] Failed to install Chromium: ${e.message}`);
-        return { success: false, error: e.message };
-    }
-}
-
-/**
- * Install puppeteer-core npm package.
- *
- * @param {Object} options - Install options
- * @param {string} [options.npmPrefix] - npm prefix directory (default: DATA_DIR/lib/<arch>/npm or ./node_modules parent)
- * @param {number} [options.timeout=60000] - Timeout in milliseconds
- * @returns {Promise<Object>} - {success, path, error}
- */
-async function installPuppeteerCore(options = {}) {
-    const arch = `${process.arch}-${process.platform}`;
-    const defaultPrefix = path.join(getEnv('LIB_DIR', getEnv('DATA_DIR', '.')), 'npm');
-    const {
-        npmPrefix = defaultPrefix,
-        timeout = 60000,
-    } = options;
-
-    const nodeModulesDir = path.join(npmPrefix, 'node_modules');
-    const puppeteerPath = path.join(nodeModulesDir, 'puppeteer-core');
-
-    // Check if already installed
-    if (fs.existsSync(puppeteerPath)) {
-        console.error(`[+] puppeteer-core already installed: ${puppeteerPath}`);
-        return { success: true, path: puppeteerPath };
-    }
-
-    console.error(`[*] Installing puppeteer-core to ${npmPrefix}...`);
-
-    // Create directory
-    if (!fs.existsSync(npmPrefix)) {
-        fs.mkdirSync(npmPrefix, { recursive: true });
-    }
-
-    try {
-        const { execSync } = require('child_process');
-        execSync(
-            `npm install --prefix "${npmPrefix}" puppeteer-core`,
-            { encoding: 'utf8', timeout, stdio: ['pipe', 'pipe', 'pipe'] }
-        );
-        console.error(`[+] puppeteer-core installed successfully`);
-        return { success: true, path: puppeteerPath };
-    } catch (e) {
-        console.error(`[!] Failed to install puppeteer-core: ${e.message}`);
-        return { success: false, error: e.message };
-    }
-}
-
-// Try to import unzipper, fallback to system unzip if not available
-let unzip = null;
-try {
-    const unzipper = require('unzipper');
-    unzip = async (sourcePath, destPath) => {
-        const stream = fs.createReadStream(sourcePath).pipe(unzipper.Extract({ path: destPath }));
-        return stream.promise();
-    };
-} catch (err) {
-    // Will use system unzip command as fallback
-}
-
-/**
- * Compute the extension ID from the unpacked path.
- * Chrome uses a SHA256 hash of the unpacked extension directory path to compute a dynamic id.
- *
- * @param {string} unpacked_path - Path to the unpacked extension directory
- * @returns {string} - 32-character extension ID
- */
-function getExtensionId(unpacked_path) {
-    let resolved_path = unpacked_path;
-    try {
-        resolved_path = fs.realpathSync(unpacked_path);
-    } catch (err) {
-        // Use the provided path if realpath fails
-        resolved_path = unpacked_path;
-    }
-    // Chrome uses a SHA256 hash of the unpacked extension directory path
-    const hash = crypto.createHash('sha256');
-    hash.update(Buffer.from(resolved_path, 'utf-8'));
-
-    // Convert first 32 hex chars to characters in the range 'a'-'p'
-    const detected_extension_id = Array.from(hash.digest('hex'))
-        .slice(0, 32)
-        .map(i => String.fromCharCode(parseInt(i, 16) + 'a'.charCodeAt(0)))
-        .join('');
-
-    return detected_extension_id;
-}
-
-/**
- * Download and install a Chrome extension from the Chrome Web Store.
- *
- * @param {Object} extension - Extension metadata object
- * @param {string} extension.webstore_id - Chrome Web Store extension ID
- * @param {string} extension.name - Human-readable extension name
- * @param {string} extension.crx_url - URL to download the CRX file
- * @param {string} extension.crx_path - Local path to save the CRX file
- * @param {string} extension.unpacked_path - Path to extract the extension
- * @returns {Promise<boolean>} - True if installation succeeded
- */
-async function installExtension(extension) {
-    const manifest_path = path.join(extension.unpacked_path, 'manifest.json');
-
-    // Download CRX file if not already downloaded
-    if (!fs.existsSync(manifest_path) && !fs.existsSync(extension.crx_path)) {
-        console.log(`[🛠️] Downloading missing extension ${extension.name} ${extension.webstore_id} -> ${extension.crx_path}`);
-
-        try {
-            // Ensure parent directory exists
-            const crxDir = path.dirname(extension.crx_path);
-            if (!fs.existsSync(crxDir)) {
-                fs.mkdirSync(crxDir, { recursive: true });
-            }
-
-            // Download CRX file from Chrome Web Store
-            const response = await fetch(extension.crx_url);
-
-            if (!response.ok) {
-                console.warn(`[⚠️] Failed to download extension ${extension.name}: HTTP ${response.status}`);
-                return false;
-            }
-
-            if (response.body) {
-                const crx_file = fs.createWriteStream(extension.crx_path);
-                const crx_stream = Readable.fromWeb(response.body);
-                await finished(crx_stream.pipe(crx_file));
-            } else {
-                console.warn(`[⚠️] Failed to download extension ${extension.name}: No response body`);
-                return false;
-            }
-        } catch (err) {
-            console.error(`[❌] Failed to download extension ${extension.name}:`, err);
-            return false;
-        }
-    }
-
-    // Unzip CRX file to unpacked_path (CRX files have extra header bytes but unzip handles it)
-    await fs.promises.mkdir(extension.unpacked_path, { recursive: true });
-
-    try {
-        // Use -q to suppress warnings about extra bytes in CRX header
-        await execAsync(`/usr/bin/unzip -q -o "${extension.crx_path}" -d "${extension.unpacked_path}"`);
-    } catch (err1) {
-        // unzip may return non-zero even on success due to CRX header warning, check if manifest exists
-        if (!fs.existsSync(manifest_path)) {
-            if (unzip) {
-                // Fallback to unzipper library
-                try {
-                    await unzip(extension.crx_path, extension.unpacked_path);
-                } catch (err2) {
-                    console.error(`[❌] Failed to unzip ${extension.crx_path}:`, err2.message);
-                    return false;
-                }
-            } else {
-                console.error(`[❌] Failed to unzip ${extension.crx_path}:`, err1.message);
-                return false;
-            }
-        }
-    }
-
-    if (!fs.existsSync(manifest_path)) {
-        console.error(`[❌] Failed to install ${extension.crx_path}: could not find manifest.json in unpacked_path`);
-        return false;
-    }
-
-    return true;
-}
-
-/**
- * Load or install a Chrome extension, computing all metadata.
- *
- * @param {Object} ext - Partial extension metadata (at minimum: webstore_id or unpacked_path)
- * @param {string} [ext.webstore_id] - Chrome Web Store extension ID
- * @param {string} [ext.name] - Human-readable extension name
- * @param {string} [ext.unpacked_path] - Path to unpacked extension
- * @param {string} [extensions_dir] - Directory to store extensions
- * @returns {Promise<Object>} - Complete extension metadata object
- */
-async function loadOrInstallExtension(ext, extensions_dir = null) {
-    if (!(ext.webstore_id || ext.unpacked_path)) {
-        throw new Error('Extension must have either {webstore_id} or {unpacked_path}');
-    }
-
-    // Determine extensions directory
-    // Use provided dir, or fall back to getExtensionsDir() which handles env vars and defaults
-    const EXTENSIONS_DIR = extensions_dir || getExtensionsDir();
-
-    // Set statically computable extension metadata
-    ext.webstore_id = ext.webstore_id || ext.id;
-    ext.name = ext.name || ext.webstore_id;
-    ext.webstore_url = ext.webstore_url || `https://chromewebstore.google.com/detail/${ext.webstore_id}`;
-    ext.crx_url = ext.crx_url || `https://clients2.google.com/service/update2/crx?response=redirect&prodversion=1230&acceptformat=crx3&x=id%3D${ext.webstore_id}%26uc`;
-    ext.crx_path = ext.crx_path || path.join(EXTENSIONS_DIR, `${ext.webstore_id}__${ext.name}.crx`);
-    ext.unpacked_path = ext.unpacked_path || path.join(EXTENSIONS_DIR, `${ext.webstore_id}__${ext.name}`);
-
-    const manifest_path = path.join(ext.unpacked_path, 'manifest.json');
-    ext.read_manifest = () => JSON.parse(fs.readFileSync(manifest_path, 'utf-8'));
-    ext.read_version = () => fs.existsSync(manifest_path) && ext.read_manifest()?.version || null;
-
-    // If extension is not installed, download and unpack it
-    if (!ext.read_version()) {
-        await installExtension(ext);
-    }
-
-    // Autodetect ID from filesystem path (unpacked extensions don't have stable IDs)
-    ext.id = getExtensionId(ext.unpacked_path);
-    ext.version = ext.read_version();
-
-    if (!ext.version) {
-        console.warn(`[❌] Unable to detect ID and version of installed extension ${ext.unpacked_path}`);
-    } else {
-        console.log(`[➕] Installed extension ${ext.name} (${ext.version})... ${ext.unpacked_path}`);
-    }
-
-    return ext;
-}
-
-/**
- * Check if a Puppeteer target is an extension background page/service worker.
- *
- * @param {Object} target - Puppeteer target object
- * @returns {Promise<Object>} - Object with target_is_bg, extension_id, manifest_version, etc.
- */
-async function isTargetExtension(target) {
-    let target_type;
-    let target_ctx;
-    let target_url;
-
-    try {
-        target_type = target.type();
-        target_ctx = (await target.worker()) || (await target.page()) || null;
-        target_url = target.url() || target_ctx?.url() || null;
-    } catch (err) {
-        if (String(err).includes('No target with given id found')) {
-            // Target closed during check, ignore harmless race condition
-            target_type = 'closed';
-            target_ctx = null;
-            target_url = 'about:closed';
-        } else {
-            throw err;
-        }
-    }
-
-    // Check if this is an extension background page or service worker
-    const is_chrome_extension = target_url?.startsWith('chrome-extension://');
-    const is_background_page = target_type === 'background_page';
-    const is_service_worker = target_type === 'service_worker';
-    const target_is_bg = is_chrome_extension && (is_background_page || is_service_worker);
-
-    let extension_id = null;
-    let manifest_version = null;
-    let manifest = null;
-    let manifest_name = null;
-    const target_is_extension = is_chrome_extension || target_is_bg;
-
-    if (target_is_extension) {
-        try {
-            extension_id = target_url?.split('://')[1]?.split('/')[0] || null;
-
-            if (target_ctx) {
-                manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
-                manifest_version = manifest?.manifest_version || null;
-                manifest_name = manifest?.name || null;
-            }
-        } catch (err) {
-            // Failed to get extension metadata
-        }
-    }
-
-    return {
-        target_is_extension,
-        target_is_bg,
-        target_type,
-        target_ctx,
-        target_url,
-        extension_id,
-        manifest_version,
-        manifest,
-        manifest_name,
-    };
-}
-
-/**
- * Load extension metadata and connection handlers from a browser target.
- *
- * @param {Array} extensions - Array of extension metadata objects to update
- * @param {Object} target - Puppeteer target object
- * @returns {Promise<Object|null>} - Updated extension object or null if not an extension
- */
-async function loadExtensionFromTarget(extensions, target) {
-    const {
-        target_is_bg,
-        target_is_extension,
-        target_type,
-        target_ctx,
-        target_url,
-        extension_id,
-        manifest_version,
-    } = await isTargetExtension(target);
-
-    if (!(target_is_bg && extension_id && target_ctx)) {
-        return null;
-    }
-
-    // Find matching extension in our list
-    const extension = extensions.find(ext => ext.id === extension_id);
-    if (!extension) {
-        console.warn(`[⚠️] Found loaded extension ${extension_id} that's not in CHROME_EXTENSIONS list`);
-        return null;
-    }
-
-    // Load manifest from the extension context
-    let manifest = null;
-    try {
-        manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
-    } catch (err) {
-        console.error(`[❌] Failed to read manifest for extension ${extension_id}:`, err);
-        return null;
-    }
-
-    // Create dispatch methods for communicating with the extension
-    const new_extension = {
-        ...extension,
-        target,
-        target_type,
-        target_url,
-        manifest,
-        manifest_version,
-
-        // Trigger extension toolbar button click
-        dispatchAction: async (tab) => {
-            return await target_ctx.evaluate(async (tab) => {
-                tab = tab || (await new Promise((resolve) =>
-                    chrome.tabs.query({ currentWindow: true, active: true }, ([tab]) => resolve(tab))
-                ));
-
-                // Manifest V3: chrome.action
-                if (chrome.action?.onClicked?.dispatch) {
-                    return await chrome.action.onClicked.dispatch(tab);
-                }
-
-                // Manifest V2: chrome.browserAction
-                if (chrome.browserAction?.onClicked?.dispatch) {
-                    return await chrome.browserAction.onClicked.dispatch(tab);
-                }
-
-                throw new Error('Extension action dispatch not available');
-            }, tab || null);
-        },
-
-        // Send message to extension
-        dispatchMessage: async (message, options = {}) => {
-            return await target_ctx.evaluate((msg, opts) => {
-                return new Promise((resolve) => {
-                    chrome.runtime.sendMessage(msg, opts, (response) => {
-                        resolve(response);
-                    });
-                });
-            }, message, options);
-        },
-
-        // Trigger extension command (keyboard shortcut)
-        dispatchCommand: async (command) => {
-            return await target_ctx.evaluate((cmd) => {
-                return new Promise((resolve) => {
-                    chrome.commands.onCommand.addListener((receivedCommand) => {
-                        if (receivedCommand === cmd) {
-                            resolve({ success: true, command: receivedCommand });
-                        }
-                    });
-                    // Note: Actually triggering commands programmatically is not directly supported
-                    // This would need to be done via CDP or keyboard simulation
-                });
-            }, command);
-        },
-    };
-
-    // Update the extension in the array
-    Object.assign(extension, new_extension);
-
-    console.log(`[🔌] Connected to extension ${extension.name} (${extension.version})`);
-
-    return new_extension;
-}
-
-/**
- * Install all extensions in the list if not already installed.
- *
- * @param {Array} extensions - Array of extension metadata objects
- * @param {string} [extensions_dir] - Directory to store extensions
- * @returns {Promise<Array>} - Array of installed extension objects
- */
-async function installAllExtensions(extensions, extensions_dir = null) {
-    console.log(`[⚙️] Installing ${extensions.length} chrome extensions...`);
-
-    for (const extension of extensions) {
-        await loadOrInstallExtension(extension, extensions_dir);
-    }
-
-    return extensions;
-}
-
-/**
- * Load and connect to all extensions from a running browser.
- *
- * @param {Object} browser - Puppeteer browser instance
- * @param {Array} extensions - Array of extension metadata objects
- * @returns {Promise<Array>} - Array of loaded extension objects with connection handlers
- */
-async function loadAllExtensionsFromBrowser(browser, extensions) {
-    console.log(`[⚙️] Loading ${extensions.length} chrome extensions from browser...`);
-
-    // Find loaded extensions at runtime by examining browser targets
-    for (const target of browser.targets()) {
-        await loadExtensionFromTarget(extensions, target);
-    }
-
-    return extensions;
-}
-
-/**
- * Load extension manifest.json file
- *
- * @param {string} unpacked_path - Path to unpacked extension directory
- * @returns {object|null} - Parsed manifest object or null if not found/invalid
- */
-function loadExtensionManifest(unpacked_path) {
-    const manifest_path = path.join(unpacked_path, 'manifest.json');
-
-    if (!fs.existsSync(manifest_path)) {
-        return null;
-    }
-
-    try {
-        const manifest_content = fs.readFileSync(manifest_path, 'utf-8');
-        return JSON.parse(manifest_content);
-    } catch (error) {
-        // Invalid JSON or read error
-        return null;
-    }
-}
-
-/**
- * @deprecated Use puppeteer's enableExtensions option instead.
- *
- * Generate Chrome launch arguments for loading extensions.
- * NOTE: This is deprecated. Use puppeteer.launch({ pipe: true, enableExtensions: [paths] }) instead.
- *
- * @param {Array} extensions - Array of extension metadata objects
- * @returns {Array<string>} - Chrome CLI arguments for loading extensions
- */
-function getExtensionLaunchArgs(extensions) {
-    console.warn('[DEPRECATED] getExtensionLaunchArgs is deprecated. Use puppeteer enableExtensions option instead.');
-    if (!extensions || extensions.length === 0) {
-        return [];
-    }
-
-    // Filter out extensions without unpacked_path first
-    const validExtensions = extensions.filter(ext => ext.unpacked_path);
-
-    const unpacked_paths = validExtensions.map(ext => ext.unpacked_path);
-    // Use computed id (from path hash) for allowlisting, as that's what Chrome uses for unpacked extensions
-    // Fall back to webstore_id if computed id not available
-    const extension_ids = validExtensions.map(ext => ext.id || getExtensionId(ext.unpacked_path));
-
-    return [
-        `--load-extension=${unpacked_paths.join(',')}`,
-        `--allowlisted-extension-id=${extension_ids.join(',')}`,
-        '--allow-legacy-extension-manifests',
-        '--disable-extensions-auto-update',
-    ];
-}
-
-/**
- * Get extension paths for use with puppeteer's enableExtensions option.
- * Following puppeteer best practices: https://pptr.dev/guides/chrome-extensions
- *
- * @param {Array} extensions - Array of extension metadata objects
- * @returns {Array<string>} - Array of extension unpacked paths
- */
-function getExtensionPaths(extensions) {
-    if (!extensions || extensions.length === 0) {
-        return [];
-    }
-    return extensions
-        .filter(ext => ext.unpacked_path)
-        .map(ext => ext.unpacked_path);
-}
-
-/**
- * Wait for an extension target to be available in the browser.
- * Following puppeteer best practices for accessing extension contexts.
- *
- * For Manifest V3 extensions (service workers):
- *   const worker = await waitForExtensionTarget(browser, extensionId);
- *   // worker is a WebWorker context
- *
- * For Manifest V2 extensions (background pages):
- *   const page = await waitForExtensionTarget(browser, extensionId);
- *   // page is a Page context
- *
- * @param {Object} browser - Puppeteer browser instance
- * @param {string} extensionId - Extension ID to wait for (computed from path hash)
- * @param {number} [timeout=30000] - Timeout in milliseconds
- * @returns {Promise<Object>} - Worker or Page context for the extension
- */
-async function waitForExtensionTarget(browser, extensionId, timeout = 30000) {
-    // Try to find service worker first (Manifest V3)
-    try {
-        const workerTarget = await browser.waitForTarget(
-            target => target.type() === 'service_worker' &&
-                target.url().includes(`chrome-extension://${extensionId}`),
-            { timeout }
-        );
-        const worker = await workerTarget.worker();
-        if (worker) return worker;
-    } catch (err) {
-        // No service worker found, try background page
-    }
-
-    // Try background page (Manifest V2)
-    try {
-        const backgroundTarget = await browser.waitForTarget(
-            target => target.type() === 'background_page' &&
-                target.url().includes(`chrome-extension://${extensionId}`),
-            { timeout }
-        );
-        const page = await backgroundTarget.page();
-        if (page) return page;
-    } catch (err) {
-        // No background page found
-    }
-
-    // Try any extension page as fallback
-    const extTarget = await browser.waitForTarget(
-        target => target.url().startsWith(`chrome-extension://${extensionId}`),
-        { timeout }
-    );
-
-    // Return worker or page depending on target type
-    if (extTarget.type() === 'service_worker') {
-        return await extTarget.worker();
-    }
-    return await extTarget.page();
-}
-
-/**
- * Get all loaded extension targets from a browser.
- *
- * @param {Object} browser - Puppeteer browser instance
- * @returns {Array<Object>} - Array of extension target info objects
- */
-function getExtensionTargets(browser) {
-    return browser.targets()
-        .filter(target =>
-            target.url().startsWith('chrome-extension://') ||
-            target.type() === 'service_worker' ||
-            target.type() === 'background_page'
-        )
-        .map(target => ({
-            type: target.type(),
-            url: target.url(),
-            extensionId: target.url().includes('chrome-extension://')
-                ? target.url().split('chrome-extension://')[1]?.split('/')[0]
-                : null,
-        }));
-}
-
-/**
- * Find Chromium binary path.
- * Checks CHROME_BINARY env var first, then falls back to system locations.
- *
- * @returns {string|null} - Absolute path to browser binary or null if not found
- */
-function findChromium() {
-    const { execSync } = require('child_process');
-
-    // Helper to validate a binary by running --version
-    const validateBinary = (binaryPath) => {
-        if (!binaryPath || !fs.existsSync(binaryPath)) return false;
-        try {
-            execSync(`"${binaryPath}" --version`, { encoding: 'utf8', timeout: 5000, stdio: 'pipe' });
-            return true;
-        } catch (e) {
-            return false;
-        }
-    };
-
-    // 1. Check CHROME_BINARY env var first
-    const chromeBinary = getEnv('CHROME_BINARY');
-    if (chromeBinary) {
-        const absPath = path.resolve(chromeBinary);
-        if (absPath.includes('Google Chrome') || absPath.includes('google-chrome')) {
-            console.error('[!] Warning: CHROME_BINARY points to Chrome. Chromium is required for extension support.');
-        } else if (validateBinary(absPath)) {
-            return absPath;
-        }
-        console.error(`[!] Warning: CHROME_BINARY="${chromeBinary}" is not valid`);
-    }
-
-    // 2. Warn that no CHROME_BINARY is configured, searching fallbacks
-    if (!chromeBinary) {
-        console.error('[!] Warning: CHROME_BINARY not set, searching system locations...');
-    }
-
-    // Helper to find Chromium in @puppeteer/browsers directory structure
-    const findInPuppeteerDir = (baseDir) => {
-        if (!fs.existsSync(baseDir)) return null;
-        try {
-            const versions = fs.readdirSync(baseDir);
-            for (const version of versions.sort().reverse()) {
-                const versionDir = path.join(baseDir, version);
-                const candidates = [
-                    path.join(versionDir, 'chrome-mac-arm64/Chromium.app/Contents/MacOS/Chromium'),
-                    path.join(versionDir, 'chrome-mac/Chromium.app/Contents/MacOS/Chromium'),
-                    path.join(versionDir, 'chrome-mac-x64/Chromium.app/Contents/MacOS/Chromium'),
-                    path.join(versionDir, 'chrome-linux64/chrome'),
-                    path.join(versionDir, 'chrome-linux/chrome'),
-                ];
-                for (const c of candidates) {
-                    if (fs.existsSync(c)) return c;
-                }
-            }
-        } catch (e) {}
-        return null;
-    };
-
-    // 3. Search fallback locations (Chromium only)
-    const fallbackLocations = [
-        // System Chromium
-        '/Applications/Chromium.app/Contents/MacOS/Chromium',
-        '/usr/bin/chromium',
-        '/usr/bin/chromium-browser',
-        // Puppeteer cache
-        path.join(process.env.HOME || '', '.cache/puppeteer/chromium'),
-        path.join(process.env.HOME || '', '.cache/puppeteer'),
-    ];
-
-    for (const loc of fallbackLocations) {
-        // Check if it's a puppeteer cache dir
-        if (loc.includes('.cache/puppeteer')) {
-            const binary = findInPuppeteerDir(loc);
-            if (binary && validateBinary(binary)) {
-                return binary;
-            }
-        } else if (validateBinary(loc)) {
-            return loc;
-        }
-    }
-
-    return null;
-}
-
-/**
- * Find Chromium binary path only (never Chrome/Brave/Edge).
- * Prefers CHROME_BINARY if set, then Chromium.
- *
- * @returns {string|null} - Absolute path or command name to browser binary
- */
-function findAnyChromiumBinary() {
-    const chromiumBinary = findChromium();
-    if (chromiumBinary) return chromiumBinary;
-    return null;
-}
-
-// ============================================================================
-// Shared Extension Installer Utilities
-// ============================================================================
-
-/**
- * Get the extensions directory path.
- * Centralized path calculation used by extension installers and chrome launch.
- *
- * Path is derived from environment variables in this priority:
- * 1. CHROME_EXTENSIONS_DIR (explicit override)
- * 2. DATA_DIR/personas/ACTIVE_PERSONA/chrome_extensions (default)
- *
- * @returns {string} - Absolute path to extensions directory
- */
-function getExtensionsDir() {
-    const dataDir = getEnv('DATA_DIR', '.');
-    const persona = getEnv('ACTIVE_PERSONA', 'Default');
-    return getEnv('CHROME_EXTENSIONS_DIR') ||
-        path.join(dataDir, 'personas', persona, 'chrome_extensions');
-}
-
-/**
- * Get machine type string for platform-specific paths.
- * Matches Python's archivebox.config.paths.get_machine_type()
- *
- * @returns {string} - Machine type (e.g., 'x86_64-linux', 'arm64-darwin')
- */
-function getMachineType() {
-    if (process.env.MACHINE_TYPE) {
-        return process.env.MACHINE_TYPE;
-    }
-
-    let machine = process.arch;
-    const system = process.platform;
-
-    // Normalize machine type to match Python's convention
-    if (machine === 'arm64' || machine === 'aarch64') {
-        machine = 'arm64';
-    } else if (machine === 'x64' || machine === 'x86_64' || machine === 'amd64') {
-        machine = 'x86_64';
-    } else if (machine === 'ia32' || machine === 'x86') {
-        machine = 'x86';
-    }
-
-    return `${machine}-${system}`;
-}
-
-/**
- * Get LIB_DIR path for platform-specific binaries.
- * Returns DATA_DIR/lib/MACHINE_TYPE/
- *
- * @returns {string} - Absolute path to lib directory
- */
-function getLibDir() {
-    if (process.env.LIB_DIR) {
-        return path.resolve(process.env.LIB_DIR);
-    }
-    const dataDir = getEnv('DATA_DIR', './data');
-    const machineType = getMachineType();
-    return path.resolve(path.join(dataDir, 'lib', machineType));
-}
-
-/**
- * Get NODE_MODULES_DIR path for npm packages.
- * Returns LIB_DIR/npm/node_modules/
- *
- * @returns {string} - Absolute path to node_modules directory
- */
-function getNodeModulesDir() {
-    if (process.env.NODE_MODULES_DIR) {
-        return path.resolve(process.env.NODE_MODULES_DIR);
-    }
-    return path.resolve(path.join(getLibDir(), 'npm', 'node_modules'));
-}
-
-/**
- * Get all test environment paths as a JSON object.
- * This is the single source of truth for path calculations - Python calls this
- * to avoid duplicating path logic.
- *
- * @returns {Object} - Object with all test environment paths
- */
-function getTestEnv() {
-    const dataDir = getEnv('DATA_DIR', './data');
-    const machineType = getMachineType();
-    const libDir = getLibDir();
-    const nodeModulesDir = getNodeModulesDir();
-
-    return {
-        DATA_DIR: dataDir,
-        MACHINE_TYPE: machineType,
-        LIB_DIR: libDir,
-        NODE_MODULES_DIR: nodeModulesDir,
-        NODE_PATH: nodeModulesDir,  // Node.js uses NODE_PATH for module resolution
-        NPM_BIN_DIR: path.join(libDir, 'npm', '.bin'),
-        CHROME_EXTENSIONS_DIR: getExtensionsDir(),
-    };
-}
-
-/**
- * Install a Chrome extension with caching support.
- *
- * This is the main entry point for extension installer hooks. It handles:
- * - Checking for cached extension metadata
- * - Installing the extension if not cached
- * - Writing cache file for future runs
- *
- * @param {Object} extension - Extension metadata object
- * @param {string} extension.webstore_id - Chrome Web Store extension ID
- * @param {string} extension.name - Human-readable extension name (used for cache file)
- * @param {Object} [options] - Options
- * @param {string} [options.extensionsDir] - Override extensions directory
- * @param {boolean} [options.quiet=false] - Suppress info logging
- * @returns {Promise<Object|null>} - Installed extension metadata or null on failure
- */
-async function installExtensionWithCache(extension, options = {}) {
-    const {
-        extensionsDir = getExtensionsDir(),
-        quiet = false,
-    } = options;
-
-    const cacheFile = path.join(extensionsDir, `${extension.name}.extension.json`);
-
-    // Check if extension is already cached and valid
-    if (fs.existsSync(cacheFile)) {
-        try {
-            const cached = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
-            const manifestPath = path.join(cached.unpacked_path, 'manifest.json');
-
-            if (fs.existsSync(manifestPath)) {
-                if (!quiet) {
-                    console.log(`[*] ${extension.name} extension already installed (using cache)`);
-                }
-                return cached;
-            }
-        } catch (e) {
-            // Cache file corrupted, re-install
-            console.warn(`[⚠️] Extension cache corrupted for ${extension.name}, re-installing...`);
-        }
-    }
-
-    // Install extension
-    if (!quiet) {
-        console.log(`[*] Installing ${extension.name} extension...`);
-    }
-
-    const installedExt = await loadOrInstallExtension(extension, extensionsDir);
-
-    if (!installedExt?.version) {
-        console.error(`[❌] Failed to install ${extension.name} extension`);
-        return null;
-    }
-
-    // Write cache file
-    try {
-        await fs.promises.mkdir(extensionsDir, { recursive: true });
-        await fs.promises.writeFile(cacheFile, JSON.stringify(installedExt, null, 2));
-        if (!quiet) {
-            console.log(`[+] Extension metadata written to ${cacheFile}`);
-        }
-    } catch (e) {
-        console.warn(`[⚠️] Failed to write cache file: ${e.message}`);
-    }
-
-    if (!quiet) {
-        console.log(`[+] ${extension.name} extension installed`);
-    }
-
-    return installedExt;
-}
-
-// ============================================================================
-// Snapshot Hook Utilities (for CDP-based plugins like ssl, responses, dns)
-// ============================================================================
-
-/**
- * Parse command line arguments into an object.
- * Handles --key=value and --flag formats.
- *
- * @returns {Object} - Parsed arguments object
- */
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach(arg => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-/**
- * Wait for Chrome session files to be ready.
- * Polls for cdp_url.txt and target_id.txt in the chrome session directory.
- *
- * @param {string} chromeSessionDir - Path to chrome session directory (e.g., '../chrome')
- * @param {number} [timeoutMs=60000] - Timeout in milliseconds
- * @returns {Promise<boolean>} - True if files are ready, false if timeout
- */
-async function waitForChromeSession(chromeSessionDir, timeoutMs = 60000) {
-    const cdpFile = path.join(chromeSessionDir, 'cdp_url.txt');
-    const targetIdFile = path.join(chromeSessionDir, 'target_id.txt');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(cdpFile) && fs.existsSync(targetIdFile)) {
-            return true;
-        }
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-
-    return false;
-}
-
-/**
- * Read CDP WebSocket URL from chrome session directory.
- *
- * @param {string} chromeSessionDir - Path to chrome session directory
- * @returns {string|null} - CDP URL or null if not found
- */
-function readCdpUrl(chromeSessionDir) {
-    const cdpFile = path.join(chromeSessionDir, 'cdp_url.txt');
-    if (fs.existsSync(cdpFile)) {
-        return fs.readFileSync(cdpFile, 'utf8').trim();
-    }
-    return null;
-}
-
-/**
- * Read target ID from chrome session directory.
- *
- * @param {string} chromeSessionDir - Path to chrome session directory
- * @returns {string|null} - Target ID or null if not found
- */
-function readTargetId(chromeSessionDir) {
-    const targetIdFile = path.join(chromeSessionDir, 'target_id.txt');
-    if (fs.existsSync(targetIdFile)) {
-        return fs.readFileSync(targetIdFile, 'utf8').trim();
-    }
-    return null;
-}
-
-/**
- * Connect to Chrome browser and find the target page.
- * This is a high-level utility that handles all the connection logic:
- * 1. Wait for chrome session files
- * 2. Connect to browser via CDP
- * 3. Find the target page by ID
- *
- * @param {Object} options - Connection options
- * @param {string} [options.chromeSessionDir='../chrome'] - Path to chrome session directory
- * @param {number} [options.timeoutMs=60000] - Timeout for waiting
- * @param {Object} [options.puppeteer] - Puppeteer module (must be passed in)
- * @returns {Promise<Object>} - { browser, page, targetId, cdpUrl }
- * @throws {Error} - If connection fails or page not found
- */
-async function connectToPage(options = {}) {
-    const {
-        chromeSessionDir = '../chrome',
-        timeoutMs = 60000,
-        puppeteer,
-    } = options;
-
-    if (!puppeteer) {
-        throw new Error('puppeteer module must be passed to connectToPage()');
-    }
-
-    // Wait for chrome session to be ready
-    const sessionReady = await waitForChromeSession(chromeSessionDir, timeoutMs);
-    if (!sessionReady) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-
-    // Read session files
-    const cdpUrl = readCdpUrl(chromeSessionDir);
-    if (!cdpUrl) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-
-    const targetId = readTargetId(chromeSessionDir);
-
-    // Connect to browser
-    const browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
-
-    // Find the target page
-    const pages = await browser.pages();
-    let page = null;
-
-    if (targetId) {
-        page = pages.find(p => {
-            const target = p.target();
-            return target && target._targetId === targetId;
-        });
-    }
-
-    // Fallback to last page if target not found
-    if (!page) {
-        page = pages[pages.length - 1];
-    }
-
-    if (!page) {
-        throw new Error('No page found in browser');
-    }
-
-    return { browser, page, targetId, cdpUrl };
-}
-
-/**
- * Wait for page navigation to complete.
- * Polls for page_loaded.txt marker file written by chrome_navigate.
- *
- * @param {string} chromeSessionDir - Path to chrome session directory
- * @param {number} [timeoutMs=120000] - Timeout in milliseconds
- * @param {number} [postLoadDelayMs=0] - Additional delay after page load marker
- * @returns {Promise<void>}
- * @throws {Error} - If timeout waiting for navigation
- */
-async function waitForPageLoaded(chromeSessionDir, timeoutMs = 120000, postLoadDelayMs = 0) {
-    const pageLoadedMarker = path.join(chromeSessionDir, 'page_loaded.txt');
-    const pollInterval = 100;
-    let waitTime = 0;
-
-    while (!fs.existsSync(pageLoadedMarker) && waitTime < timeoutMs) {
-        await new Promise(resolve => setTimeout(resolve, pollInterval));
-        waitTime += pollInterval;
-    }
-
-    if (!fs.existsSync(pageLoadedMarker)) {
-        throw new Error('Timeout waiting for navigation (chrome_navigate did not complete)');
-    }
-
-    // Optional post-load delay for late responses
-    if (postLoadDelayMs > 0) {
-        await new Promise(resolve => setTimeout(resolve, postLoadDelayMs));
-    }
-}
-
-// Export all functions
-module.exports = {
-    // Environment helpers
-    getEnv,
-    getEnvBool,
-    getEnvInt,
-    getEnvArray,
-    parseResolution,
-    // PID file management
-    writePidWithMtime,
-    writeCmdScript,
-    // Port management
-    findFreePort,
-    waitForDebugPort,
-    // Zombie cleanup
-    killZombieChrome,
-    // Chrome launching
-    launchChromium,
-    killChrome,
-    // Chromium install
-    installChromium,
-    installPuppeteerCore,
-    // Chromium binary finding
-    findChromium,
-    findAnyChromiumBinary,
-    // Extension utilities
-    getExtensionId,
-    loadExtensionManifest,
-    installExtension,
-    loadOrInstallExtension,
-    isTargetExtension,
-    loadExtensionFromTarget,
-    installAllExtensions,
-    loadAllExtensionsFromBrowser,
-    // New puppeteer best-practices helpers
-    getExtensionPaths,
-    waitForExtensionTarget,
-    getExtensionTargets,
-    // Shared path utilities (single source of truth for Python/JS)
-    getMachineType,
-    getLibDir,
-    getNodeModulesDir,
-    getExtensionsDir,
-    getTestEnv,
-    // Shared extension installer utilities
-    installExtensionWithCache,
-    // Deprecated - use enableExtensions option instead
-    getExtensionLaunchArgs,
-    // Snapshot hook utilities (for CDP-based plugins)
-    parseArgs,
-    waitForChromeSession,
-    readCdpUrl,
-    readTargetId,
-    connectToPage,
-    waitForPageLoaded,
-};
-
-// CLI usage
-if (require.main === module) {
-    const args = process.argv.slice(2);
-
-    if (args.length === 0) {
-        console.log('Usage: chrome_utils.js <command> [args...]');
-        console.log('');
-        console.log('Commands:');
-        console.log('  findChromium              Find Chromium binary');
-        console.log('  installChromium           Install Chromium via @puppeteer/browsers');
-        console.log('  installPuppeteerCore      Install puppeteer-core npm package');
-        console.log('  launchChromium            Launch Chrome with CDP debugging');
-        console.log('  killChrome <pid>          Kill Chrome process by PID');
-        console.log('  killZombieChrome          Clean up zombie Chrome processes');
-        console.log('');
-        console.log('  getMachineType            Get machine type (e.g., x86_64-linux)');
-        console.log('  getLibDir                 Get LIB_DIR path');
-        console.log('  getNodeModulesDir         Get NODE_MODULES_DIR path');
-        console.log('  getExtensionsDir          Get Chrome extensions directory');
-        console.log('  getTestEnv                Get all paths as JSON (for tests)');
-        console.log('');
-        console.log('  getExtensionId <path>     Get extension ID from unpacked path');
-        console.log('  loadExtensionManifest     Load extension manifest.json');
-        console.log('  loadOrInstallExtension    Load or install an extension');
-        console.log('  installExtensionWithCache Install extension with caching');
-        console.log('');
-        console.log('Environment variables:');
-        console.log('  DATA_DIR                  Base data directory');
-        console.log('  LIB_DIR                   Library directory (computed if not set)');
-        console.log('  MACHINE_TYPE              Machine type override');
-        console.log('  NODE_MODULES_DIR          Node modules directory');
-        console.log('  CHROME_BINARY             Chrome binary path');
-        console.log('  CHROME_EXTENSIONS_DIR     Extensions directory');
-        process.exit(1);
-    }
-
-    const [command, ...commandArgs] = args;
-
-    (async () => {
-        try {
-            switch (command) {
-                case 'findChromium': {
-                    const binary = findChromium();
-                    if (binary) {
-                        console.log(binary);
-                    } else {
-                        console.error('Chromium binary not found');
-                        process.exit(1);
-                    }
-                    break;
-                }
-
-                case 'installChromium': {
-                    const result = await installChromium();
-                    if (result.success) {
-                        console.log(JSON.stringify({
-                            binary: result.binary,
-                            version: result.version,
-                        }));
-                    } else {
-                        console.error(result.error);
-                        process.exit(1);
-                    }
-                    break;
-                }
-
-                case 'installPuppeteerCore': {
-                    const [npmPrefix] = commandArgs;
-                    const result = await installPuppeteerCore({ npmPrefix: npmPrefix || undefined });
-                    if (result.success) {
-                        console.log(JSON.stringify({ path: result.path }));
-                    } else {
-                        console.error(result.error);
-                        process.exit(1);
-                    }
-                    break;
-                }
-
-                case 'launchChromium': {
-                    const [outputDir, extensionPathsJson] = commandArgs;
-                    const extensionPaths = extensionPathsJson ? JSON.parse(extensionPathsJson) : [];
-                    const result = await launchChromium({
-                        outputDir: outputDir || 'chrome',
-                        extensionPaths,
-                    });
-                    if (result.success) {
-                        console.log(JSON.stringify({
-                            cdpUrl: result.cdpUrl,
-                            pid: result.pid,
-                            port: result.port,
-                        }));
-                    } else {
-                        console.error(result.error);
-                        process.exit(1);
-                    }
-                    break;
-                }
-
-                case 'killChrome': {
-                    const [pidStr, outputDir] = commandArgs;
-                    const pid = parseInt(pidStr, 10);
-                    if (isNaN(pid)) {
-                        console.error('Invalid PID');
-                        process.exit(1);
-                    }
-                    await killChrome(pid, outputDir);
-                    break;
-                }
-
-                case 'killZombieChrome': {
-                    const [dataDir] = commandArgs;
-                    const killed = killZombieChrome(dataDir);
-                    console.log(killed);
-                    break;
-                }
-
-                case 'getExtensionId': {
-                    const [unpacked_path] = commandArgs;
-                    const id = getExtensionId(unpacked_path);
-                    console.log(id);
-                    break;
-                }
-
-                case 'loadExtensionManifest': {
-                    const [unpacked_path] = commandArgs;
-                    const manifest = loadExtensionManifest(unpacked_path);
-                    console.log(JSON.stringify(manifest));
-                    break;
-                }
-
-                case 'getExtensionLaunchArgs': {
-                    const [extensions_json] = commandArgs;
-                    const extensions = JSON.parse(extensions_json);
-                    const launchArgs = getExtensionLaunchArgs(extensions);
-                    console.log(JSON.stringify(launchArgs));
-                    break;
-                }
-
-                case 'loadOrInstallExtension': {
-                    const [webstore_id, name, extensions_dir] = commandArgs;
-                    const ext = await loadOrInstallExtension({ webstore_id, name }, extensions_dir);
-                    console.log(JSON.stringify(ext, null, 2));
-                    break;
-                }
-
-                case 'getMachineType': {
-                    console.log(getMachineType());
-                    break;
-                }
-
-                case 'getLibDir': {
-                    console.log(getLibDir());
-                    break;
-                }
-
-                case 'getNodeModulesDir': {
-                    console.log(getNodeModulesDir());
-                    break;
-                }
-
-                case 'getExtensionsDir': {
-                    console.log(getExtensionsDir());
-                    break;
-                }
-
-                case 'getTestEnv': {
-                    console.log(JSON.stringify(getTestEnv(), null, 2));
-                    break;
-                }
-
-                case 'installExtensionWithCache': {
-                    const [webstore_id, name] = commandArgs;
-                    if (!webstore_id || !name) {
-                        console.error('Usage: installExtensionWithCache <webstore_id> <name>');
-                        process.exit(1);
-                    }
-                    const ext = await installExtensionWithCache({ webstore_id, name });
-                    if (ext) {
-                        console.log(JSON.stringify(ext, null, 2));
-                    } else {
-                        process.exit(1);
-                    }
-                    break;
-                }
-
-                default:
-                    console.error(`Unknown command: ${command}`);
-                    process.exit(1);
-            }
-        } catch (error) {
-            console.error(`Error: ${error.message}`);
-            process.exit(1);
-        }
-    })();
-}
diff --git a/archivebox/plugins/chrome/config.json b/archivebox/plugins/chrome/config.json
deleted file mode 100644
index f4d6a4d8..00000000
--- a/archivebox/plugins/chrome/config.json
+++ /dev/null
@@ -1,157 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "CHROME_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["USE_CHROME"],
-      "description": "Enable Chromium browser integration for archiving"
-    },
-    "CHROME_BINARY": {
-      "type": "string",
-      "default": "chromium",
-      "x-aliases": ["CHROMIUM_BINARY", "GOOGLE_CHROME_BINARY"],
-      "description": "Path to Chromium binary"
-    },
-    "CHROME_NODE_BINARY": {
-      "type": "string",
-      "default": "node",
-      "x-fallback": "NODE_BINARY",
-      "description": "Path to Node.js binary (for Puppeteer)"
-    },
-    "CHROME_TIMEOUT": {
-      "type": "integer",
-      "default": 60,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for Chrome operations in seconds"
-    },
-    "CHROME_HEADLESS": {
-      "type": "boolean",
-      "default": true,
-      "description": "Run Chrome in headless mode"
-    },
-    "CHROME_SANDBOX": {
-      "type": "boolean",
-      "default": true,
-      "description": "Enable Chrome sandbox (disable in Docker with --no-sandbox)"
-    },
-    "CHROME_RESOLUTION": {
-      "type": "string",
-      "default": "1440,2000",
-      "pattern": "^\\d+,\\d+$",
-      "x-fallback": "RESOLUTION",
-      "description": "Browser viewport resolution (width,height)"
-    },
-    "CHROME_USER_DATA_DIR": {
-      "type": "string",
-      "default": "",
-      "description": "Path to Chrome user data directory for persistent sessions (derived from ACTIVE_PERSONA if not set)"
-    },
-    "CHROME_USER_AGENT": {
-      "type": "string",
-      "default": "",
-      "x-fallback": "USER_AGENT",
-      "description": "User agent string for Chrome"
-    },
-    "CHROME_ARGS": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [
-        "--no-first-run",
-        "--no-default-browser-check",
-        "--disable-default-apps",
-        "--disable-sync",
-        "--disable-infobars",
-        "--disable-blink-features=AutomationControlled",
-        "--disable-component-update",
-        "--disable-domain-reliability",
-        "--disable-breakpad",
-        "--disable-client-side-phishing-detection",
-        "--disable-hang-monitor",
-        "--disable-speech-synthesis-api",
-        "--disable-speech-api",
-        "--disable-print-preview",
-        "--disable-notifications",
-        "--disable-desktop-notifications",
-        "--disable-popup-blocking",
-        "--disable-prompt-on-repost",
-        "--disable-external-intent-requests",
-        "--disable-session-crashed-bubble",
-        "--disable-search-engine-choice-screen",
-        "--disable-datasaver-prompt",
-        "--ash-no-nudges",
-        "--hide-crash-restore-bubble",
-        "--suppress-message-center-popups",
-        "--noerrdialogs",
-        "--no-pings",
-        "--silent-debugger-extension-api",
-        "--deny-permission-prompts",
-        "--safebrowsing-disable-auto-update",
-        "--metrics-recording-only",
-        "--password-store=basic",
-        "--use-mock-keychain",
-        "--disable-cookie-encryption",
-        "--font-render-hinting=none",
-        "--force-color-profile=srgb",
-        "--disable-partial-raster",
-        "--disable-skia-runtime-opts",
-        "--disable-2d-canvas-clip-aa",
-        "--enable-webgl",
-        "--hide-scrollbars",
-        "--export-tagged-pdf",
-        "--generate-pdf-document-outline",
-        "--disable-lazy-loading",
-        "--disable-renderer-backgrounding",
-        "--disable-background-networking",
-        "--disable-background-timer-throttling",
-        "--disable-backgrounding-occluded-windows",
-        "--disable-ipc-flooding-protection",
-        "--disable-extensions-http-throttling",
-        "--disable-field-trial-config",
-        "--disable-back-forward-cache",
-        "--autoplay-policy=no-user-gesture-required",
-        "--disable-gesture-requirement-for-media-playback",
-        "--lang=en-US,en;q=0.9",
-        "--log-level=2",
-        "--enable-logging=stderr"
-      ],
-      "x-aliases": ["CHROME_DEFAULT_ARGS"],
-      "description": "Default Chrome command-line arguments (static flags only, dynamic args like --user-data-dir are added at runtime)"
-    },
-    "CHROME_ARGS_EXTRA": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["CHROME_EXTRA_ARGS"],
-      "description": "Extra arguments to append to Chrome command (for user customization)"
-    },
-    "CHROME_PAGELOAD_TIMEOUT": {
-      "type": "integer",
-      "default": 60,
-      "minimum": 5,
-      "x-fallback": "CHROME_TIMEOUT",
-      "description": "Timeout for page navigation/load in seconds"
-    },
-    "CHROME_WAIT_FOR": {
-      "type": "string",
-      "default": "networkidle2",
-      "enum": ["domcontentloaded", "load", "networkidle0", "networkidle2"],
-      "description": "Page load completion condition (domcontentloaded, load, networkidle0, networkidle2)"
-    },
-    "CHROME_DELAY_AFTER_LOAD": {
-      "type": "number",
-      "default": 0,
-      "minimum": 0,
-      "description": "Extra delay in seconds after page load completes before archiving (useful for JS-heavy SPAs)"
-    },
-    "CHROME_CHECK_SSL_VALIDITY": {
-      "type": "boolean",
-      "default": true,
-      "x-fallback": "CHECK_SSL_VALIDITY",
-      "description": "Whether to verify SSL certificates (disable for self-signed certs)"
-    }
-  }
-}
diff --git a/archivebox/plugins/chrome/extract_cookies.js b/archivebox/plugins/chrome/extract_cookies.js
deleted file mode 100644
index c23515dc..00000000
--- a/archivebox/plugins/chrome/extract_cookies.js
+++ /dev/null
@@ -1,254 +0,0 @@
-#!/usr/bin/env node
-/**
- * Extract cookies from Chrome via CDP and write to Netscape cookies.txt format.
- *
- * This script launches Chrome with a given user data directory, connects via CDP,
- * extracts all cookies, and writes them to a cookies.txt file in Netscape format.
- *
- * Usage:
- *   CHROME_USER_DATA_DIR=/path/to/profile COOKIES_OUTPUT_FILE=/path/to/cookies.txt node extract_cookies.js
- *
- * Environment variables:
- *   CHROME_USER_DATA_DIR: Path to Chrome user data directory (required)
- *   COOKIES_OUTPUT_FILE: Path to output cookies.txt file (required)
- *   CHROME_HEADLESS: Run in headless mode (default: true)
- *   NODE_MODULES_DIR: Path to node_modules for module resolution
- */
-
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) {
-    module.paths.unshift(process.env.NODE_MODULES_DIR);
-}
-
-const fs = require('fs');
-const path = require('path');
-const {
-    findAnyChromiumBinary,
-    launchChromium,
-    killChrome,
-    getEnv,
-} = require('./chrome_utils.js');
-
-/**
- * Convert a cookie object to Netscape cookies.txt format line.
- *
- * Format: domain  includeSubdomains  path  secure  expiry  name  value
- *
- * @param {Object} cookie - CDP cookie object
- * @returns {string} - Netscape format cookie line
- */
-function cookieToNetscape(cookie) {
-    // Domain: prefix with . for domain cookies (not host-only)
-    let domain = cookie.domain;
-    if (!domain.startsWith('.') && !cookie.hostOnly) {
-        domain = '.' + domain;
-    }
-
-    // Include subdomains: TRUE if domain cookie (starts with .)
-    const includeSubdomains = domain.startsWith('.') ? 'TRUE' : 'FALSE';
-
-    // Path
-    const cookiePath = cookie.path || '/';
-
-    // Secure flag
-    const secure = cookie.secure ? 'TRUE' : 'FALSE';
-
-    // Expiry timestamp (0 for session cookies)
-    let expiry = '0';
-    if (cookie.expires && cookie.expires > 0) {
-        // CDP returns expiry in seconds since epoch
-        expiry = Math.floor(cookie.expires).toString();
-    }
-
-    // Name and value
-    const name = cookie.name;
-    const value = cookie.value;
-
-    return `${domain}\t${includeSubdomains}\t${cookiePath}\t${secure}\t${expiry}\t${name}\t${value}`;
-}
-
-/**
- * Write cookies to Netscape cookies.txt format file.
- *
- * @param {Array} cookies - Array of CDP cookie objects
- * @param {string} outputPath - Path to output file
- */
-function writeCookiesFile(cookies, outputPath) {
-    const lines = [
-        '# Netscape HTTP Cookie File',
-        '# https://curl.se/docs/http-cookies.html',
-        '# This file was generated by ArchiveBox persona cookie extraction',
-        '#',
-        '# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue',
-        '',
-    ];
-
-    for (const cookie of cookies) {
-        lines.push(cookieToNetscape(cookie));
-    }
-
-    fs.writeFileSync(outputPath, lines.join('\n') + '\n');
-}
-
-async function main() {
-    const userDataDir = getEnv('CHROME_USER_DATA_DIR');
-    const outputFile = getEnv('COOKIES_OUTPUT_FILE');
-
-    if (!userDataDir) {
-        console.error('ERROR: CHROME_USER_DATA_DIR environment variable is required');
-        process.exit(1);
-    }
-
-    if (!outputFile) {
-        console.error('ERROR: COOKIES_OUTPUT_FILE environment variable is required');
-        process.exit(1);
-    }
-
-    if (!fs.existsSync(userDataDir)) {
-        console.error(`ERROR: User data directory does not exist: ${userDataDir}`);
-        process.exit(1);
-    }
-
-    const binary = findAnyChromiumBinary();
-    if (!binary) {
-        console.error('ERROR: Chromium-based browser binary not found');
-        process.exit(1);
-    }
-
-    console.error(`[*] Extracting cookies from: ${userDataDir}`);
-    console.error(`[*] Output file: ${outputFile}`);
-    console.error(`[*] Using browser: ${binary}`);
-
-    // Create a temporary output directory for Chrome files
-    const outputDir = fs.mkdtempSync(path.join(require('os').tmpdir(), 'chrome-cookies-'));
-
-    let chromePid = null;
-
-    try {
-        // Launch Chrome with the user data directory
-        const result = await launchChromium({
-            binary,
-            outputDir,
-            userDataDir,
-            headless: true,
-            killZombies: false,  // Don't kill other Chrome instances
-        });
-
-        if (!result.success) {
-            console.error(`ERROR: Failed to launch Chrome: ${result.error}`);
-            process.exit(1);
-        }
-
-        chromePid = result.pid;
-        const cdpUrl = result.cdpUrl;
-        const port = result.port;
-
-        console.error(`[*] Chrome launched (PID: ${chromePid})`);
-        console.error(`[*] CDP URL: ${cdpUrl}`);
-
-        // Connect to CDP and get cookies
-        const http = require('http');
-
-        // Use CDP directly via HTTP to get all cookies
-        const getCookies = () => {
-            return new Promise((resolve, reject) => {
-                const req = http.request(
-                    {
-                        hostname: '127.0.0.1',
-                        port: port,
-                        path: '/json/list',
-                        method: 'GET',
-                    },
-                    (res) => {
-                        let data = '';
-                        res.on('data', (chunk) => (data += chunk));
-                        res.on('end', () => {
-                            try {
-                                const targets = JSON.parse(data);
-                                // Find a page target
-                                const pageTarget = targets.find(t => t.type === 'page') || targets[0];
-                                if (!pageTarget) {
-                                    reject(new Error('No page target found'));
-                                    return;
-                                }
-
-                                // Connect via WebSocket and send CDP command
-                                const WebSocket = require('ws');
-                                const ws = new WebSocket(pageTarget.webSocketDebuggerUrl);
-
-                                ws.on('open', () => {
-                                    ws.send(JSON.stringify({
-                                        id: 1,
-                                        method: 'Network.getAllCookies',
-                                    }));
-                                });
-
-                                ws.on('message', (message) => {
-                                    const response = JSON.parse(message);
-                                    if (response.id === 1) {
-                                        ws.close();
-                                        if (response.result && response.result.cookies) {
-                                            resolve(response.result.cookies);
-                                        } else {
-                                            reject(new Error('Failed to get cookies: ' + JSON.stringify(response)));
-                                        }
-                                    }
-                                });
-
-                                ws.on('error', (err) => {
-                                    reject(err);
-                                });
-                            } catch (e) {
-                                reject(e);
-                            }
-                        });
-                    }
-                );
-
-                req.on('error', reject);
-                req.end();
-            });
-        };
-
-        // Wait a moment for the browser to fully initialize
-        await new Promise(r => setTimeout(r, 2000));
-
-        console.error('[*] Fetching cookies via CDP...');
-        const cookies = await getCookies();
-
-        console.error(`[+] Retrieved ${cookies.length} cookies`);
-
-        // Write cookies to file
-        writeCookiesFile(cookies, outputFile);
-        console.error(`[+] Wrote cookies to: ${outputFile}`);
-
-        // Clean up
-        await killChrome(chromePid, outputDir);
-        chromePid = null;
-
-        // Remove temp directory
-        fs.rmSync(outputDir, { recursive: true, force: true });
-
-        console.error('[+] Cookie extraction complete');
-        process.exit(0);
-
-    } catch (error) {
-        console.error(`ERROR: ${error.message}`);
-
-        // Clean up on error
-        if (chromePid) {
-            await killChrome(chromePid, outputDir);
-        }
-
-        try {
-            fs.rmSync(outputDir, { recursive: true, force: true });
-        } catch (e) {}
-
-        process.exit(1);
-    }
-}
-
-main().catch((e) => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/chrome/on_Crawl__70_chrome_install.py b/archivebox/plugins/chrome/on_Crawl__70_chrome_install.py
deleted file mode 100755
index af0b8ec7..00000000
--- a/archivebox/plugins/chrome/on_Crawl__70_chrome_install.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit Chromium Binary dependency for the crawl.
-
-NOTE: We use Chromium instead of Chrome because Chrome 137+ removed support for
---load-extension and --disable-extensions-except flags, which are needed for
-loading unpacked extensions in headless mode.
-"""
-
-import json
-import os
-import sys
-
-
-def main():
-    # Check if Chrome is enabled
-    chrome_enabled = os.environ.get('CHROME_ENABLED', 'true').lower() not in ('false', '0', 'no', 'off')
-    if not chrome_enabled:
-        sys.exit(0)
-
-    record = {
-        'type': 'Binary',
-        'name': 'chromium',
-        'binproviders': 'puppeteer,env',
-        'overrides': {
-            'puppeteer': ['chromium@latest', '--install-deps'],
-        },
-    }
-    print(json.dumps(record))
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/chrome/on_Crawl__90_chrome_launch.bg.js b/archivebox/plugins/chrome/on_Crawl__90_chrome_launch.bg.js
deleted file mode 100644
index b5cb9822..00000000
--- a/archivebox/plugins/chrome/on_Crawl__90_chrome_launch.bg.js
+++ /dev/null
@@ -1,427 +0,0 @@
-#!/usr/bin/env node
-/**
- * Launch a shared Chromium browser session for the entire crawl.
- *
- * This runs once per crawl and keeps Chromium alive for all snapshots to share.
- * Each snapshot creates its own tab via on_Snapshot__10_chrome_tab.bg.js.
- *
- * NOTE: We use Chromium instead of Chrome because Chrome 137+ removed support for
- * --load-extension and --disable-extensions-except flags.
- *
- * Usage: on_Crawl__90_chrome_launch.bg.js --crawl-id=<uuid> --source-url=<url>
- * Output: Writes to current directory (executor creates chrome/ dir):
- *   - cdp_url.txt: WebSocket URL for CDP connection
- *   - chrome.pid: Chromium process ID (for cleanup)
- *   - port.txt: Debug port number
- *   - extensions.json: Loaded extensions metadata
- *
- * Environment variables:
- *     NODE_MODULES_DIR: Path to node_modules directory for module resolution
- *     CHROME_BINARY: Path to Chromium binary (falls back to auto-detection)
- *     CHROME_RESOLUTION: Page resolution (default: 1440,2000)
- *     CHROME_HEADLESS: Run in headless mode (default: true)
- *     CHROME_CHECK_SSL_VALIDITY: Whether to check SSL certificates (default: true)
- *     CHROME_EXTENSIONS_DIR: Directory containing Chrome extensions
- */
-
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) {
-    module.paths.unshift(process.env.NODE_MODULES_DIR);
-}
-
-const fs = require('fs');
-const path = require('path');
-const http = require('http');
-const puppeteer = require('puppeteer');
-const {
-    findChromium,
-    launchChromium,
-    killChrome,
-    getEnv,
-    getEnvBool,
-    getExtensionId,
-    writePidWithMtime,
-    getExtensionsDir,
-} = require('./chrome_utils.js');
-
-// Extractor metadata
-const PLUGIN_NAME = 'chrome_launch';
-const OUTPUT_DIR = '.';
-
-// Global state for cleanup
-let chromePid = null;
-let browserInstance = null;
-
-function parseCookiesTxt(contents) {
-    const cookies = [];
-    let skipped = 0;
-
-    for (const rawLine of contents.split(/\r?\n/)) {
-        const line = rawLine.trim();
-        if (!line) continue;
-
-        let httpOnly = false;
-        let dataLine = line;
-
-        if (dataLine.startsWith('#HttpOnly_')) {
-            httpOnly = true;
-            dataLine = dataLine.slice('#HttpOnly_'.length);
-        } else if (dataLine.startsWith('#')) {
-            continue;
-        }
-
-        const parts = dataLine.split('\t');
-        if (parts.length < 7) {
-            skipped += 1;
-            continue;
-        }
-
-        const [domainRaw, includeSubdomainsRaw, pathRaw, secureRaw, expiryRaw, name, value] = parts;
-        if (!name || !domainRaw) {
-            skipped += 1;
-            continue;
-        }
-
-        const includeSubdomains = (includeSubdomainsRaw || '').toUpperCase() === 'TRUE';
-        let domain = domainRaw;
-        if (includeSubdomains && !domain.startsWith('.')) domain = `.${domain}`;
-        if (!includeSubdomains && domain.startsWith('.')) domain = domain.slice(1);
-
-        const cookie = {
-            name,
-            value,
-            domain,
-            path: pathRaw || '/',
-            secure: (secureRaw || '').toUpperCase() === 'TRUE',
-            httpOnly,
-        };
-
-        const expires = parseInt(expiryRaw, 10);
-        if (!isNaN(expires) && expires > 0) {
-            cookie.expires = expires;
-        }
-
-        cookies.push(cookie);
-    }
-
-    return { cookies, skipped };
-}
-
-async function importCookiesFromFile(browser, cookiesFile, userDataDir) {
-    if (!cookiesFile) return;
-
-    if (!fs.existsSync(cookiesFile)) {
-        console.error(`[!] Cookies file not found: ${cookiesFile}`);
-        return;
-    }
-
-    let contents = '';
-    try {
-        contents = fs.readFileSync(cookiesFile, 'utf-8');
-    } catch (e) {
-        console.error(`[!] Failed to read COOKIES_TXT_FILE: ${e.message}`);
-        return;
-    }
-
-    const { cookies, skipped } = parseCookiesTxt(contents);
-    if (cookies.length === 0) {
-        console.error('[!] No cookies found to import');
-        return;
-    }
-
-    console.error(`[*] Importing ${cookies.length} cookies from ${cookiesFile}...`);
-    if (skipped) {
-        console.error(`[*] Skipped ${skipped} malformed cookie line(s)`);
-    }
-    if (!userDataDir) {
-        console.error('[!] CHROME_USER_DATA_DIR not set; cookies will not persist beyond this session');
-    }
-
-    const page = await browser.newPage();
-    const client = await page.target().createCDPSession();
-    await client.send('Network.enable');
-
-    const chunkSize = 200;
-    let imported = 0;
-    for (let i = 0; i < cookies.length; i += chunkSize) {
-        const chunk = cookies.slice(i, i + chunkSize);
-        try {
-            await client.send('Network.setCookies', { cookies: chunk });
-            imported += chunk.length;
-        } catch (e) {
-            console.error(`[!] Failed to import cookies ${i + 1}-${i + chunk.length}: ${e.message}`);
-        }
-    }
-
-    await page.close();
-    console.error(`[+] Imported ${imported}/${cookies.length} cookies`);
-}
-
-function getPortFromCdpUrl(cdpUrl) {
-    if (!cdpUrl) return null;
-    const match = cdpUrl.match(/:(\d+)\/devtools\//);
-    return match ? match[1] : null;
-}
-
-async function fetchDevtoolsTargets(cdpUrl) {
-    const port = getPortFromCdpUrl(cdpUrl);
-    if (!port) return [];
-
-    const urlPath = '/json/list';
-    return new Promise((resolve, reject) => {
-        const req = http.get(
-            { hostname: '127.0.0.1', port, path: urlPath },
-            (res) => {
-                let data = '';
-                res.on('data', (chunk) => (data += chunk));
-                res.on('end', () => {
-                    try {
-                        const targets = JSON.parse(data);
-                        resolve(Array.isArray(targets) ? targets : []);
-                    } catch (e) {
-                        reject(e);
-                    }
-                });
-            }
-        );
-        req.on('error', reject);
-    });
-}
-
-async function discoverExtensionTargets(cdpUrl, installedExtensions) {
-    const builtinIds = [
-        'nkeimhogjdpnpccoofpliimaahmaaome',
-        'fignfifoniblkonapihmkfakmlgkbkcf',
-        'ahfgeienlihckogmohjhadlkjgocpleb',
-        'mhjfbmdgcfjbbpaeojofohoefgiehjai',
-    ];
-
-    let targets = [];
-    for (let i = 0; i < 10; i += 1) {
-        try {
-            targets = await fetchDevtoolsTargets(cdpUrl);
-            if (targets.length > 0) break;
-        } catch (e) {
-            // Ignore and retry
-        }
-        await new Promise(r => setTimeout(r, 500));
-    }
-
-    const customExtTargets = targets.filter(t => {
-        const url = t.url || '';
-        if (!url.startsWith('chrome-extension://')) return false;
-        const extId = url.split('://')[1].split('/')[0];
-        return !builtinIds.includes(extId);
-    });
-
-    console.error(`[+] Found ${customExtTargets.length} custom extension target(s) via /json/list`);
-
-    for (const target of customExtTargets) {
-        const url = target.url || '';
-        const extId = url.split('://')[1].split('/')[0];
-        console.error(`[+] Extension target: ${extId} (${target.type || 'unknown'})`);
-    }
-
-    const runtimeIds = new Set(customExtTargets.map(t => (t.url || '').split('://')[1].split('/')[0]));
-    for (const ext of installedExtensions) {
-        if (ext.id) {
-            ext.loaded = runtimeIds.has(ext.id);
-        }
-    }
-
-    if (customExtTargets.length === 0 && installedExtensions.length > 0) {
-        console.error(`[!] Warning: No custom extensions detected. Extension loading may have failed.`);
-        console.error(`[!] Make sure you are using Chromium, not Chrome (Chrome 137+ removed --load-extension support)`);
-    }
-}
-
-// Parse command line arguments
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach((arg) => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-// Cleanup handler for SIGTERM
-async function cleanup() {
-    console.error('[*] Cleaning up Chrome session...');
-
-    // Try graceful browser close first
-    if (browserInstance) {
-        try {
-            console.error('[*] Closing browser gracefully...');
-            await browserInstance.close();
-            browserInstance = null;
-            console.error('[+] Browser closed gracefully');
-        } catch (e) {
-            console.error(`[!] Graceful close failed: ${e.message}`);
-        }
-    }
-
-    // Kill Chrome process
-    if (chromePid) {
-        await killChrome(chromePid, OUTPUT_DIR);
-    }
-
-    process.exit(0);
-}
-
-// Register signal handlers
-process.on('SIGTERM', cleanup);
-process.on('SIGINT', cleanup);
-
-async function main() {
-    const args = parseArgs();
-    const crawlId = args.crawl_id;
-
-    try {
-        const binary = findChromium();
-        if (!binary) {
-            console.error('ERROR: Chromium binary not found');
-            console.error('DEPENDENCY_NEEDED=chromium');
-            console.error('BIN_PROVIDERS=puppeteer,env,playwright,apt,brew');
-            console.error('INSTALL_HINT=npx @puppeteer/browsers install chromium@latest');
-            process.exit(1);
-        }
-
-        // Get Chromium version
-        let version = '';
-        try {
-            const { execSync } = require('child_process');
-            version = execSync(`"${binary}" --version`, { encoding: 'utf8', timeout: 5000 })
-                .trim()
-                .slice(0, 64);
-        } catch (e) {}
-
-        console.error(`[*] Using browser: ${binary}`);
-        if (version) console.error(`[*] Version: ${version}`);
-
-        // Load installed extensions
-        const extensionsDir = getExtensionsDir();
-        const userDataDir = getEnv('CHROME_USER_DATA_DIR');
-        const cookiesFile = getEnv('COOKIES_TXT_FILE') || getEnv('COOKIES_FILE');
-
-        if (userDataDir) {
-            console.error(`[*] Using user data dir: ${userDataDir}`);
-        }
-        if (cookiesFile) {
-            console.error(`[*] Using cookies file: ${cookiesFile}`);
-        }
-
-        const installedExtensions = [];
-        const extensionPaths = [];
-        if (fs.existsSync(extensionsDir)) {
-            const files = fs.readdirSync(extensionsDir);
-            for (const file of files) {
-                if (file.endsWith('.extension.json')) {
-                    try {
-                        const extPath = path.join(extensionsDir, file);
-                        const extData = JSON.parse(fs.readFileSync(extPath, 'utf-8'));
-                        if (extData.unpacked_path && fs.existsSync(extData.unpacked_path)) {
-                            installedExtensions.push(extData);
-                            extensionPaths.push(extData.unpacked_path);
-                            console.error(`[*] Loading extension: ${extData.name || file}`);
-                        }
-                    } catch (e) {
-                        console.warn(`[!] Skipping invalid extension cache: ${file}`);
-                    }
-                }
-            }
-        }
-
-        if (installedExtensions.length > 0) {
-            console.error(`[+] Found ${installedExtensions.length} extension(s) to load`);
-        }
-
-        // Ensure extension IDs are available without chrome://extensions
-        for (const ext of installedExtensions) {
-            if (!ext.id && ext.unpacked_path) {
-                try {
-                    ext.id = getExtensionId(ext.unpacked_path);
-                } catch (e) {
-                    console.error(`[!] Failed to compute extension id for ${ext.name}: ${e.message}`);
-                }
-            }
-        }
-
-        // Note: PID file is written by run_hook() with hook-specific name
-        // Snapshot.cleanup() kills all *.pid processes when done
-        if (!fs.existsSync(OUTPUT_DIR)) {
-            fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-        }
-
-        // Launch Chromium using consolidated function
-        // userDataDir is derived from ACTIVE_PERSONA by get_config() if not explicitly set
-        const result = await launchChromium({
-            binary,
-            outputDir: OUTPUT_DIR,
-            userDataDir,
-            extensionPaths,
-        });
-
-        if (!result.success) {
-            console.error(`ERROR: ${result.error}`);
-            process.exit(1);
-        }
-
-        chromePid = result.pid;
-        const cdpUrl = result.cdpUrl;
-
-        // Discover extension targets at launch (no chrome://extensions)
-        if (extensionPaths.length > 0) {
-            await new Promise(r => setTimeout(r, 2000));
-            console.error('[*] Discovering extension targets via devtools /json/list...');
-            await discoverExtensionTargets(cdpUrl, installedExtensions);
-        }
-
-        // Only connect to CDP when cookies import is needed to reduce crash risk.
-        if (cookiesFile) {
-            console.error(`[*] Connecting puppeteer to CDP for cookie import...`);
-            const browser = await puppeteer.connect({
-                browserWSEndpoint: cdpUrl,
-                defaultViewport: null,
-            });
-            browserInstance = browser;
-
-            // Import cookies into Chrome profile at crawl start
-            await importCookiesFromFile(browser, cookiesFile, userDataDir);
-
-            try {
-                browser.disconnect();
-            } catch (e) {}
-            browserInstance = null;
-        } else {
-            console.error('[*] Skipping puppeteer CDP connection (no cookies to import)');
-        }
-
-        // Write extensions metadata with actual IDs
-        if (installedExtensions.length > 0) {
-            fs.writeFileSync(
-                path.join(OUTPUT_DIR, 'extensions.json'),
-                JSON.stringify(installedExtensions, null, 2)
-            );
-        }
-
-        console.error(`[+] Chromium session started for crawl ${crawlId}`);
-        console.error(`[+] CDP URL: ${cdpUrl}`);
-        console.error(`[+] PID: ${chromePid}`);
-
-        // Stay alive to handle cleanup on SIGTERM
-        console.log('[*] Chromium launch hook staying alive to handle cleanup...');
-        setInterval(() => {}, 1000000);
-
-    } catch (e) {
-        console.error(`ERROR: ${e.name}: ${e.message}`);
-        process.exit(1);
-    }
-}
-
-main().catch((e) => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js b/archivebox/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js
deleted file mode 100755
index 4f3c6594..00000000
--- a/archivebox/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js
+++ /dev/null
@@ -1,264 +0,0 @@
-#!/usr/bin/env node
-/**
- * Create a Chrome tab for this snapshot in the shared crawl Chrome session.
- *
- * Connects to the crawl-level Chrome session (from on_Crawl__90_chrome_launch.bg.js)
- * and creates a new tab. This hook does NOT launch its own Chrome instance.
- *
- * Usage: on_Snapshot__10_chrome_tab.bg.js --url=<url> --snapshot-id=<uuid> --crawl-id=<uuid>
- * Output: Creates chrome/ directory under snapshot output dir with:
- *   - cdp_url.txt: WebSocket URL for CDP connection
- *   - chrome.pid: Chrome process ID (from crawl)
- *   - target_id.txt: Target ID of this snapshot's tab
- *   - url.txt: The URL to be navigated to
- *
- * Environment variables:
- *     CRAWL_OUTPUT_DIR: Crawl output directory (to find crawl's Chrome session)
- *     CHROME_BINARY: Path to Chromium binary (optional, for version info)
- *
- * This is a background hook that stays alive until SIGTERM so the tab
- * can be closed cleanly at the end of the snapshot run.
- */
-
-const fs = require('fs');
-const path = require('path');
-const { execSync } = require('child_process');
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const puppeteer = require('puppeteer');
-const { getEnv, getEnvInt } = require('./chrome_utils.js');
-
-// Extractor metadata
-const PLUGIN_NAME = 'chrome_tab';
-const OUTPUT_DIR = '.';  // Hook already runs in chrome/ output directory
-const CHROME_SESSION_DIR = '.';
-const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
-
-let finalStatus = 'failed';
-let finalOutput = '';
-let finalError = '';
-let cmdVersion = '';
-let finalized = false;
-
-// Parse command line arguments
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach(arg => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-function emitResult(statusOverride) {
-    if (finalized) return;
-    finalized = true;
-
-    const status = statusOverride || finalStatus;
-    const outputStr = status === 'succeeded'
-        ? finalOutput
-        : (finalError || finalOutput || '');
-
-    const result = {
-        type: 'ArchiveResult',
-        status,
-        output_str: outputStr,
-    };
-    if (cmdVersion) {
-        result.cmd_version = cmdVersion;
-    }
-    console.log(JSON.stringify(result));
-}
-
-// Cleanup handler for SIGTERM - close this snapshot's tab
-async function cleanup(signal) {
-    if (signal) {
-        console.error(`\nReceived ${signal}, closing chrome tab...`);
-    }
-    try {
-        const cdpFile = path.join(OUTPUT_DIR, 'cdp_url.txt');
-        const targetIdFile = path.join(OUTPUT_DIR, 'target_id.txt');
-
-        if (fs.existsSync(cdpFile) && fs.existsSync(targetIdFile)) {
-            const cdpUrl = fs.readFileSync(cdpFile, 'utf8').trim();
-            const targetId = fs.readFileSync(targetIdFile, 'utf8').trim();
-
-            const browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
-            const pages = await browser.pages();
-            const page = pages.find(p => p.target()._targetId === targetId);
-
-            if (page) {
-                await page.close();
-            }
-            browser.disconnect();
-        }
-    } catch (e) {
-        // Best effort
-    }
-    emitResult();
-    process.exit(finalStatus === 'succeeded' ? 0 : 1);
-}
-
-// Register signal handlers
-process.on('SIGTERM', () => cleanup('SIGTERM'));
-process.on('SIGINT', () => cleanup('SIGINT'));
-
-// Try to find the crawl's Chrome session
-function getCrawlChromeSession() {
-    // Use CRAWL_OUTPUT_DIR env var set by get_config() in configset.py
-    const crawlOutputDir = getEnv('CRAWL_OUTPUT_DIR', '');
-    if (!crawlOutputDir) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-
-    const crawlChromeDir = path.join(crawlOutputDir, 'chrome');
-    const cdpFile = path.join(crawlChromeDir, 'cdp_url.txt');
-    const pidFile = path.join(crawlChromeDir, 'chrome.pid');
-
-    if (!fs.existsSync(cdpFile)) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-    if (!fs.existsSync(pidFile)) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-
-    const cdpUrl = fs.readFileSync(cdpFile, 'utf-8').trim();
-    const pid = parseInt(fs.readFileSync(pidFile, 'utf-8').trim(), 10);
-    if (!cdpUrl) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-    if (!pid || Number.isNaN(pid)) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-
-    // Verify the process is still running
-    try {
-        process.kill(pid, 0);  // Signal 0 = check if process exists
-    } catch (e) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-
-    return { cdpUrl, pid };
-}
-
-async function waitForCrawlChromeSession(timeoutMs, intervalMs = 250) {
-    const startTime = Date.now();
-    let lastError = null;
-
-    while (Date.now() - startTime < timeoutMs) {
-        try {
-            return getCrawlChromeSession();
-        } catch (e) {
-            lastError = e;
-        }
-        await new Promise(resolve => setTimeout(resolve, intervalMs));
-    }
-
-    if (lastError) {
-        throw lastError;
-    }
-    throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-}
-
-// Create a new tab in an existing Chrome session
-async function createTabInExistingChrome(cdpUrl, url, pid) {
-    console.log(`[*] Connecting to existing Chrome session: ${cdpUrl}`);
-
-    // Connect Puppeteer to the running Chrome
-    const browser = await puppeteer.connect({
-        browserWSEndpoint: cdpUrl,
-        defaultViewport: null,
-    });
-
-    // Create a new tab for this snapshot
-    const page = await browser.newPage();
-
-    // Get the page target ID
-    const target = page.target();
-    const targetId = target._targetId;
-
-    // Write session info
-    fs.writeFileSync(path.join(OUTPUT_DIR, 'cdp_url.txt'), cdpUrl);
-    fs.writeFileSync(path.join(OUTPUT_DIR, 'chrome.pid'), String(pid));
-    fs.writeFileSync(path.join(OUTPUT_DIR, 'target_id.txt'), targetId);
-    fs.writeFileSync(path.join(OUTPUT_DIR, 'url.txt'), url);
-
-    // Disconnect Puppeteer (Chrome and tab stay alive)
-    browser.disconnect();
-
-    return { success: true, output: OUTPUT_DIR, cdpUrl, targetId, pid };
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-    const crawlId = args.crawl_id || getEnv('CRAWL_ID', '');
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__10_chrome_tab.bg.js --url=<url> --snapshot-id=<uuid> [--crawl-id=<uuid>]');
-        process.exit(1);
-    }
-
-    let status = 'failed';
-    let output = '';
-    let error = '';
-    let version = '';
-
-    try {
-        // Get Chrome version
-        try {
-            const binary = getEnv('CHROME_BINARY', '').trim();
-            if (binary) {
-                version = execSync(`"${binary}" --version`, { encoding: 'utf8', timeout: 5000 }).trim().slice(0, 64);
-            }
-        } catch (e) {
-            version = '';
-        }
-
-        // Try to use existing crawl Chrome session (wait for readiness)
-        const timeoutSeconds = getEnvInt('CHROME_TAB_TIMEOUT', getEnvInt('CHROME_TIMEOUT', getEnvInt('TIMEOUT', 60)));
-        const crawlSession = await waitForCrawlChromeSession(timeoutSeconds * 1000);
-        console.log(`[*] Found existing Chrome session from crawl ${crawlId}`);
-        const result = await createTabInExistingChrome(crawlSession.cdpUrl, url, crawlSession.pid);
-
-        if (result.success) {
-            status = 'succeeded';
-            output = result.output;
-            console.log(`[+] Chrome tab ready`);
-            console.log(`[+] CDP URL: ${result.cdpUrl}`);
-            console.log(`[+] Page target ID: ${result.targetId}`);
-        } else {
-            status = 'failed';
-            error = result.error;
-        }
-    } catch (e) {
-        error = `${e.name}: ${e.message}`;
-        status = 'failed';
-    }
-
-    if (error) {
-        console.error(`ERROR: ${error}`);
-    }
-
-    finalStatus = status;
-    finalOutput = output || '';
-    finalError = error || '';
-    cmdVersion = version || '';
-
-    if (status !== 'succeeded') {
-        emitResult(status);
-        process.exit(1);
-    }
-
-    console.log('[*] Chrome tab created, waiting for cleanup signal...');
-    await new Promise(() => {}); // Keep alive until SIGTERM
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/chrome/on_Snapshot__11_chrome_wait.js b/archivebox/plugins/chrome/on_Snapshot__11_chrome_wait.js
deleted file mode 100644
index dae2a3db..00000000
--- a/archivebox/plugins/chrome/on_Snapshot__11_chrome_wait.js
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/usr/bin/env node
-/**
- * Wait for Chrome session files to exist (cdp_url.txt + target_id.txt).
- *
- * This is a foreground hook that blocks until the Chrome tab is ready,
- * so downstream hooks can safely connect to CDP.
- *
- * Usage: on_Snapshot__11_chrome_wait.js --url=<url> --snapshot-id=<uuid>
- */
-
-const fs = require('fs');
-const path = require('path');
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const {
-    getEnvInt,
-    waitForChromeSession,
-    readCdpUrl,
-    readTargetId,
-} = require('./chrome_utils.js');
-
-const CHROME_SESSION_DIR = '.';
-const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
-
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach(arg => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__11_chrome_wait.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    const timeoutSeconds = getEnvInt('CHROME_TAB_TIMEOUT', getEnvInt('CHROME_TIMEOUT', getEnvInt('TIMEOUT', 60)));
-    const timeoutMs = timeoutSeconds * 1000;
-
-    console.error(`[chrome_wait] Waiting for Chrome session (timeout=${timeoutSeconds}s)...`);
-
-    const ready = await waitForChromeSession(CHROME_SESSION_DIR, timeoutMs);
-    if (!ready) {
-        const error = CHROME_SESSION_REQUIRED_ERROR;
-        console.error(`[chrome_wait] ERROR: ${error}`);
-        console.log(JSON.stringify({ type: 'ArchiveResult', status: 'failed', output_str: error }));
-        process.exit(1);
-    }
-
-    const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
-    const targetId = readTargetId(CHROME_SESSION_DIR);
-    if (!cdpUrl || !targetId) {
-        const error = CHROME_SESSION_REQUIRED_ERROR;
-        console.error(`[chrome_wait] ERROR: ${error}`);
-        console.log(JSON.stringify({ type: 'ArchiveResult', status: 'failed', output_str: error }));
-        process.exit(1);
-    }
-
-    console.error(`[chrome_wait] Chrome session ready (cdp_url=${cdpUrl.slice(0, 32)}..., target_id=${targetId}).`);
-    console.log(JSON.stringify({ type: 'ArchiveResult', status: 'succeeded', output_str: 'chrome session ready' }));
-    process.exit(0);
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/chrome/on_Snapshot__30_chrome_navigate.js b/archivebox/plugins/chrome/on_Snapshot__30_chrome_navigate.js
deleted file mode 100644
index 33c515ec..00000000
--- a/archivebox/plugins/chrome/on_Snapshot__30_chrome_navigate.js
+++ /dev/null
@@ -1,225 +0,0 @@
-#!/usr/bin/env node
-/**
- * Navigate the Chrome browser to the target URL.
- *
- * This is a simple hook that ONLY navigates - nothing else.
- * Pre-load hooks (21-29) should set up their own CDP listeners.
- * Post-load hooks (31+) can then read from the loaded page.
- *
- * Usage: on_Snapshot__30_chrome_navigate.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes page_loaded.txt marker when navigation completes
- *
- * Environment variables:
- *     CHROME_PAGELOAD_TIMEOUT: Timeout in seconds (default: 60)
- *     CHROME_DELAY_AFTER_LOAD: Extra delay after load in seconds (default: 0)
- *     CHROME_WAIT_FOR: Wait condition (default: networkidle2)
- */
-
-const fs = require('fs');
-const path = require('path');
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer');
-
-const PLUGIN_NAME = 'chrome_navigate';
-const CHROME_SESSION_DIR = '.';
-const OUTPUT_DIR = '.';
-const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
-
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach(arg => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-function getEnv(name, defaultValue = '') {
-    return (process.env[name] || defaultValue).trim();
-}
-
-function getEnvInt(name, defaultValue = 0) {
-    const val = parseInt(getEnv(name, String(defaultValue)), 10);
-    return isNaN(val) ? defaultValue : val;
-}
-
-function getEnvFloat(name, defaultValue = 0) {
-    const val = parseFloat(getEnv(name, String(defaultValue)));
-    return isNaN(val) ? defaultValue : val;
-}
-
-async function waitForChromeTabOpen(timeoutMs = 60000) {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(cdpFile) && fs.existsSync(targetIdFile)) {
-            return true;
-        }
-        // Wait 100ms before checking again
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-
-    return false;
-}
-
-function getCdpUrl() {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    if (!fs.existsSync(cdpFile)) return null;
-    return fs.readFileSync(cdpFile, 'utf8').trim();
-}
-
-function getPageId() {
-    const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
-    if (!fs.existsSync(targetIdFile)) return null;
-    return fs.readFileSync(targetIdFile, 'utf8').trim();
-}
-
-function getWaitCondition() {
-    const waitFor = getEnv('CHROME_WAIT_FOR', 'networkidle2').toLowerCase();
-    const valid = ['domcontentloaded', 'load', 'networkidle0', 'networkidle2'];
-    return valid.includes(waitFor) ? waitFor : 'networkidle2';
-}
-
-function sleep(ms) {
-    return new Promise(resolve => setTimeout(resolve, ms));
-}
-
-async function navigate(url, cdpUrl) {
-    const timeout = (getEnvInt('CHROME_PAGELOAD_TIMEOUT') || getEnvInt('CHROME_TIMEOUT') || getEnvInt('TIMEOUT', 60)) * 1000;
-    const delayAfterLoad = getEnvFloat('CHROME_DELAY_AFTER_LOAD', 0) * 1000;
-    const waitUntil = getWaitCondition();
-    const targetId = getPageId();
-
-    let browser = null;
-    const navStartTime = Date.now();
-
-    try {
-        browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
-
-        const pages = await browser.pages();
-        if (pages.length === 0) {
-            return { success: false, error: 'No pages found in browser', waitUntil, elapsed: Date.now() - navStartTime };
-        }
-
-        // Find page by target ID if available
-        let page = null;
-        if (targetId) {
-            page = pages.find(p => {
-                const target = p.target();
-                return target && target._targetId === targetId;
-            });
-        }
-        if (!page) {
-            page = pages[pages.length - 1];
-        }
-
-        // Navigate
-        console.log(`Navigating to ${url} (wait: ${waitUntil}, timeout: ${timeout}ms)`);
-        const response = await page.goto(url, { waitUntil, timeout });
-
-        // Optional delay
-        if (delayAfterLoad > 0) {
-            console.log(`Waiting ${delayAfterLoad}ms after load...`);
-            await sleep(delayAfterLoad);
-        }
-
-        const finalUrl = page.url();
-        const status = response ? response.status() : null;
-        const elapsed = Date.now() - navStartTime;
-
-        // Write navigation state as JSON
-        const navigationState = {
-            waitUntil,
-            elapsed,
-            url,
-            finalUrl,
-            status,
-            timestamp: new Date().toISOString()
-        };
-        fs.writeFileSync(path.join(OUTPUT_DIR, 'navigation.json'), JSON.stringify(navigationState, null, 2));
-
-        // Write marker files for backwards compatibility
-        fs.writeFileSync(path.join(OUTPUT_DIR, 'page_loaded.txt'), new Date().toISOString());
-        fs.writeFileSync(path.join(OUTPUT_DIR, 'final_url.txt'), finalUrl);
-
-        browser.disconnect();
-
-        return { success: true, finalUrl, status, waitUntil, elapsed };
-
-    } catch (e) {
-        if (browser) browser.disconnect();
-        const elapsed = Date.now() - navStartTime;
-        return { success: false, error: `${e.name}: ${e.message}`, waitUntil, elapsed };
-    }
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__30_chrome_navigate.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    const startTs = new Date();
-    let status = 'failed';
-    let output = null;
-    let error = '';
-
-    // Wait for chrome tab to be open (up to 60s)
-    const tabOpen = await waitForChromeTabOpen(60000);
-    if (!tabOpen) {
-        console.error(`ERROR: ${CHROME_SESSION_REQUIRED_ERROR}`);
-        process.exit(1);
-    }
-
-    const cdpUrl = getCdpUrl();
-    if (!cdpUrl) {
-        console.error(`ERROR: ${CHROME_SESSION_REQUIRED_ERROR}`);
-        process.exit(1);
-    }
-
-    const result = await navigate(url, cdpUrl);
-
-    if (result.success) {
-        status = 'succeeded';
-        output = 'navigation.json';
-        console.log(`Page loaded: ${result.finalUrl} (HTTP ${result.status}) in ${result.elapsed}ms (waitUntil: ${result.waitUntil})`);
-    } else {
-        error = result.error;
-        // Save navigation state even on failure
-        const navigationState = {
-            waitUntil: result.waitUntil,
-            elapsed: result.elapsed,
-            url,
-            error: result.error,
-            timestamp: new Date().toISOString()
-        };
-        fs.writeFileSync(path.join(OUTPUT_DIR, 'navigation.json'), JSON.stringify(navigationState, null, 2));
-    }
-
-    const endTs = new Date();
-
-    if (error) console.error(`ERROR: ${error}`);
-
-    // Output clean JSONL (no RESULT_JSON= prefix)
-    console.log(JSON.stringify({
-        type: 'ArchiveResult',
-        status,
-        output_str: output || error || '',
-    }));
-
-    process.exit(status === 'succeeded' ? 0 : 1);
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/chrome/templates/icon.html b/archivebox/plugins/chrome/templates/icon.html
deleted file mode 100644
index 18555344..00000000
--- a/archivebox/plugins/chrome/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--chrome" title="Chrome"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="4.5" width="18" height="15" rx="2"/><path d="M3 9h18"/><circle cx="7" cy="7" r="1" fill="currentColor" stroke="none"/><circle cx="11" cy="7" r="1" fill="currentColor" stroke="none"/></svg></span>
diff --git a/archivebox/plugins/chrome/tests/chrome_test_helpers.py b/archivebox/plugins/chrome/tests/chrome_test_helpers.py
deleted file mode 100644
index 3e37ce26..00000000
--- a/archivebox/plugins/chrome/tests/chrome_test_helpers.py
+++ /dev/null
@@ -1,1002 +0,0 @@
-"""
-Shared Chrome test helpers for plugin integration tests.
-
-This module provides common utilities for Chrome-based plugin tests, reducing
-duplication across test files. Functions delegate to chrome_utils.js (the single
-source of truth) with Python fallbacks.
-
-Function names match the JS equivalents in snake_case:
-    JS: getMachineType()  -> Python: get_machine_type()
-    JS: getLibDir()       -> Python: get_lib_dir()
-    JS: getNodeModulesDir() -> Python: get_node_modules_dir()
-    JS: getExtensionsDir() -> Python: get_extensions_dir()
-    JS: findChromium()    -> Python: find_chromium()
-    JS: killChrome()      -> Python: kill_chrome()
-    JS: getTestEnv()      -> Python: get_test_env()
-
-Usage:
-    # Path helpers (delegate to chrome_utils.js):
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-        get_test_env,           # env dict with LIB_DIR, NODE_MODULES_DIR, MACHINE_TYPE
-        get_machine_type,       # e.g., 'x86_64-linux', 'arm64-darwin'
-        get_lib_dir,            # Path to lib dir
-        get_node_modules_dir,   # Path to node_modules
-        get_extensions_dir,     # Path to chrome extensions
-        find_chromium,          # Find Chrome/Chromium binary
-        kill_chrome,            # Kill Chrome process by PID
-    )
-
-    # Test file helpers:
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-        get_plugin_dir,         # get_plugin_dir(__file__) -> plugin dir Path
-        get_hook_script,        # Find hook script by glob pattern
-        PLUGINS_ROOT,           # Path to plugins root
-        LIB_DIR,                # Path to lib dir (lazy-loaded)
-        NODE_MODULES_DIR,       # Path to node_modules (lazy-loaded)
-    )
-
-    # For Chrome session tests:
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-        chrome_session,         # Context manager (Full Chrome + tab setup with automatic cleanup)
-        cleanup_chrome,         # Manual cleanup by PID (rarely needed)
-    )
-
-    # For extension tests:
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-        setup_test_env,         # Full dir structure + Chrome install
-        launch_chromium_session, # Launch Chrome, return CDP URL
-        kill_chromium_session,   # Cleanup Chrome
-    )
-
-    # Run hooks and parse JSONL:
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-        run_hook,               # Run hook, return (returncode, stdout, stderr)
-        parse_jsonl_output,     # Parse JSONL from stdout
-    )
-"""
-
-import json
-import os
-import platform
-import signal
-import subprocess
-import sys
-import time
-from datetime import datetime
-from pathlib import Path
-from typing import Tuple, Optional, List, Dict, Any
-from contextlib import contextmanager
-
-
-# Plugin directory locations
-CHROME_PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = CHROME_PLUGIN_DIR.parent
-
-# Hook script locations
-CHROME_INSTALL_HOOK = CHROME_PLUGIN_DIR / 'on_Crawl__70_chrome_install.py'
-CHROME_LAUNCH_HOOK = CHROME_PLUGIN_DIR / 'on_Crawl__90_chrome_launch.bg.js'
-CHROME_TAB_HOOK = CHROME_PLUGIN_DIR / 'on_Snapshot__10_chrome_tab.bg.js'
-CHROME_NAVIGATE_HOOK = next(CHROME_PLUGIN_DIR.glob('on_Snapshot__*_chrome_navigate.*'), None)
-CHROME_UTILS = CHROME_PLUGIN_DIR / 'chrome_utils.js'
-PUPPETEER_BINARY_HOOK = PLUGINS_ROOT / 'puppeteer' / 'on_Binary__12_puppeteer_install.py'
-PUPPETEER_CRAWL_HOOK = PLUGINS_ROOT / 'puppeteer' / 'on_Crawl__60_puppeteer_install.py'
-NPM_BINARY_HOOK = PLUGINS_ROOT / 'npm' / 'on_Binary__10_npm_install.py'
-
-
-# =============================================================================
-# Path Helpers - delegates to chrome_utils.js with Python fallback
-# Function names match JS: getMachineType -> get_machine_type, etc.
-# =============================================================================
-
-
-def _call_chrome_utils(command: str, *args: str, env: Optional[dict] = None) -> Tuple[int, str, str]:
-    """Call chrome_utils.js CLI command (internal helper).
-
-    This is the central dispatch for calling the JS utilities from Python.
-    All path calculations and Chrome operations are centralized in chrome_utils.js
-    to ensure consistency between Python and JavaScript code.
-
-    Args:
-        command: The CLI command (e.g., 'findChromium', 'getTestEnv')
-        *args: Additional command arguments
-        env: Environment dict (default: current env)
-
-    Returns:
-        Tuple of (returncode, stdout, stderr)
-    """
-    cmd = ['node', str(CHROME_UTILS), command] + list(args)
-    result = subprocess.run(
-        cmd,
-        capture_output=True,
-        text=True,
-        timeout=30,
-        env=env or os.environ.copy()
-    )
-    return result.returncode, result.stdout, result.stderr
-
-
-def get_plugin_dir(test_file: str) -> Path:
-    """Get the plugin directory from a test file path.
-
-    Usage:
-        PLUGIN_DIR = get_plugin_dir(__file__)
-
-    Args:
-        test_file: The __file__ of the test module (e.g., test_screenshot.py)
-
-    Returns:
-        Path to the plugin directory (e.g., plugins/screenshot/)
-    """
-    return Path(test_file).parent.parent
-
-
-def get_hook_script(plugin_dir: Path, pattern: str) -> Optional[Path]:
-    """Find a hook script in a plugin directory by pattern.
-
-    Usage:
-        HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_screenshot.*')
-
-    Args:
-        plugin_dir: Path to the plugin directory
-        pattern: Glob pattern to match
-
-    Returns:
-        Path to the hook script or None if not found
-    """
-    matches = list(plugin_dir.glob(pattern))
-    return matches[0] if matches else None
-
-
-def get_machine_type() -> str:
-    """Get machine type string (e.g., 'x86_64-linux', 'arm64-darwin').
-
-    Matches JS: getMachineType()
-
-    Tries chrome_utils.js first, falls back to Python computation.
-    """
-    # Try JS first (single source of truth)
-    returncode, stdout, stderr = _call_chrome_utils('getMachineType')
-    if returncode == 0 and stdout.strip():
-        return stdout.strip()
-
-    # Fallback to Python computation
-    if os.environ.get('MACHINE_TYPE'):
-        return os.environ['MACHINE_TYPE']
-
-    machine = platform.machine().lower()
-    system = platform.system().lower()
-    if machine in ('arm64', 'aarch64'):
-        machine = 'arm64'
-    elif machine in ('x86_64', 'amd64'):
-        machine = 'x86_64'
-    return f"{machine}-{system}"
-
-
-def get_lib_dir() -> Path:
-    """Get LIB_DIR path for platform-specific binaries.
-
-    Matches JS: getLibDir()
-
-    Tries chrome_utils.js first, falls back to Python computation.
-    """
-    # Try JS first
-    returncode, stdout, stderr = _call_chrome_utils('getLibDir')
-    if returncode == 0 and stdout.strip():
-        return Path(stdout.strip())
-
-    # Fallback to Python
-    if os.environ.get('LIB_DIR'):
-        return Path(os.environ['LIB_DIR'])
-    raise Exception('LIB_DIR env var must be set!')
-
-
-def get_node_modules_dir() -> Path:
-    """Get NODE_MODULES_DIR path for npm packages.
-
-    Matches JS: getNodeModulesDir()
-
-    Tries chrome_utils.js first, falls back to Python computation.
-    """
-    # Try JS first
-    returncode, stdout, stderr = _call_chrome_utils('getNodeModulesDir')
-    if returncode == 0 and stdout.strip():
-        return Path(stdout.strip())
-
-    # Fallback to Python
-    if os.environ.get('NODE_MODULES_DIR'):
-        return Path(os.environ['NODE_MODULES_DIR'])
-    lib_dir = get_lib_dir()
-    return lib_dir / 'npm' / 'node_modules'
-
-
-def get_extensions_dir() -> str:
-    """Get the Chrome extensions directory path.
-
-    Matches JS: getExtensionsDir()
-
-    Tries chrome_utils.js first, falls back to Python computation.
-    """
-    try:
-        returncode, stdout, stderr = _call_chrome_utils('getExtensionsDir')
-        if returncode == 0 and stdout.strip():
-            return stdout.strip()
-    except subprocess.TimeoutExpired:
-        pass  # Fall through to default computation
-
-    # Fallback to default computation if JS call fails
-    data_dir = os.environ.get('DATA_DIR', '.')
-    persona = os.environ.get('ACTIVE_PERSONA', 'Default')
-    return str(Path(data_dir) / 'personas' / persona / 'chrome_extensions')
-
-
-def link_puppeteer_cache(lib_dir: Path) -> None:
-    """Best-effort symlink from system Puppeteer cache into test lib_dir.
-
-    Avoids repeated Chromium downloads across tests by reusing the
-    default Puppeteer cache directory.
-    """
-    cache_dir = lib_dir / 'puppeteer'
-    cache_dir.mkdir(parents=True, exist_ok=True)
-
-    candidates = [
-        Path.home() / 'Library' / 'Caches' / 'puppeteer',
-        Path.home() / '.cache' / 'puppeteer',
-    ]
-    for src_root in candidates:
-        if not src_root.exists():
-            continue
-        for item in src_root.iterdir():
-            dst = cache_dir / item.name
-            if dst.exists():
-                continue
-            try:
-                os.symlink(item, dst, target_is_directory=item.is_dir())
-            except Exception:
-                # Best-effort only; if symlink fails, leave as-is.
-                pass
-
-
-def find_chromium(data_dir: Optional[str] = None) -> Optional[str]:
-    """Find the Chromium binary path.
-
-    Matches JS: findChromium()
-
-    Uses chrome_utils.js which checks:
-    - CHROME_BINARY env var
-    - @puppeteer/browsers install locations
-    - System Chromium locations
-    - Falls back to Chrome (with warning)
-
-    Args:
-        data_dir: Optional DATA_DIR override
-
-    Returns:
-        Path to Chromium binary or None if not found
-    """
-    env = os.environ.copy()
-    if data_dir:
-        env['DATA_DIR'] = str(data_dir)
-    returncode, stdout, stderr = _call_chrome_utils('findChromium', env=env)
-    if returncode == 0 and stdout.strip():
-        return stdout.strip()
-    return None
-
-
-def kill_chrome(pid: int, output_dir: Optional[str] = None) -> bool:
-    """Kill a Chrome process by PID.
-
-    Matches JS: killChrome()
-
-    Uses chrome_utils.js which handles:
-    - SIGTERM then SIGKILL
-    - Process group killing
-    - Zombie process cleanup
-
-    Args:
-        pid: Process ID to kill
-        output_dir: Optional chrome output directory for PID file cleanup
-
-    Returns:
-        True if the kill command succeeded
-    """
-    args = [str(pid)]
-    if output_dir:
-        args.append(str(output_dir))
-    returncode, stdout, stderr = _call_chrome_utils('killChrome', *args)
-    return returncode == 0
-
-
-def get_test_env() -> dict:
-    """Get environment dict with all paths set correctly for tests.
-
-    Matches JS: getTestEnv()
-
-    Tries chrome_utils.js first for path values, builds env dict.
-    Use this for all subprocess calls in plugin tests.
-    """
-    env = os.environ.copy()
-
-    # Try to get all paths from JS (single source of truth)
-    returncode, stdout, stderr = _call_chrome_utils('getTestEnv')
-    if returncode == 0 and stdout.strip():
-        try:
-            js_env = json.loads(stdout)
-            env.update(js_env)
-            return env
-        except json.JSONDecodeError:
-            pass
-
-    # Fallback to Python computation
-    lib_dir = get_lib_dir()
-    env['LIB_DIR'] = str(lib_dir)
-    env['NODE_MODULES_DIR'] = str(get_node_modules_dir())
-    env['MACHINE_TYPE'] = get_machine_type()
-    return env
-
-
-# Backward compatibility aliases (deprecated, use new names)
-find_chromium_binary = find_chromium
-kill_chrome_via_js = kill_chrome
-get_machine_type_from_js = get_machine_type
-get_test_env_from_js = get_test_env
-
-
-# =============================================================================
-# Module-level constants (lazy-loaded on first access)
-# Import these directly: from chrome_test_helpers import LIB_DIR, NODE_MODULES_DIR
-# =============================================================================
-
-# These are computed once when first accessed
-_LIB_DIR: Optional[Path] = None
-_NODE_MODULES_DIR: Optional[Path] = None
-
-
-def _get_lib_dir_cached() -> Path:
-    global _LIB_DIR
-    if _LIB_DIR is None:
-        _LIB_DIR = get_lib_dir()
-    return _LIB_DIR
-
-
-def _get_node_modules_dir_cached() -> Path:
-    global _NODE_MODULES_DIR
-    if _NODE_MODULES_DIR is None:
-        _NODE_MODULES_DIR = get_node_modules_dir()
-    return _NODE_MODULES_DIR
-
-
-# Module-level constants that can be imported directly
-# Usage: from chrome_test_helpers import LIB_DIR, NODE_MODULES_DIR
-class _LazyPath:
-    """Lazy path that computes value on first access."""
-    def __init__(self, getter):
-        self._getter = getter
-        self._value = None
-
-    def __fspath__(self):
-        if self._value is None:
-            self._value = self._getter()
-        return str(self._value)
-
-    def __truediv__(self, other):
-        if self._value is None:
-            self._value = self._getter()
-        return self._value / other
-
-    def __str__(self):
-        return self.__fspath__()
-
-    def __repr__(self):
-        return f"<LazyPath: {self.__fspath__()}>"
-
-
-LIB_DIR = _LazyPath(_get_lib_dir_cached)
-NODE_MODULES_DIR = _LazyPath(_get_node_modules_dir_cached)
-
-
-# =============================================================================
-# Hook Execution Helpers
-# =============================================================================
-
-
-def run_hook(
-    hook_script: Path,
-    url: str,
-    snapshot_id: str,
-    cwd: Optional[Path] = None,
-    env: Optional[dict] = None,
-    timeout: int = 60,
-    extra_args: Optional[List[str]] = None,
-) -> Tuple[int, str, str]:
-    """Run a hook script and return (returncode, stdout, stderr).
-
-    Usage:
-        returncode, stdout, stderr = run_hook(
-            HOOK_SCRIPT, 'https://example.com', 'test-snap-123',
-            cwd=tmpdir, env=get_test_env()
-        )
-
-    Args:
-        hook_script: Path to the hook script
-        url: URL to process
-        snapshot_id: Snapshot ID
-        cwd: Working directory (default: current dir)
-        env: Environment dict (default: get_test_env())
-        timeout: Timeout in seconds
-        extra_args: Additional arguments to pass
-
-    Returns:
-        Tuple of (returncode, stdout, stderr)
-    """
-    if env is None:
-        env = get_test_env()
-
-    # Determine interpreter based on file extension
-    if hook_script.suffix == '.py':
-        cmd = [sys.executable, str(hook_script)]
-    elif hook_script.suffix == '.js':
-        cmd = ['node', str(hook_script)]
-    else:
-        cmd = [str(hook_script)]
-
-    cmd.extend([f'--url={url}', f'--snapshot-id={snapshot_id}'])
-    if extra_args:
-        cmd.extend(extra_args)
-
-    result = subprocess.run(
-        cmd,
-        cwd=str(cwd) if cwd else None,
-        capture_output=True,
-        text=True,
-        env=env,
-        timeout=timeout
-    )
-    return result.returncode, result.stdout, result.stderr
-
-
-def parse_jsonl_output(stdout: str, record_type: str = 'ArchiveResult') -> Optional[Dict[str, Any]]:
-    """Parse JSONL output from hook stdout and return the specified record type.
-
-    Usage:
-        result = parse_jsonl_output(stdout)
-        if result and result['status'] == 'succeeded':
-            print("Success!")
-
-    Args:
-        stdout: The stdout from a hook execution
-        record_type: The 'type' field to look for (default: 'ArchiveResult')
-
-    Returns:
-        The parsed JSON dict or None if not found
-    """
-    for line in stdout.strip().split('\n'):
-        line = line.strip()
-        if not line.startswith('{'):
-            continue
-        try:
-            record = json.loads(line)
-            if record.get('type') == record_type:
-                return record
-        except json.JSONDecodeError:
-            continue
-    return None
-
-
-def parse_jsonl_records(stdout: str) -> List[Dict[str, Any]]:
-    """Parse all JSONL records from stdout."""
-    records: List[Dict[str, Any]] = []
-    for line in stdout.strip().split('\n'):
-        line = line.strip()
-        if not line.startswith('{'):
-            continue
-        try:
-            records.append(json.loads(line))
-        except json.JSONDecodeError:
-            continue
-    return records
-
-
-def apply_machine_updates(records: List[Dict[str, Any]], env: dict) -> None:
-    """Apply Machine update records to env dict in-place."""
-    for record in records:
-        if record.get('type') != 'Machine':
-            continue
-        config = record.get('config')
-        if not isinstance(config, dict):
-            continue
-        env.update(config)
-
-
-def install_chromium_with_hooks(env: dict, timeout: int = 300) -> str:
-    """Install Chromium via chrome crawl hook + puppeteer/npm hooks.
-
-    Returns absolute path to Chromium binary.
-    """
-    puppeteer_result = subprocess.run(
-        [sys.executable, str(PUPPETEER_CRAWL_HOOK)],
-        capture_output=True,
-        text=True,
-        timeout=timeout,
-        env=env,
-    )
-    if puppeteer_result.returncode != 0:
-        raise RuntimeError(f"Puppeteer crawl hook failed: {puppeteer_result.stderr}")
-
-    puppeteer_record = parse_jsonl_output(puppeteer_result.stdout, record_type='Binary') or {}
-    if not puppeteer_record or puppeteer_record.get('name') != 'puppeteer':
-        raise RuntimeError("Puppeteer Binary record not emitted by crawl hook")
-
-    npm_cmd = [
-        sys.executable,
-        str(NPM_BINARY_HOOK),
-        '--machine-id=test-machine',
-        '--binary-id=test-puppeteer',
-        '--name=puppeteer',
-        f"--binproviders={puppeteer_record.get('binproviders', '*')}",
-    ]
-    puppeteer_overrides = puppeteer_record.get('overrides')
-    if puppeteer_overrides:
-        npm_cmd.append(f'--overrides={json.dumps(puppeteer_overrides)}')
-
-    npm_result = subprocess.run(
-        npm_cmd,
-        capture_output=True,
-        text=True,
-        timeout=timeout,
-        env=env,
-    )
-    if npm_result.returncode != 0:
-        raise RuntimeError(f"Npm install failed: {npm_result.stderr}")
-
-    apply_machine_updates(parse_jsonl_records(npm_result.stdout), env)
-
-    chrome_result = subprocess.run(
-        [sys.executable, str(CHROME_INSTALL_HOOK)],
-        capture_output=True,
-        text=True,
-        timeout=timeout,
-        env=env,
-    )
-    if chrome_result.returncode != 0:
-        raise RuntimeError(f"Chrome install hook failed: {chrome_result.stderr}")
-
-    chrome_record = parse_jsonl_output(chrome_result.stdout, record_type='Binary') or {}
-    if not chrome_record or chrome_record.get('name') not in ('chromium', 'chrome'):
-        raise RuntimeError("Chrome Binary record not emitted by crawl hook")
-
-    chromium_cmd = [
-        sys.executable,
-        str(PUPPETEER_BINARY_HOOK),
-        '--machine-id=test-machine',
-        '--binary-id=test-chromium',
-        f"--name={chrome_record.get('name', 'chromium')}",
-        f"--binproviders={chrome_record.get('binproviders', '*')}",
-    ]
-    chrome_overrides = chrome_record.get('overrides')
-    if chrome_overrides:
-        chromium_cmd.append(f'--overrides={json.dumps(chrome_overrides)}')
-
-    result = subprocess.run(
-        chromium_cmd,
-        capture_output=True,
-        text=True,
-        timeout=timeout,
-        env=env,
-    )
-    if result.returncode != 0:
-        raise RuntimeError(f"Puppeteer chromium install failed: {result.stderr}")
-
-    records = parse_jsonl_records(result.stdout)
-    chromium_record = None
-    for record in records:
-        if record.get('type') == 'Binary' and record.get('name') in ('chromium', 'chrome'):
-            chromium_record = record
-            break
-    if not chromium_record:
-        chromium_record = parse_jsonl_output(result.stdout, record_type='Binary')
-
-    chromium_path = chromium_record.get('abspath')
-    if not chromium_path or not Path(chromium_path).exists():
-        raise RuntimeError(f"Chromium binary not found after install: {chromium_path}")
-
-    env['CHROME_BINARY'] = chromium_path
-    apply_machine_updates(records, env)
-    return chromium_path
-
-
-def run_hook_and_parse(
-    hook_script: Path,
-    url: str,
-    snapshot_id: str,
-    cwd: Optional[Path] = None,
-    env: Optional[dict] = None,
-    timeout: int = 60,
-    extra_args: Optional[List[str]] = None,
-) -> Tuple[int, Optional[Dict[str, Any]], str]:
-    """Run a hook and parse its JSONL output.
-
-    Convenience function combining run_hook() and parse_jsonl_output().
-
-    Returns:
-        Tuple of (returncode, parsed_result_or_none, stderr)
-    """
-    returncode, stdout, stderr = run_hook(
-        hook_script, url, snapshot_id,
-        cwd=cwd, env=env, timeout=timeout, extra_args=extra_args
-    )
-    result = parse_jsonl_output(stdout)
-    return returncode, result, stderr
-
-
-# =============================================================================
-# Extension Test Helpers
-# Used by extension tests (ublock, istilldontcareaboutcookies, twocaptcha)
-# =============================================================================
-
-
-def setup_test_env(tmpdir: Path) -> dict:
-    """Set up isolated data/lib directory structure for extension tests.
-
-    Creates structure matching real ArchiveBox data dir:
-        <tmpdir>/data/
-            lib/
-                arm64-darwin/   (or x86_64-linux, etc.)
-                    npm/
-                        .bin/
-                        node_modules/
-            personas/
-                Default/
-                    chrome_extensions/
-            users/
-                testuser/
-                    crawls/
-                    snapshots/
-
-    Calls chrome install hook + puppeteer/npm hooks for Chromium installation.
-    Returns env dict with DATA_DIR, LIB_DIR, NPM_BIN_DIR, NODE_MODULES_DIR, CHROME_BINARY, etc.
-
-    Args:
-        tmpdir: Base temporary directory for the test
-
-    Returns:
-        Environment dict with all paths set.
-    """
-
-    # Determine machine type (matches archivebox.config.paths.get_machine_type())
-    machine = platform.machine().lower()
-    system = platform.system().lower()
-    if machine in ('arm64', 'aarch64'):
-        machine = 'arm64'
-    elif machine in ('x86_64', 'amd64'):
-        machine = 'x86_64'
-    machine_type = f"{machine}-{system}"
-
-    # Create proper directory structure matching real ArchiveBox layout
-    data_dir = tmpdir / 'data'
-    lib_dir = data_dir / 'lib' / machine_type
-    npm_dir = lib_dir / 'npm'
-    npm_bin_dir = npm_dir / '.bin'
-    node_modules_dir = npm_dir / 'node_modules'
-
-    # Extensions go under personas/Default/
-    chrome_extensions_dir = data_dir / 'personas' / 'Default' / 'chrome_extensions'
-
-    # User data goes under users/{username}/
-    date_str = datetime.now().strftime('%Y%m%d')
-    users_dir = data_dir / 'users' / 'testuser'
-    crawls_dir = users_dir / 'crawls' / date_str
-    snapshots_dir = users_dir / 'snapshots' / date_str
-
-    # Create all directories
-    node_modules_dir.mkdir(parents=True, exist_ok=True)
-    npm_bin_dir.mkdir(parents=True, exist_ok=True)
-    chrome_extensions_dir.mkdir(parents=True, exist_ok=True)
-    crawls_dir.mkdir(parents=True, exist_ok=True)
-    snapshots_dir.mkdir(parents=True, exist_ok=True)
-
-    # Build complete env dict
-    env = os.environ.copy()
-    env.update({
-        'DATA_DIR': str(data_dir),
-        'LIB_DIR': str(lib_dir),
-        'MACHINE_TYPE': machine_type,
-        'NPM_BIN_DIR': str(npm_bin_dir),
-        'NODE_MODULES_DIR': str(node_modules_dir),
-        'CHROME_EXTENSIONS_DIR': str(chrome_extensions_dir),
-        'CRAWLS_DIR': str(crawls_dir),
-        'SNAPSHOTS_DIR': str(snapshots_dir),
-    })
-
-    # Only set headless if not already in environment (allow override for debugging)
-    if 'CHROME_HEADLESS' not in os.environ:
-        env['CHROME_HEADLESS'] = 'true'
-
-    try:
-        install_chromium_with_hooks(env)
-    except RuntimeError as e:
-        raise RuntimeError(str(e))
-    return env
-
-
-def launch_chromium_session(env: dict, chrome_dir: Path, crawl_id: str) -> Tuple[subprocess.Popen, str]:
-    """Launch Chromium and return (process, cdp_url).
-
-    This launches Chrome using the chrome launch hook and waits for the CDP URL
-    to become available. Use this for extension tests that need direct CDP access.
-
-    Args:
-        env: Environment dict (from setup_test_env)
-        chrome_dir: Directory for Chrome to write its files (cdp_url.txt, chrome.pid, etc.)
-        crawl_id: ID for the crawl
-
-    Returns:
-        Tuple of (chrome_launch_process, cdp_url)
-
-    Raises:
-        RuntimeError: If Chrome fails to launch or CDP URL not available after 20s
-    """
-    chrome_dir.mkdir(parents=True, exist_ok=True)
-
-    chrome_launch_process = subprocess.Popen(
-        ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
-        cwd=str(chrome_dir),
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-        env=env
-    )
-
-    # Wait for Chromium to launch and CDP URL to be available
-    cdp_url = None
-    for i in range(20):
-        if chrome_launch_process.poll() is not None:
-            stdout, stderr = chrome_launch_process.communicate()
-            raise RuntimeError(f"Chromium launch failed:\nStdout: {stdout}\nStderr: {stderr}")
-        cdp_file = chrome_dir / 'cdp_url.txt'
-        if cdp_file.exists():
-            cdp_url = cdp_file.read_text().strip()
-            break
-        time.sleep(1)
-
-    if not cdp_url:
-        chrome_launch_process.kill()
-        raise RuntimeError("Chromium CDP URL not found after 20s")
-
-    return chrome_launch_process, cdp_url
-
-
-def kill_chromium_session(chrome_launch_process: subprocess.Popen, chrome_dir: Path) -> None:
-    """Clean up Chromium process launched by launch_chromium_session.
-
-    Uses chrome_utils.js killChrome for proper process group handling.
-
-    Args:
-        chrome_launch_process: The Popen object from launch_chromium_session
-        chrome_dir: The chrome directory containing chrome.pid
-    """
-    # First try to terminate the launch process gracefully
-    try:
-        chrome_launch_process.send_signal(signal.SIGTERM)
-        chrome_launch_process.wait(timeout=5)
-    except Exception:
-        pass
-
-    # Read PID and use JS to kill with proper cleanup
-    chrome_pid_file = chrome_dir / 'chrome.pid'
-    if chrome_pid_file.exists():
-        try:
-            chrome_pid = int(chrome_pid_file.read_text().strip())
-            kill_chrome(chrome_pid, str(chrome_dir))
-        except (ValueError, FileNotFoundError):
-            pass
-
-
-@contextmanager
-def chromium_session(env: dict, chrome_dir: Path, crawl_id: str):
-    """Context manager for Chromium sessions with automatic cleanup.
-
-    Usage:
-        with chromium_session(env, chrome_dir, 'test-crawl') as (process, cdp_url):
-            # Use cdp_url to connect with puppeteer
-            pass
-        # Chromium automatically cleaned up
-
-    Args:
-        env: Environment dict (from setup_test_env)
-        chrome_dir: Directory for Chrome files
-        crawl_id: ID for the crawl
-
-    Yields:
-        Tuple of (chrome_launch_process, cdp_url)
-    """
-    chrome_launch_process = None
-    try:
-        chrome_launch_process, cdp_url = launch_chromium_session(env, chrome_dir, crawl_id)
-        yield chrome_launch_process, cdp_url
-    finally:
-        if chrome_launch_process:
-            kill_chromium_session(chrome_launch_process, chrome_dir)
-
-
-# =============================================================================
-# Tab-based Test Helpers
-# Used by tab-based tests (infiniscroll, modalcloser)
-# =============================================================================
-
-
-def cleanup_chrome(chrome_launch_process: subprocess.Popen, chrome_pid: int, chrome_dir: Optional[Path] = None) -> None:
-    """Clean up Chrome processes using chrome_utils.js killChrome.
-
-    Uses the centralized kill logic from chrome_utils.js which handles:
-    - SIGTERM then SIGKILL
-    - Process group killing
-    - Zombie process cleanup
-
-    Args:
-        chrome_launch_process: The Popen object for the chrome launch hook
-        chrome_pid: The PID of the Chrome process
-        chrome_dir: Optional path to chrome output directory
-    """
-    # First try to terminate the launch process gracefully
-    try:
-        chrome_launch_process.send_signal(signal.SIGTERM)
-        chrome_launch_process.wait(timeout=5)
-    except Exception:
-        pass
-
-    # Use JS to kill Chrome with proper process group handling
-    kill_chrome(chrome_pid, str(chrome_dir) if chrome_dir else None)
-
-
-@contextmanager
-def chrome_session(
-    tmpdir: Path,
-    crawl_id: str = 'test-crawl',
-    snapshot_id: str = 'test-snapshot',
-    test_url: str = 'about:blank',
-    navigate: bool = True,
-    timeout: int = 15,
-):
-    """Context manager for Chrome sessions with automatic cleanup.
-
-    Creates the directory structure, launches Chrome, creates a tab,
-    and optionally navigates to the test URL. Automatically cleans up
-    Chrome on exit.
-
-    Usage:
-        with chrome_session(tmpdir, test_url='https://example.com') as (process, pid, chrome_dir, env):
-            # Run tests with chrome session
-            pass
-        # Chrome automatically cleaned up
-
-    Args:
-        tmpdir: Temporary directory for test files
-        crawl_id: ID to use for the crawl
-        snapshot_id: ID to use for the snapshot
-        test_url: URL to navigate to (if navigate=True)
-        navigate: Whether to navigate to the URL after creating tab
-        timeout: Seconds to wait for Chrome to start
-
-    Yields:
-        Tuple of (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env)
-
-    Raises:
-        RuntimeError: If Chrome fails to start or tab creation fails
-    """
-    chrome_launch_process = None
-    chrome_pid = None
-    try:
-        # Create proper directory structure in tmpdir
-        machine = platform.machine().lower()
-        system = platform.system().lower()
-        if machine in ('arm64', 'aarch64'):
-            machine = 'arm64'
-        elif machine in ('x86_64', 'amd64'):
-            machine = 'x86_64'
-        machine_type = f"{machine}-{system}"
-
-        data_dir = Path(tmpdir) / 'data'
-        lib_dir = data_dir / 'lib' / machine_type
-        npm_dir = lib_dir / 'npm'
-        node_modules_dir = npm_dir / 'node_modules'
-        puppeteer_cache_dir = lib_dir / 'puppeteer'
-
-        # Create lib structure for puppeteer installation
-        node_modules_dir.mkdir(parents=True, exist_ok=True)
-
-        # Create crawl and snapshot directories
-        crawl_dir = Path(tmpdir) / 'crawl'
-        crawl_dir.mkdir(exist_ok=True)
-        chrome_dir = crawl_dir / 'chrome'
-        chrome_dir.mkdir(exist_ok=True)
-
-        # Build env with tmpdir-specific paths
-        env = os.environ.copy()
-        env.update({
-            'DATA_DIR': str(data_dir),
-            'LIB_DIR': str(lib_dir),
-            'MACHINE_TYPE': machine_type,
-            'NODE_MODULES_DIR': str(node_modules_dir),
-            'NODE_PATH': str(node_modules_dir),
-            'NPM_BIN_DIR': str(npm_dir / '.bin'),
-            'CHROME_HEADLESS': 'true',
-            'PUPPETEER_CACHE_DIR': str(puppeteer_cache_dir),
-        })
-
-        # Reuse system Puppeteer cache to avoid redundant Chromium downloads
-        link_puppeteer_cache(lib_dir)
-
-        # Install Chromium via npm + puppeteer hooks using normal Binary flow
-        install_chromium_with_hooks(env)
-
-        # Launch Chrome at crawl level
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=env
-        )
-
-        # Wait for Chrome to launch
-        for i in range(timeout):
-            if chrome_launch_process.poll() is not None:
-                stdout, stderr = chrome_launch_process.communicate()
-                raise RuntimeError(f"Chrome launch failed:\nStdout: {stdout}\nStderr: {stderr}")
-            if (chrome_dir / 'cdp_url.txt').exists():
-                break
-            time.sleep(1)
-
-        if not (chrome_dir / 'cdp_url.txt').exists():
-            raise RuntimeError(f"Chrome CDP URL not found after {timeout}s")
-
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-
-        # Create snapshot directory structure
-        snapshot_dir = Path(tmpdir) / 'snapshot'
-        snapshot_dir.mkdir(exist_ok=True)
-        snapshot_chrome_dir = snapshot_dir / 'chrome'
-        snapshot_chrome_dir.mkdir(exist_ok=True)
-
-        # Create tab
-        tab_env = env.copy()
-        tab_env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
-        try:
-            result = subprocess.run(
-                ['node', str(CHROME_TAB_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}', f'--crawl-id={crawl_id}'],
-                cwd=str(snapshot_chrome_dir),
-                capture_output=True,
-                text=True,
-                timeout=60,
-                env=tab_env
-            )
-            if result.returncode != 0:
-                cleanup_chrome(chrome_launch_process, chrome_pid)
-                raise RuntimeError(f"Tab creation failed: {result.stderr}")
-        except subprocess.TimeoutExpired:
-            cleanup_chrome(chrome_launch_process, chrome_pid)
-            raise RuntimeError("Tab creation timed out after 60s")
-
-        # Navigate to URL if requested
-        if navigate and CHROME_NAVIGATE_HOOK and test_url != 'about:blank':
-            try:
-                result = subprocess.run(
-                    ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                    cwd=str(snapshot_chrome_dir),
-                    capture_output=True,
-                    text=True,
-                    timeout=120,
-                    env=env
-                )
-                if result.returncode != 0:
-                    cleanup_chrome(chrome_launch_process, chrome_pid)
-                    raise RuntimeError(f"Navigation failed: {result.stderr}")
-            except subprocess.TimeoutExpired:
-                cleanup_chrome(chrome_launch_process, chrome_pid)
-                raise RuntimeError("Navigation timed out after 120s")
-
-        yield chrome_launch_process, chrome_pid, snapshot_chrome_dir, env
-    finally:
-        if chrome_launch_process and chrome_pid:
-            cleanup_chrome(chrome_launch_process, chrome_pid)
diff --git a/archivebox/plugins/chrome/tests/test_chrome.py b/archivebox/plugins/chrome/tests/test_chrome.py
deleted file mode 100644
index 33d328c9..00000000
--- a/archivebox/plugins/chrome/tests/test_chrome.py
+++ /dev/null
@@ -1,722 +0,0 @@
-"""
-Integration tests for chrome plugin
-
-Tests verify:
-1. Chromium install via @puppeteer/browsers
-2. Verify deps with abx-pkg
-3. Chrome hooks exist
-4. Chromium launches at crawl level
-5. Tab creation at snapshot level
-6. Tab navigation works
-7. Tab cleanup on SIGTERM
-8. Chromium cleanup on crawl end
-
-NOTE: We use Chromium instead of Chrome because Chrome 137+ removed support for
---load-extension and --disable-extensions-except flags, which are needed for
-loading unpacked extensions in headless mode.
-"""
-
-import json
-import os
-import signal
-import subprocess
-import sys
-import time
-from pathlib import Path
-import pytest
-import tempfile
-import shutil
-import platform
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_test_env,
-    find_chromium_binary,
-    install_chromium_with_hooks,
-    CHROME_PLUGIN_DIR as PLUGIN_DIR,
-    CHROME_LAUNCH_HOOK,
-    CHROME_TAB_HOOK,
-    CHROME_NAVIGATE_HOOK,
-)
-
-def _get_cookies_via_cdp(port: int, env: dict) -> list[dict]:
-    node_script = r"""
-const http = require('http');
-const WebSocket = require('ws');
-const port = process.env.CDP_PORT;
-
-function getTargets() {
-  return new Promise((resolve, reject) => {
-    const req = http.get(`http://127.0.0.1:${port}/json/list`, (res) => {
-      let data = '';
-      res.on('data', (chunk) => (data += chunk));
-      res.on('end', () => {
-        try {
-          resolve(JSON.parse(data));
-        } catch (e) {
-          reject(e);
-        }
-      });
-    });
-    req.on('error', reject);
-  });
-}
-
-(async () => {
-  const targets = await getTargets();
-  const pageTarget = targets.find(t => t.type === 'page') || targets[0];
-  if (!pageTarget) {
-    console.error('No page target found');
-    process.exit(2);
-  }
-
-  const ws = new WebSocket(pageTarget.webSocketDebuggerUrl);
-  const timer = setTimeout(() => {
-    console.error('Timeout waiting for cookies');
-    process.exit(3);
-  }, 10000);
-
-  ws.on('open', () => {
-    ws.send(JSON.stringify({ id: 1, method: 'Network.getAllCookies' }));
-  });
-
-  ws.on('message', (data) => {
-    const msg = JSON.parse(data);
-    if (msg.id === 1) {
-      clearTimeout(timer);
-      ws.close();
-      if (!msg.result || !msg.result.cookies) {
-        console.error('No cookies in response');
-        process.exit(4);
-      }
-      process.stdout.write(JSON.stringify(msg.result.cookies));
-      process.exit(0);
-    }
-  });
-
-  ws.on('error', (err) => {
-    console.error(String(err));
-    process.exit(5);
-  });
-})().catch((err) => {
-  console.error(String(err));
-  process.exit(1);
-});
-"""
-
-    result = subprocess.run(
-        ['node', '-e', node_script],
-        capture_output=True,
-        text=True,
-        timeout=30,
-        env=env | {'CDP_PORT': str(port)},
-    )
-    assert result.returncode == 0, f"Failed to read cookies via CDP: {result.stderr}\nStdout: {result.stdout}"
-    return json.loads(result.stdout or '[]')
-
-
-@pytest.fixture(scope="session", autouse=True)
-def ensure_chromium_and_puppeteer_installed(tmp_path_factory):
-    """Ensure Chromium and puppeteer are installed before running tests."""
-    if not os.environ.get('DATA_DIR'):
-        test_data_dir = tmp_path_factory.mktemp('chrome_test_data')
-        os.environ['DATA_DIR'] = str(test_data_dir)
-    env = get_test_env()
-
-    try:
-        chromium_binary = install_chromium_with_hooks(env)
-    except RuntimeError as e:
-        raise RuntimeError(str(e))
-
-    if not chromium_binary:
-        raise RuntimeError("Chromium not found after install")
-
-    os.environ['CHROME_BINARY'] = chromium_binary
-    for key in ('NODE_MODULES_DIR', 'NODE_PATH', 'PATH'):
-        if env.get(key):
-            os.environ[key] = env[key]
-
-
-def test_hook_scripts_exist():
-    """Verify chrome hooks exist."""
-    assert CHROME_LAUNCH_HOOK.exists(), f"Hook not found: {CHROME_LAUNCH_HOOK}"
-    assert CHROME_TAB_HOOK.exists(), f"Hook not found: {CHROME_TAB_HOOK}"
-    assert CHROME_NAVIGATE_HOOK.exists(), f"Hook not found: {CHROME_NAVIGATE_HOOK}"
-
-
-def test_verify_chromium_available():
-    """Verify Chromium is available via CHROME_BINARY env var."""
-    chromium_binary = os.environ.get('CHROME_BINARY') or find_chromium_binary()
-
-    assert chromium_binary, "Chromium binary should be available (set by fixture or found)"
-    assert Path(chromium_binary).exists(), f"Chromium binary should exist at {chromium_binary}"
-
-    # Verify it's actually Chromium by checking version
-    result = subprocess.run(
-        [chromium_binary, '--version'],
-        capture_output=True,
-        text=True,
-        timeout=10
-    )
-    assert result.returncode == 0, f"Failed to get Chromium version: {result.stderr}"
-    assert 'Chromium' in result.stdout or 'Chrome' in result.stdout, f"Unexpected version output: {result.stdout}"
-
-
-def test_chrome_launch_and_tab_creation():
-    """Integration test: Launch Chrome at crawl level and create tab at snapshot level."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
-        crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
-        chrome_dir.mkdir()
-
-        # Get test environment with NODE_MODULES_DIR set
-        env = get_test_env()
-        env['CHROME_HEADLESS'] = 'true'
-
-        # Launch Chrome at crawl level (background process)
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-123'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=env
-        )
-
-        # Wait for Chrome to launch (check process isn't dead and files exist)
-        for i in range(15):  # Wait up to 15 seconds for Chrome to start
-            if chrome_launch_process.poll() is not None:
-                stdout, stderr = chrome_launch_process.communicate()
-                pytest.fail(f"Chrome launch process exited early:\nStdout: {stdout}\nStderr: {stderr}")
-            if (chrome_dir / 'cdp_url.txt').exists():
-                break
-            time.sleep(1)
-
-        # Verify Chrome launch outputs - if it failed, get the error from the process
-        if not (chrome_dir / 'cdp_url.txt').exists():
-            # Try to get output from the process
-            try:
-                stdout, stderr = chrome_launch_process.communicate(timeout=1)
-            except subprocess.TimeoutExpired:
-                # Process still running, try to read available output
-                stdout = stderr = "(process still running)"
-
-            # Check what files exist
-            if chrome_dir.exists():
-                files = list(chrome_dir.iterdir())
-                # Check if Chrome process is still alive
-                if (chrome_dir / 'chrome.pid').exists():
-                    chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-                    try:
-                        os.kill(chrome_pid, 0)
-                        chrome_alive = "yes"
-                    except OSError:
-                        chrome_alive = "no"
-                    pytest.fail(f"cdp_url.txt missing after 15s. Chrome dir files: {files}. Chrome process {chrome_pid} alive: {chrome_alive}\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}")
-                else:
-                    pytest.fail(f"cdp_url.txt missing. Chrome dir exists with files: {files}\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}")
-            else:
-                pytest.fail(f"Chrome dir {chrome_dir} doesn't exist\nLaunch stdout: {stdout}\nLaunch stderr: {stderr}")
-
-        assert (chrome_dir / 'cdp_url.txt').exists(), "cdp_url.txt should exist"
-        assert (chrome_dir / 'chrome.pid').exists(), "chrome.pid should exist"
-        assert (chrome_dir / 'port.txt').exists(), "port.txt should exist"
-
-        cdp_url = (chrome_dir / 'cdp_url.txt').read_text().strip()
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-
-        assert cdp_url.startswith('ws://'), f"CDP URL should be WebSocket URL: {cdp_url}"
-        assert chrome_pid > 0, "Chrome PID should be valid"
-
-        # Verify Chrome process is running
-        try:
-            os.kill(chrome_pid, 0)
-        except OSError:
-            pytest.fail(f"Chrome process {chrome_pid} is not running")
-
-        # Create snapshot directory and tab
-        snapshot_dir = Path(tmpdir) / 'snapshot1'
-        snapshot_dir.mkdir()
-        snapshot_chrome_dir = snapshot_dir / 'chrome'
-        snapshot_chrome_dir.mkdir()
-
-        # Launch tab at snapshot level
-        env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
-        result = subprocess.run(
-            ['node', str(CHROME_TAB_HOOK), '--url=https://example.com', '--snapshot-id=snap-123', '--crawl-id=test-crawl-123'],
-            cwd=str(snapshot_chrome_dir),
-            capture_output=True,
-            text=True,
-            timeout=60,
-            env=env
-        )
-
-        assert result.returncode == 0, f"Tab creation failed: {result.stderr}\nStdout: {result.stdout}"
-
-        # Verify tab creation outputs
-        assert (snapshot_chrome_dir / 'cdp_url.txt').exists(), "Snapshot cdp_url.txt should exist"
-        assert (snapshot_chrome_dir / 'target_id.txt').exists(), "target_id.txt should exist"
-        assert (snapshot_chrome_dir / 'url.txt').exists(), "url.txt should exist"
-
-        target_id = (snapshot_chrome_dir / 'target_id.txt').read_text().strip()
-        assert len(target_id) > 0, "Target ID should not be empty"
-
-        # Cleanup: Kill Chrome and launch process
-        try:
-            chrome_launch_process.send_signal(signal.SIGTERM)
-            chrome_launch_process.wait(timeout=5)
-        except:
-            pass
-        try:
-            os.kill(chrome_pid, signal.SIGKILL)
-        except OSError:
-            pass
-
-
-def test_cookies_imported_on_launch():
-    """Integration test: COOKIES_TXT_FILE is imported at crawl start."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
-        crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
-        chrome_dir.mkdir()
-
-        cookies_file = Path(tmpdir) / 'cookies.txt'
-        cookies_file.write_text(
-            '\n'.join([
-                '# Netscape HTTP Cookie File',
-                '# https://curl.se/docs/http-cookies.html',
-                '# This file was generated by a test',
-                '',
-                'example.com\tTRUE\t/\tFALSE\t2147483647\tabx_test_cookie\thello',
-                '',
-            ])
-        )
-
-        profile_dir = Path(tmpdir) / 'profile'
-        env = get_test_env()
-        env.update({
-            'CHROME_HEADLESS': 'true',
-            'CHROME_USER_DATA_DIR': str(profile_dir),
-            'COOKIES_TXT_FILE': str(cookies_file),
-        })
-
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-cookies'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=env
-        )
-
-        for _ in range(15):
-            if (chrome_dir / 'port.txt').exists():
-                break
-            time.sleep(1)
-
-        assert (chrome_dir / 'port.txt').exists(), "port.txt should exist"
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-        port = int((chrome_dir / 'port.txt').read_text().strip())
-
-        cookie_found = False
-        for _ in range(15):
-            cookies = _get_cookies_via_cdp(port, env)
-            cookie_found = any(
-                c.get('name') == 'abx_test_cookie' and c.get('value') == 'hello'
-                for c in cookies
-            )
-            if cookie_found:
-                break
-            time.sleep(1)
-
-        assert cookie_found, "Imported cookie should be present in Chrome session"
-
-        # Cleanup
-        try:
-            chrome_launch_process.send_signal(signal.SIGTERM)
-            chrome_launch_process.wait(timeout=5)
-        except:
-            pass
-        try:
-            os.kill(chrome_pid, signal.SIGKILL)
-        except OSError:
-            pass
-
-
-def test_chrome_navigation():
-    """Integration test: Navigate to a URL."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
-        crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
-        chrome_dir.mkdir()
-
-        # Launch Chrome (background process)
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-nav'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=get_test_env() | {'CHROME_HEADLESS': 'true'}
-        )
-
-        # Wait for Chrome to launch
-        time.sleep(3)
-
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-
-        # Create snapshot and tab
-        snapshot_dir = Path(tmpdir) / 'snapshot1'
-        snapshot_dir.mkdir()
-        snapshot_chrome_dir = snapshot_dir / 'chrome'
-        snapshot_chrome_dir.mkdir()
-
-        result = subprocess.run(
-            ['node', str(CHROME_TAB_HOOK), '--url=https://example.com', '--snapshot-id=snap-nav-123', '--crawl-id=test-crawl-nav'],
-            cwd=str(snapshot_chrome_dir),
-            capture_output=True,
-            text=True,
-            timeout=60,
-            env=get_test_env() | {'CRAWL_OUTPUT_DIR': str(crawl_dir), 'CHROME_HEADLESS': 'true'}
-        )
-        assert result.returncode == 0, f"Tab creation failed: {result.stderr}"
-
-        # Navigate to URL
-        result = subprocess.run(
-            ['node', str(CHROME_NAVIGATE_HOOK), '--url=https://example.com', '--snapshot-id=snap-nav-123'],
-            cwd=str(snapshot_chrome_dir),
-            capture_output=True,
-            text=True,
-            timeout=120,
-            env=get_test_env() | {'CHROME_PAGELOAD_TIMEOUT': '30', 'CHROME_WAIT_FOR': 'load'}
-        )
-
-        assert result.returncode == 0, f"Navigation failed: {result.stderr}\nStdout: {result.stdout}"
-
-        # Verify navigation outputs
-        assert (snapshot_chrome_dir / 'navigation.json').exists(), "navigation.json should exist"
-        assert (snapshot_chrome_dir / 'page_loaded.txt').exists(), "page_loaded.txt should exist"
-
-        nav_data = json.loads((snapshot_chrome_dir / 'navigation.json').read_text())
-        assert nav_data.get('status') in [200, 301, 302], f"Should get valid HTTP status: {nav_data}"
-        assert nav_data.get('finalUrl'), "Should have final URL"
-
-        # Cleanup
-        try:
-            chrome_launch_process.send_signal(signal.SIGTERM)
-            chrome_launch_process.wait(timeout=5)
-        except:
-            pass
-        try:
-            os.kill(chrome_pid, signal.SIGKILL)
-        except OSError:
-            pass
-
-
-def test_tab_cleanup_on_sigterm():
-    """Integration test: Tab cleanup when receiving SIGTERM."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
-        crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
-        chrome_dir.mkdir()
-
-        # Launch Chrome (background process)
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-cleanup'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=get_test_env() | {'CHROME_HEADLESS': 'true'}
-        )
-
-        # Wait for Chrome to launch
-        time.sleep(3)
-
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-
-        # Create snapshot and tab - run in background
-        snapshot_dir = Path(tmpdir) / 'snapshot1'
-        snapshot_dir.mkdir()
-        snapshot_chrome_dir = snapshot_dir / 'chrome'
-        snapshot_chrome_dir.mkdir()
-
-        tab_process = subprocess.Popen(
-            ['node', str(CHROME_TAB_HOOK), '--url=https://example.com', '--snapshot-id=snap-cleanup', '--crawl-id=test-cleanup'],
-            cwd=str(snapshot_chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=get_test_env() | {'CRAWL_OUTPUT_DIR': str(crawl_dir), 'CHROME_HEADLESS': 'true'}
-        )
-
-        # Wait for tab to be created
-        time.sleep(3)
-
-        # Send SIGTERM to tab process
-        tab_process.send_signal(signal.SIGTERM)
-        stdout, stderr = tab_process.communicate(timeout=10)
-
-        assert tab_process.returncode == 0, f"Tab process should exit cleanly: {stderr}"
-
-        # Chrome should still be running
-        try:
-            os.kill(chrome_pid, 0)
-        except OSError:
-            pytest.fail("Chrome should still be running after tab cleanup")
-
-        # Cleanup
-        try:
-            chrome_launch_process.send_signal(signal.SIGTERM)
-            chrome_launch_process.wait(timeout=5)
-        except:
-            pass
-        try:
-            os.kill(chrome_pid, signal.SIGKILL)
-        except OSError:
-            pass
-
-
-def test_multiple_snapshots_share_chrome():
-    """Integration test: Multiple snapshots share one Chrome instance."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
-        crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
-        chrome_dir.mkdir()
-
-        # Launch Chrome at crawl level
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-multi-crawl'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=get_test_env() | {'CHROME_HEADLESS': 'true'}
-        )
-
-        # Wait for Chrome to launch
-        for i in range(15):
-            if (chrome_dir / 'cdp_url.txt').exists():
-                break
-            time.sleep(1)
-
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-        crawl_cdp_url = (chrome_dir / 'cdp_url.txt').read_text().strip()
-
-        # Create multiple snapshots that share this Chrome
-        snapshot_dirs = []
-        target_ids = []
-
-        for snap_num in range(3):
-            snapshot_dir = Path(tmpdir) / f'snapshot{snap_num}'
-            snapshot_dir.mkdir()
-            snapshot_chrome_dir = snapshot_dir / 'chrome'
-            snapshot_chrome_dir.mkdir()
-            snapshot_dirs.append(snapshot_chrome_dir)
-
-            # Create tab for this snapshot
-            result = subprocess.run(
-                ['node', str(CHROME_TAB_HOOK), f'--url=https://example.com/{snap_num}', f'--snapshot-id=snap-{snap_num}', '--crawl-id=test-multi-crawl'],
-                cwd=str(snapshot_chrome_dir),
-                capture_output=True,
-                text=True,
-                timeout=60,
-                env=get_test_env() | {'CRAWL_OUTPUT_DIR': str(crawl_dir), 'CHROME_HEADLESS': 'true'}
-            )
-
-            assert result.returncode == 0, f"Tab {snap_num} creation failed: {result.stderr}"
-
-            # Verify each snapshot has its own target_id but same Chrome PID
-            assert (snapshot_chrome_dir / 'target_id.txt').exists()
-            assert (snapshot_chrome_dir / 'cdp_url.txt').exists()
-            assert (snapshot_chrome_dir / 'chrome.pid').exists()
-
-            target_id = (snapshot_chrome_dir / 'target_id.txt').read_text().strip()
-            snapshot_cdp_url = (snapshot_chrome_dir / 'cdp_url.txt').read_text().strip()
-            snapshot_pid = int((snapshot_chrome_dir / 'chrome.pid').read_text().strip())
-
-            target_ids.append(target_id)
-
-            # All snapshots should share same Chrome
-            assert snapshot_pid == chrome_pid, f"Snapshot {snap_num} should use crawl Chrome PID"
-            assert snapshot_cdp_url == crawl_cdp_url, f"Snapshot {snap_num} should use crawl CDP URL"
-
-        # All target IDs should be unique (different tabs)
-        assert len(set(target_ids)) == 3, f"All snapshots should have unique tabs: {target_ids}"
-
-        # Chrome should still be running with all 3 tabs
-        try:
-            os.kill(chrome_pid, 0)
-        except OSError:
-            pytest.fail("Chrome should still be running after creating 3 tabs")
-
-        # Cleanup
-        try:
-            chrome_launch_process.send_signal(signal.SIGTERM)
-            chrome_launch_process.wait(timeout=5)
-        except:
-            pass
-        try:
-            os.kill(chrome_pid, signal.SIGKILL)
-        except OSError:
-            pass
-
-
-def test_chrome_cleanup_on_crawl_end():
-    """Integration test: Chrome cleanup at end of crawl."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
-        crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
-        chrome_dir.mkdir()
-
-        # Launch Chrome in background
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-crawl-end'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=get_test_env() | {'CHROME_HEADLESS': 'true'}
-        )
-
-        # Wait for Chrome to launch
-        time.sleep(3)
-
-        # Verify Chrome is running
-        assert (chrome_dir / 'chrome.pid').exists(), "Chrome PID file should exist"
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-
-        try:
-            os.kill(chrome_pid, 0)
-        except OSError:
-            pytest.fail("Chrome should be running")
-
-        # Send SIGTERM to chrome launch process
-        chrome_launch_process.send_signal(signal.SIGTERM)
-        stdout, stderr = chrome_launch_process.communicate(timeout=10)
-
-        # Wait for cleanup
-        time.sleep(3)
-
-        # Verify Chrome process is killed
-        try:
-            os.kill(chrome_pid, 0)
-            pytest.fail("Chrome should be killed after SIGTERM")
-        except OSError:
-            # Expected - Chrome should be dead
-            pass
-
-
-def test_zombie_prevention_hook_killed():
-    """Integration test: Chrome is killed even if hook process is SIGKILL'd."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        crawl_dir = Path(tmpdir) / 'crawl'
-        crawl_dir.mkdir()
-        chrome_dir = crawl_dir / 'chrome'
-        chrome_dir.mkdir()
-
-        # Launch Chrome
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-zombie'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=get_test_env() | {'CHROME_HEADLESS': 'true'}
-        )
-
-        # Wait for Chrome to launch
-        for i in range(15):
-            if (chrome_dir / 'chrome.pid').exists():
-                break
-            time.sleep(1)
-
-        assert (chrome_dir / 'chrome.pid').exists(), "Chrome PID file should exist"
-
-        chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())
-        hook_pid = chrome_launch_process.pid  # Use the Popen process PID instead of hook.pid file
-
-        # Verify both Chrome and hook are running
-        try:
-            os.kill(chrome_pid, 0)
-            os.kill(hook_pid, 0)
-        except OSError:
-            pytest.fail("Both Chrome and hook should be running")
-
-        # Simulate hook getting SIGKILL'd (can't cleanup)
-        os.kill(hook_pid, signal.SIGKILL)
-        time.sleep(1)
-
-        # Chrome should still be running (orphaned)
-        try:
-            os.kill(chrome_pid, 0)
-        except OSError:
-            pytest.fail("Chrome should still be running after hook SIGKILL")
-
-        # Simulate Crawl.cleanup() using the actual cleanup logic
-        def is_process_alive(pid):
-            """Check if a process exists."""
-            try:
-                os.kill(pid, 0)
-                return True
-            except (OSError, ProcessLookupError):
-                return False
-
-        for pid_file in chrome_dir.glob('**/*.pid'):
-            try:
-                pid = int(pid_file.read_text().strip())
-
-                # Step 1: SIGTERM for graceful shutdown
-                try:
-                    try:
-                        os.killpg(pid, signal.SIGTERM)
-                    except (OSError, ProcessLookupError):
-                        os.kill(pid, signal.SIGTERM)
-                except ProcessLookupError:
-                    pid_file.unlink(missing_ok=True)
-                    continue
-
-                # Step 2: Wait for graceful shutdown
-                time.sleep(2)
-
-                # Step 3: Check if still alive
-                if not is_process_alive(pid):
-                    pid_file.unlink(missing_ok=True)
-                    continue
-
-                # Step 4: Force kill ENTIRE process group with SIGKILL
-                try:
-                    try:
-                        # Always kill entire process group with SIGKILL
-                        os.killpg(pid, signal.SIGKILL)
-                    except (OSError, ProcessLookupError):
-                        os.kill(pid, signal.SIGKILL)
-                except ProcessLookupError:
-                    pid_file.unlink(missing_ok=True)
-                    continue
-
-                # Step 5: Wait and verify death
-                time.sleep(1)
-
-                if not is_process_alive(pid):
-                    pid_file.unlink(missing_ok=True)
-
-            except (ValueError, OSError):
-                pass
-
-        # Chrome should now be dead
-        try:
-            os.kill(chrome_pid, 0)
-            pytest.fail("Chrome should be killed after cleanup")
-        except OSError:
-            # Expected - Chrome is dead
-            pass
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/chrome/tests/test_chrome_test_helpers.py b/archivebox/plugins/chrome/tests/test_chrome_test_helpers.py
deleted file mode 100644
index 703ea037..00000000
--- a/archivebox/plugins/chrome/tests/test_chrome_test_helpers.py
+++ /dev/null
@@ -1,260 +0,0 @@
-"""
-Tests for chrome_test_helpers.py functions.
-
-These tests verify the Python helper functions used across Chrome plugin tests.
-"""
-
-import os
-import pytest
-import tempfile
-from pathlib import Path
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_test_env,
-    get_machine_type,
-    get_lib_dir,
-    get_node_modules_dir,
-    get_extensions_dir,
-    find_chromium_binary,
-    get_plugin_dir,
-    get_hook_script,
-    parse_jsonl_output,
-)
-
-
-def test_get_machine_type():
-    """Test get_machine_type() returns valid format."""
-    machine_type = get_machine_type()
-    assert isinstance(machine_type, str)
-    assert '-' in machine_type, "Machine type should be in format: arch-os"
-    # Should be one of the expected formats
-    assert any(x in machine_type for x in ['arm64', 'x86_64']), "Should contain valid architecture"
-    assert any(x in machine_type for x in ['darwin', 'linux', 'win32']), "Should contain valid OS"
-
-
-def test_get_lib_dir_with_env_var():
-    """Test get_lib_dir() respects LIB_DIR env var."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        custom_lib = Path(tmpdir) / 'custom_lib'
-        custom_lib.mkdir()
-
-        old_lib_dir = os.environ.get('LIB_DIR')
-        try:
-            os.environ['LIB_DIR'] = str(custom_lib)
-            lib_dir = get_lib_dir()
-            assert lib_dir == custom_lib
-        finally:
-            if old_lib_dir:
-                os.environ['LIB_DIR'] = old_lib_dir
-            else:
-                os.environ.pop('LIB_DIR', None)
-
-
-def test_get_node_modules_dir_with_env_var():
-    """Test get_node_modules_dir() respects NODE_MODULES_DIR env var."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        custom_nm = Path(tmpdir) / 'node_modules'
-        custom_nm.mkdir()
-
-        old_nm_dir = os.environ.get('NODE_MODULES_DIR')
-        try:
-            os.environ['NODE_MODULES_DIR'] = str(custom_nm)
-            nm_dir = get_node_modules_dir()
-            assert nm_dir == custom_nm
-        finally:
-            if old_nm_dir:
-                os.environ['NODE_MODULES_DIR'] = old_nm_dir
-            else:
-                os.environ.pop('NODE_MODULES_DIR', None)
-
-
-def test_get_extensions_dir_default():
-    """Test get_extensions_dir() returns expected path format."""
-    ext_dir = get_extensions_dir()
-    assert isinstance(ext_dir, str)
-    assert 'personas' in ext_dir
-    assert 'chrome_extensions' in ext_dir
-
-
-def test_get_extensions_dir_with_custom_persona():
-    """Test get_extensions_dir() respects ACTIVE_PERSONA env var."""
-    old_persona = os.environ.get('ACTIVE_PERSONA')
-    old_data_dir = os.environ.get('DATA_DIR')
-    try:
-        os.environ['ACTIVE_PERSONA'] = 'TestPersona'
-        os.environ['DATA_DIR'] = '/tmp/test'
-        ext_dir = get_extensions_dir()
-        assert 'TestPersona' in ext_dir
-        assert '/tmp/test' in ext_dir
-    finally:
-        if old_persona:
-            os.environ['ACTIVE_PERSONA'] = old_persona
-        else:
-            os.environ.pop('ACTIVE_PERSONA', None)
-        if old_data_dir:
-            os.environ['DATA_DIR'] = old_data_dir
-        else:
-            os.environ.pop('DATA_DIR', None)
-
-
-def test_get_test_env_returns_dict():
-    """Test get_test_env() returns properly formatted environment dict."""
-    env = get_test_env()
-    assert isinstance(env, dict)
-
-    # Should include key paths
-    assert 'MACHINE_TYPE' in env
-    assert 'LIB_DIR' in env
-    assert 'NODE_MODULES_DIR' in env
-    assert 'NODE_PATH' in env  # Critical for module resolution
-    assert 'NPM_BIN_DIR' in env
-    assert 'CHROME_EXTENSIONS_DIR' in env
-
-    # Verify NODE_PATH equals NODE_MODULES_DIR (for Node.js module resolution)
-    assert env['NODE_PATH'] == env['NODE_MODULES_DIR']
-
-
-def test_get_test_env_paths_are_absolute():
-    """Test that get_test_env() returns absolute paths."""
-    env = get_test_env()
-
-    # All path-like values should be absolute
-    assert Path(env['LIB_DIR']).is_absolute()
-    assert Path(env['NODE_MODULES_DIR']).is_absolute()
-    assert Path(env['NODE_PATH']).is_absolute()
-
-
-def test_find_chromium_binary():
-    """Test find_chromium_binary() returns a path or None."""
-    binary = find_chromium_binary()
-    if binary:
-        assert isinstance(binary, str)
-        # Should be an absolute path if found
-        assert os.path.isabs(binary)
-
-
-def test_get_plugin_dir():
-    """Test get_plugin_dir() finds correct plugin directory."""
-    # Use this test file's path
-    test_file = __file__
-    plugin_dir = get_plugin_dir(test_file)
-
-    assert plugin_dir.exists()
-    assert plugin_dir.is_dir()
-    # Should be the chrome plugin directory
-    assert plugin_dir.name == 'chrome'
-    assert (plugin_dir.parent.name == 'plugins')
-
-
-def test_get_hook_script_finds_existing_hook():
-    """Test get_hook_script() can find an existing hook."""
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import CHROME_PLUGIN_DIR
-
-    # Try to find the chrome launch hook
-    hook = get_hook_script(CHROME_PLUGIN_DIR, 'on_Crawl__*_chrome_launch.*')
-
-    if hook:  # May not exist in all test environments
-        assert hook.exists()
-        assert hook.is_file()
-        assert 'chrome_launch' in hook.name
-
-
-def test_get_hook_script_returns_none_for_missing():
-    """Test get_hook_script() returns None for non-existent hooks."""
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import CHROME_PLUGIN_DIR
-
-    hook = get_hook_script(CHROME_PLUGIN_DIR, 'nonexistent_hook_*_pattern.*')
-    assert hook is None
-
-
-def test_parse_jsonl_output_valid():
-    """Test parse_jsonl_output() parses valid JSONL."""
-    jsonl_output = '''{"type": "ArchiveResult", "status": "succeeded", "output": "test1"}
-{"type": "ArchiveResult", "status": "failed", "error": "test2"}
-'''
-
-    # Returns first match only
-    result = parse_jsonl_output(jsonl_output)
-    assert result is not None
-    assert result['type'] == 'ArchiveResult'
-    assert result['status'] == 'succeeded'
-    assert result['output'] == 'test1'
-
-
-def test_parse_jsonl_output_with_non_json_lines():
-    """Test parse_jsonl_output() skips non-JSON lines."""
-    mixed_output = '''Some non-JSON output
-{"type": "ArchiveResult", "status": "succeeded"}
-More non-JSON
-{"type": "ArchiveResult", "status": "failed"}
-'''
-
-    result = parse_jsonl_output(mixed_output)
-    assert result is not None
-    assert result['type'] == 'ArchiveResult'
-    assert result['status'] == 'succeeded'
-
-
-def test_parse_jsonl_output_empty():
-    """Test parse_jsonl_output() handles empty input."""
-    result = parse_jsonl_output('')
-    assert result is None
-
-
-def test_parse_jsonl_output_filters_by_type():
-    """Test parse_jsonl_output() can filter by record type."""
-    jsonl_output = '''{"type": "LogEntry", "data": "log1"}
-{"type": "ArchiveResult", "data": "result1"}
-{"type": "ArchiveResult", "data": "result2"}
-'''
-
-    # Should return first ArchiveResult, not LogEntry
-    result = parse_jsonl_output(jsonl_output, record_type='ArchiveResult')
-    assert result is not None
-    assert result['type'] == 'ArchiveResult'
-    assert result['data'] == 'result1'  # First ArchiveResult
-
-
-def test_parse_jsonl_output_filters_custom_type():
-    """Test parse_jsonl_output() can filter by custom record type."""
-    jsonl_output = '''{"type": "ArchiveResult", "data": "result1"}
-{"type": "LogEntry", "data": "log1"}
-{"type": "ArchiveResult", "data": "result2"}
-'''
-
-    result = parse_jsonl_output(jsonl_output, record_type='LogEntry')
-    assert result is not None
-    assert result['type'] == 'LogEntry'
-    assert result['data'] == 'log1'
-
-
-def test_machine_type_consistency():
-    """Test that machine type is consistent across calls."""
-    mt1 = get_machine_type()
-    mt2 = get_machine_type()
-    assert mt1 == mt2, "Machine type should be stable across calls"
-
-
-def test_lib_dir_is_directory():
-    """Test that lib_dir points to an actual directory when DATA_DIR is set."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        old_data_dir = os.environ.get('DATA_DIR')
-        try:
-            os.environ['DATA_DIR'] = tmpdir
-            # Create the expected directory structure
-            machine_type = get_machine_type()
-            lib_dir = Path(tmpdir) / 'lib' / machine_type
-            lib_dir.mkdir(parents=True, exist_ok=True)
-
-            result = get_lib_dir()
-            # Should return a Path object
-            assert isinstance(result, Path)
-        finally:
-            if old_data_dir:
-                os.environ['DATA_DIR'] = old_data_dir
-            else:
-                os.environ.pop('DATA_DIR', None)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/consolelog/config.json b/archivebox/plugins/consolelog/config.json
deleted file mode 100644
index f03ae547..00000000
--- a/archivebox/plugins/consolelog/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "CONSOLELOG_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_CONSOLELOG", "USE_CONSOLELOG"],
-      "description": "Enable console log capture"
-    },
-    "CONSOLELOG_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for console log capture in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/consolelog/on_Snapshot__21_consolelog.bg.js b/archivebox/plugins/consolelog/on_Snapshot__21_consolelog.bg.js
deleted file mode 100755
index 92351c05..00000000
--- a/archivebox/plugins/consolelog/on_Snapshot__21_consolelog.bg.js
+++ /dev/null
@@ -1,201 +0,0 @@
-#!/usr/bin/env node
-/**
- * Capture console output from a page.
- *
- * This hook sets up CDP listeners BEFORE chrome_navigate loads the page,
- * then waits for navigation to complete. The listeners stay active through
- * navigation and capture all console output.
- *
- * Usage: on_Snapshot__21_consolelog.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes console.jsonl
- */
-
-const fs = require('fs');
-const path = require('path');
-
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const puppeteer = require('puppeteer-core');
-
-// Import shared utilities from chrome_utils.js
-const {
-    getEnvBool,
-    getEnvInt,
-    parseArgs,
-    connectToPage,
-    waitForPageLoaded,
-} = require('../chrome/chrome_utils.js');
-
-const PLUGIN_NAME = 'consolelog';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'console.jsonl';
-const CHROME_SESSION_DIR = '../chrome';
-
-let browser = null;
-let page = null;
-let logCount = 0;
-let errorCount = 0;
-let requestFailCount = 0;
-let shuttingDown = false;
-
-async function serializeArgs(args) {
-    const serialized = [];
-    for (const arg of args) {
-        try {
-            const json = await arg.jsonValue();
-            serialized.push(json);
-        } catch (e) {
-            try {
-                serialized.push(String(arg));
-            } catch (e2) {
-                serialized.push('[Unserializable]');
-            }
-        }
-    }
-    return serialized;
-}
-
-async function setupListeners() {
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-    const timeout = getEnvInt('CONSOLELOG_TIMEOUT', 30) * 1000;
-
-    fs.writeFileSync(outputPath, ''); // Clear existing
-
-    // Connect to Chrome page using shared utility
-    const { browser, page } = await connectToPage({
-        chromeSessionDir: CHROME_SESSION_DIR,
-        timeoutMs: timeout,
-        puppeteer,
-    });
-
-    // Set up listeners that write directly to file
-    page.on('console', async (msg) => {
-        try {
-            const logEntry = {
-                timestamp: new Date().toISOString(),
-                type: msg.type(),
-                text: msg.text(),
-                args: await serializeArgs(msg.args()),
-                location: msg.location(),
-            };
-            fs.appendFileSync(outputPath, JSON.stringify(logEntry) + '\n');
-            logCount += 1;
-        } catch (e) {
-            // Ignore errors
-        }
-    });
-
-    page.on('pageerror', (error) => {
-        try {
-            const logEntry = {
-                timestamp: new Date().toISOString(),
-                type: 'error',
-                text: error.message,
-                stack: error.stack || '',
-            };
-            fs.appendFileSync(outputPath, JSON.stringify(logEntry) + '\n');
-            errorCount += 1;
-        } catch (e) {
-            // Ignore
-        }
-    });
-
-    page.on('requestfailed', (request) => {
-        try {
-            const failure = request.failure();
-            const logEntry = {
-                timestamp: new Date().toISOString(),
-                type: 'request_failed',
-                text: `Request failed: ${request.url()}`,
-                error: failure ? failure.errorText : 'Unknown error',
-                url: request.url(),
-            };
-            fs.appendFileSync(outputPath, JSON.stringify(logEntry) + '\n');
-            requestFailCount += 1;
-        } catch (e) {
-            // Ignore
-        }
-    });
-
-    return { browser, page };
-}
-
-function emitResult(status = 'succeeded') {
-    if (shuttingDown) return;
-    shuttingDown = true;
-
-    const counts = `${logCount} console, ${errorCount} errors, ${requestFailCount} failed requests`;
-    console.log(JSON.stringify({
-        type: 'ArchiveResult',
-        status,
-        output_str: `${OUTPUT_FILE} (${counts})`,
-    }));
-}
-
-async function handleShutdown(signal) {
-    console.error(`\nReceived ${signal}, emitting final results...`);
-    emitResult('succeeded');
-    if (browser) {
-        try {
-            browser.disconnect();
-        } catch (e) {}
-    }
-    process.exit(0);
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__21_consolelog.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    if (!getEnvBool('CONSOLELOG_ENABLED', true)) {
-        console.error('Skipping (CONSOLELOG_ENABLED=False)');
-        console.log(JSON.stringify({type: 'ArchiveResult', status: 'skipped', output_str: 'CONSOLELOG_ENABLED=False'}));
-        process.exit(0);
-    }
-
-    try {
-        // Set up listeners BEFORE navigation
-        const connection = await setupListeners();
-        browser = connection.browser;
-        page = connection.page;
-
-        // Register signal handlers for graceful shutdown
-        process.on('SIGTERM', () => handleShutdown('SIGTERM'));
-        process.on('SIGINT', () => handleShutdown('SIGINT'));
-
-        // Wait for chrome_navigate to complete (non-fatal)
-        try {
-            const timeout = getEnvInt('CONSOLELOG_TIMEOUT', 30) * 1000;
-            await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 500);
-        } catch (e) {
-            console.error(`WARN: ${e.message}`);
-        }
-
-        // console.error('Consolelog active, waiting for cleanup signal...');
-        await new Promise(() => {}); // Keep alive until SIGTERM
-        return;
-
-    } catch (e) {
-        const error = `${e.name}: ${e.message}`;
-        console.error(`ERROR: ${error}`);
-
-        console.log(JSON.stringify({
-            type: 'ArchiveResult',
-            status: 'failed',
-            output_str: error,
-        }));
-        process.exit(1);
-    }
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/consolelog/templates/icon.html b/archivebox/plugins/consolelog/templates/icon.html
deleted file mode 100644
index c68b8db5..00000000
--- a/archivebox/plugins/consolelog/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--consolelog" title="Console Log"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="4.5" width="18" height="15" rx="2"/><path d="M7 12l2 2-2 2"/><path d="M11 16h6"/></svg></span>
diff --git a/archivebox/plugins/consolelog/tests/test_consolelog.py b/archivebox/plugins/consolelog/tests/test_consolelog.py
deleted file mode 100644
index ab851d15..00000000
--- a/archivebox/plugins/consolelog/tests/test_consolelog.py
+++ /dev/null
@@ -1,127 +0,0 @@
-"""
-Tests for the consolelog plugin.
-
-Tests the real consolelog hook with an actual URL to verify
-console output capture.
-"""
-
-import json
-import shutil
-import subprocess
-import sys
-import tempfile
-import time
-from pathlib import Path
-
-from django.test import TestCase
-
-# Import chrome test helpers
-sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
-from chrome_test_helpers import (
-    chrome_session,
-    CHROME_NAVIGATE_HOOK,
-    get_plugin_dir,
-    get_hook_script,
-)
-
-
-# Get the path to the consolelog hook
-PLUGIN_DIR = get_plugin_dir(__file__)
-CONSOLELOG_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_consolelog.*')
-
-
-class TestConsolelogPlugin(TestCase):
-    """Test the consolelog plugin."""
-
-    def test_consolelog_hook_exists(self):
-        """Consolelog hook script should exist."""
-        self.assertIsNotNone(CONSOLELOG_HOOK, "Consolelog hook not found in plugin directory")
-        self.assertTrue(CONSOLELOG_HOOK.exists(), f"Hook not found: {CONSOLELOG_HOOK}")
-
-
-class TestConsolelogWithChrome(TestCase):
-    """Integration tests for consolelog plugin with Chrome."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = Path(tempfile.mkdtemp())
-
-    def tearDown(self):
-        """Clean up."""
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_consolelog_captures_output(self):
-        """Consolelog hook should capture console output from page."""
-        test_url = 'data:text/html,<script>console.log("archivebox-console-test")</script>'
-        snapshot_id = 'test-consolelog-snapshot'
-
-        with chrome_session(
-            self.temp_dir,
-            crawl_id='test-consolelog-crawl',
-            snapshot_id=snapshot_id,
-            test_url=test_url,
-            navigate=False,
-            timeout=30,
-        ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-            console_dir = snapshot_chrome_dir.parent / 'consolelog'
-            console_dir.mkdir(exist_ok=True)
-
-            # Run consolelog hook with the active Chrome session (background hook)
-            result = subprocess.Popen(
-                ['node', str(CONSOLELOG_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                cwd=str(console_dir),
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                text=True,
-                env=env
-            )
-
-            nav_result = subprocess.run(
-                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                cwd=str(snapshot_chrome_dir),
-                capture_output=True,
-                text=True,
-                timeout=120,
-                env=env
-            )
-            self.assertEqual(nav_result.returncode, 0, f"Navigation failed: {nav_result.stderr}")
-
-            # Check for output file
-            console_output = console_dir / 'console.jsonl'
-
-            # Allow it to run briefly, then terminate (background hook)
-            for _ in range(10):
-                if console_output.exists() and console_output.stat().st_size > 0:
-                    break
-                time.sleep(1)
-            if result.poll() is None:
-                result.terminate()
-                try:
-                    stdout, stderr = result.communicate(timeout=5)
-                except subprocess.TimeoutExpired:
-                    result.kill()
-                    stdout, stderr = result.communicate()
-            else:
-                stdout, stderr = result.communicate()
-
-            # At minimum, verify no crash
-            self.assertNotIn('Traceback', stderr)
-
-            # If output file exists, verify it's valid JSONL and has output
-            if console_output.exists():
-                with open(console_output) as f:
-                    content = f.read().strip()
-                    self.assertTrue(content, "Console output should not be empty")
-                    for line in content.split('\n'):
-                        if line.strip():
-                            try:
-                                record = json.loads(line)
-                                # Verify structure
-                                self.assertIn('timestamp', record)
-                                self.assertIn('type', record)
-                            except json.JSONDecodeError:
-                                pass  # Some lines may be incomplete
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/custom/on_Binary__14_custom_install.py b/archivebox/plugins/custom/on_Binary__14_custom_install.py
deleted file mode 100644
index 47eea07f..00000000
--- a/archivebox/plugins/custom/on_Binary__14_custom_install.py
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/usr/bin/env python3
-"""
-Install a binary using a custom bash command.
-
-This provider runs arbitrary shell commands to install binaries
-that don't fit into standard package managers.
-
-Usage: on_Binary__install_using_custom_bash.py --binary-id=<uuid> --machine-id=<uuid> --name=<name> --custom-cmd=<cmd>
-Output: Binary JSONL record to stdout after installation
-
-Environment variables:
-    MACHINE_ID: Machine UUID (set by orchestrator)
-"""
-
-import json
-import os
-import subprocess
-import sys
-
-import rich_click as click
-from abx_pkg import Binary, EnvProvider
-
-
-@click.command()
-@click.option('--binary-id', required=True, help="Binary UUID")
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--name', required=True, help="Binary name to install")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--custom-cmd', required=True, help="Custom bash command to run")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_cmd: str):
-    """Install binary using custom bash command."""
-
-    if binproviders != '*' and 'custom' not in binproviders.split(','):
-        click.echo(f"custom provider not allowed for {name}", err=True)
-        sys.exit(0)
-
-    if not custom_cmd:
-        click.echo("custom provider requires --custom-cmd", err=True)
-        sys.exit(1)
-
-    click.echo(f"Installing {name} via custom command: {custom_cmd}", err=True)
-
-    try:
-        result = subprocess.run(
-            custom_cmd,
-            shell=True,
-            timeout=600,  # 10 minute timeout for custom installs
-        )
-        if result.returncode != 0:
-            click.echo(f"Custom install failed (exit={result.returncode})", err=True)
-            sys.exit(1)
-    except subprocess.TimeoutExpired:
-        click.echo("Custom install timed out", err=True)
-        sys.exit(1)
-
-    # Use abx-pkg to load the binary and get its info
-    provider = EnvProvider()
-    try:
-        binary = Binary(name=name, binproviders=[provider]).load()
-    except Exception:
-        try:
-            binary = Binary(
-                name=name,
-                binproviders=[provider],
-                overrides={'env': {'version': '0.0.1'}},
-            ).load()
-        except Exception as e:
-            click.echo(f"{name} not found after custom install: {e}", err=True)
-            sys.exit(1)
-
-    if not binary.abspath:
-        click.echo(f"{name} not found after custom install", err=True)
-        sys.exit(1)
-
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    # Output Binary JSONL record to stdout
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'custom',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
-    }
-    print(json.dumps(record))
-
-    # Log human-readable info to stderr
-    click.echo(f"Installed {name} at {binary.abspath}", err=True)
-    click.echo(f"  version: {binary.version}", err=True)
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/custom/templates/icon.html b/archivebox/plugins/custom/templates/icon.html
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/custom/tests/test_custom_provider.py b/archivebox/plugins/custom/tests/test_custom_provider.py
deleted file mode 100644
index 22a2cb1d..00000000
--- a/archivebox/plugins/custom/tests/test_custom_provider.py
+++ /dev/null
@@ -1,149 +0,0 @@
-"""
-Tests for the custom binary provider plugin.
-
-Tests the custom bash binary installer with safe commands.
-"""
-
-import json
-import os
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-from django.test import TestCase
-
-
-# Get the path to the custom provider hook
-PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_custom_install.py'), None)
-
-
-class TestCustomProviderHook(TestCase):
-    """Test the custom binary provider hook."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = tempfile.mkdtemp()
-
-    def tearDown(self):
-        """Clean up."""
-        import shutil
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_hook_script_exists(self):
-        """Hook script should exist."""
-        self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")
-
-    def test_hook_skips_when_custom_not_allowed(self):
-        """Hook should skip when custom not in allowed binproviders."""
-        env = os.environ.copy()
-        env['DATA_DIR'] = self.temp_dir
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=echo',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--binproviders=pip,apt',  # custom not allowed
-                '--custom-cmd=echo hello',
-            ],
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        # Should exit cleanly (code 0) when custom not allowed
-        self.assertEqual(result.returncode, 0)
-        self.assertIn('custom provider not allowed', result.stderr)
-
-    def test_hook_runs_custom_command_and_finds_binary(self):
-        """Hook should run custom command and find the binary in PATH."""
-        env = os.environ.copy()
-        env['DATA_DIR'] = self.temp_dir
-
-        # Use a simple echo command that doesn't actually install anything
-        # Then check for 'echo' which is already in PATH
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=echo',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--custom-cmd=echo "custom install simulation"',
-            ],
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        # Should succeed since echo is in PATH
-        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
-
-        # Parse JSONL output
-        for line in result.stdout.split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'Binary' and record.get('name') == 'echo':
-                        self.assertEqual(record['binprovider'], 'custom')
-                        self.assertTrue(record['abspath'])
-                        return
-                except json.JSONDecodeError:
-                    continue
-
-        self.fail("No Binary JSONL record found in output")
-
-    def test_hook_fails_for_missing_binary_after_command(self):
-        """Hook should fail if binary not found after running custom command."""
-        env = os.environ.copy()
-        env['DATA_DIR'] = self.temp_dir
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=nonexistent_binary_xyz123',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--custom-cmd=echo "failed install"',  # Doesn't actually install
-            ],
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        # Should fail since binary not found after command
-        self.assertEqual(result.returncode, 1)
-        self.assertIn('not found', result.stderr.lower())
-
-    def test_hook_fails_for_failing_command(self):
-        """Hook should fail if custom command returns non-zero exit code."""
-        env = os.environ.copy()
-        env['DATA_DIR'] = self.temp_dir
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=echo',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--custom-cmd=exit 1',  # Command that fails
-            ],
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        # Should fail with exit code 1
-        self.assertEqual(result.returncode, 1)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/dns/config.json b/archivebox/plugins/dns/config.json
deleted file mode 100644
index 2a69a4c8..00000000
--- a/archivebox/plugins/dns/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "DNS_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_DNS", "USE_DNS"],
-      "description": "Enable DNS traffic recording during page load"
-    },
-    "DNS_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for DNS recording in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/dns/on_Snapshot__22_dns.bg.js b/archivebox/plugins/dns/on_Snapshot__22_dns.bg.js
deleted file mode 100755
index 105f13d8..00000000
--- a/archivebox/plugins/dns/on_Snapshot__22_dns.bg.js
+++ /dev/null
@@ -1,265 +0,0 @@
-#!/usr/bin/env node
-/**
- * Record all DNS traffic (hostname -> IP resolutions) during page load.
- *
- * This hook sets up CDP listeners BEFORE chrome_navigate loads the page,
- * then waits for navigation to complete. The listeners capture all DNS
- * resolutions by extracting hostname/IP pairs from network responses.
- *
- * Usage: on_Snapshot__22_dns.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes dns.jsonl with one line per DNS resolution record
- */
-
-const fs = require('fs');
-const path = require('path');
-
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const puppeteer = require('puppeteer-core');
-
-// Import shared utilities from chrome_utils.js
-const {
-    getEnvBool,
-    getEnvInt,
-    parseArgs,
-    connectToPage,
-    waitForPageLoaded,
-} = require('../chrome/chrome_utils.js');
-
-const PLUGIN_NAME = 'dns';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'dns.jsonl';
-const CHROME_SESSION_DIR = '../chrome';
-
-let browser = null;
-let page = null;
-let recordCount = 0;
-let shuttingDown = false;
-
-function extractHostname(url) {
-    try {
-        const urlObj = new URL(url);
-        return urlObj.hostname;
-    } catch (e) {
-        return null;
-    }
-}
-
-async function setupListener(targetUrl) {
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-    const timeout = getEnvInt('DNS_TIMEOUT', 30) * 1000;
-
-    // Initialize output file
-    fs.writeFileSync(outputPath, '');
-
-    // Track seen hostname -> IP mappings to avoid duplicates per request
-    const seenResolutions = new Map();
-    // Track request IDs to their URLs for correlation
-    const requestUrls = new Map();
-
-    // Connect to Chrome page using shared utility
-    const { browser, page } = await connectToPage({
-        chromeSessionDir: CHROME_SESSION_DIR,
-        timeoutMs: timeout,
-        puppeteer,
-    });
-
-    // Get CDP session for low-level network events
-    const client = await page.target().createCDPSession();
-
-    // Enable network domain to receive events
-    await client.send('Network.enable');
-
-    // Listen for request events to track URLs
-    client.on('Network.requestWillBeSent', (params) => {
-        requestUrls.set(params.requestId, params.request.url);
-    });
-
-    // Listen for response events which contain remoteIPAddress (the resolved IP)
-    client.on('Network.responseReceived', (params) => {
-        try {
-            const response = params.response;
-            const url = response.url;
-            const remoteIPAddress = response.remoteIPAddress;
-            const remotePort = response.remotePort;
-
-            if (!url || !remoteIPAddress) {
-                return;
-            }
-
-            const hostname = extractHostname(url);
-            if (!hostname) {
-                return;
-            }
-
-            // Skip if IP address is same as hostname (already an IP)
-            if (hostname === remoteIPAddress) {
-                return;
-            }
-
-            // Create a unique key for this resolution
-            const resolutionKey = `${hostname}:${remoteIPAddress}`;
-
-            // Skip if we've already recorded this resolution
-            if (seenResolutions.has(resolutionKey)) {
-                return;
-            }
-            seenResolutions.set(resolutionKey, true);
-
-            // Determine record type (A for IPv4, AAAA for IPv6)
-            const isIPv6 = remoteIPAddress.includes(':');
-            const recordType = isIPv6 ? 'AAAA' : 'A';
-
-            // Create DNS record
-            const timestamp = new Date().toISOString();
-            const dnsRecord = {
-                ts: timestamp,
-                hostname: hostname,
-                ip: remoteIPAddress,
-                port: remotePort || null,
-                type: recordType,
-                protocol: url.startsWith('https://') ? 'https' : 'http',
-                url: url,
-                requestId: params.requestId,
-            };
-
-            // Append to output file
-            fs.appendFileSync(outputPath, JSON.stringify(dnsRecord) + '\n');
-            recordCount += 1;
-
-        } catch (e) {
-            // Ignore errors
-        }
-    });
-
-    // Listen for failed requests too - they still involve DNS
-    client.on('Network.loadingFailed', (params) => {
-        try {
-            const requestId = params.requestId;
-            const url = requestUrls.get(requestId);
-
-            if (!url) {
-                return;
-            }
-
-            const hostname = extractHostname(url);
-            if (!hostname) {
-                return;
-            }
-
-            // Check if this is a DNS-related failure
-            const errorText = params.errorText || '';
-            if (errorText.includes('net::ERR_NAME_NOT_RESOLVED') ||
-                errorText.includes('net::ERR_NAME_RESOLUTION_FAILED')) {
-
-                // Create a unique key for this failed resolution
-                const resolutionKey = `${hostname}:NXDOMAIN`;
-
-                // Skip if we've already recorded this NXDOMAIN
-                if (seenResolutions.has(resolutionKey)) {
-                    return;
-                }
-                seenResolutions.set(resolutionKey, true);
-
-                const timestamp = new Date().toISOString();
-                const dnsRecord = {
-                    ts: timestamp,
-                    hostname: hostname,
-                    ip: null,
-                    port: null,
-                    type: 'NXDOMAIN',
-                    protocol: url.startsWith('https://') ? 'https' : 'http',
-                    url: url,
-                    requestId: requestId,
-                    error: errorText,
-                };
-
-                fs.appendFileSync(outputPath, JSON.stringify(dnsRecord) + '\n');
-                recordCount += 1;
-            }
-        } catch (e) {
-            // Ignore errors
-        }
-    });
-
-    return { browser, page, client };
-}
-
-function emitResult(status = 'succeeded') {
-    if (shuttingDown) return;
-    shuttingDown = true;
-
-    console.log(JSON.stringify({
-        type: 'ArchiveResult',
-        status,
-        output_str: `${OUTPUT_FILE} (${recordCount} DNS records)`,
-    }));
-}
-
-async function handleShutdown(signal) {
-    console.error(`\nReceived ${signal}, emitting final results...`);
-    emitResult('succeeded');
-    if (browser) {
-        try {
-            browser.disconnect();
-        } catch (e) {}
-    }
-    process.exit(0);
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__22_dns.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    if (!getEnvBool('DNS_ENABLED', true)) {
-        console.error('Skipping (DNS_ENABLED=False)');
-        console.log(JSON.stringify({type: 'ArchiveResult', status: 'skipped', output_str: 'DNS_ENABLED=False'}));
-        process.exit(0);
-    }
-
-    try {
-        // Set up listener BEFORE navigation
-        const connection = await setupListener(url);
-        browser = connection.browser;
-        page = connection.page;
-
-        // Register signal handlers for graceful shutdown
-        process.on('SIGTERM', () => handleShutdown('SIGTERM'));
-        process.on('SIGINT', () => handleShutdown('SIGINT'));
-
-        // Wait for chrome_navigate to complete (non-fatal)
-        try {
-            const timeout = getEnvInt('DNS_TIMEOUT', 30) * 1000;
-            await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 500);
-        } catch (e) {
-            console.error(`WARN: ${e.message}`);
-        }
-
-        // console.error('DNS listener active, waiting for cleanup signal...');
-        await new Promise(() => {}); // Keep alive until SIGTERM
-        return;
-
-    } catch (e) {
-        const error = `${e.name}: ${e.message}`;
-        console.error(`ERROR: ${error}`);
-
-        console.log(JSON.stringify({
-            type: 'ArchiveResult',
-            status: 'failed',
-            output_str: error,
-        }));
-        process.exit(1);
-    }
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/dns/templates/icon.html b/archivebox/plugins/dns/templates/icon.html
deleted file mode 100644
index 1a558d40..00000000
--- a/archivebox/plugins/dns/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--dns" title="DNS"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="6" cy="12" r="2"/><circle cx="18" cy="6" r="2"/><circle cx="18" cy="18" r="2"/><path d="M8 12h6"/><path d="M16 8l-2 2"/><path d="M16 16l-2-2"/></svg></span>
diff --git a/archivebox/plugins/dns/tests/test_dns.py b/archivebox/plugins/dns/tests/test_dns.py
deleted file mode 100644
index ac10a478..00000000
--- a/archivebox/plugins/dns/tests/test_dns.py
+++ /dev/null
@@ -1,126 +0,0 @@
-"""
-Tests for the DNS plugin.
-
-Tests the real DNS hook with an actual URL to verify
-DNS resolution capture.
-"""
-
-import json
-import shutil
-import subprocess
-import sys
-import tempfile
-import time
-from pathlib import Path
-
-from django.test import TestCase
-
-# Import chrome test helpers
-sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
-from chrome_test_helpers import (
-    chrome_session,
-    CHROME_NAVIGATE_HOOK,
-    get_plugin_dir,
-    get_hook_script,
-)
-
-
-# Get the path to the DNS hook
-PLUGIN_DIR = get_plugin_dir(__file__)
-DNS_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_dns.*')
-
-
-class TestDNSPlugin(TestCase):
-    """Test the DNS plugin."""
-
-    def test_dns_hook_exists(self):
-        """DNS hook script should exist."""
-        self.assertIsNotNone(DNS_HOOK, "DNS hook not found in plugin directory")
-        self.assertTrue(DNS_HOOK.exists(), f"Hook not found: {DNS_HOOK}")
-
-
-class TestDNSWithChrome(TestCase):
-    """Integration tests for DNS plugin with Chrome."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = Path(tempfile.mkdtemp())
-
-    def tearDown(self):
-        """Clean up."""
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_dns_records_captured(self):
-        """DNS hook should capture DNS records from a real URL."""
-        test_url = 'https://example.com'
-        snapshot_id = 'test-dns-snapshot'
-
-        with chrome_session(
-            self.temp_dir,
-            crawl_id='test-dns-crawl',
-            snapshot_id=snapshot_id,
-            test_url=test_url,
-            navigate=False,
-            timeout=30,
-        ) as (_process, _pid, snapshot_chrome_dir, env):
-            dns_dir = snapshot_chrome_dir.parent / 'dns'
-            dns_dir.mkdir(exist_ok=True)
-
-            result = subprocess.Popen(
-                ['node', str(DNS_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                cwd=str(dns_dir),
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                text=True,
-                env=env
-            )
-
-            nav_result = subprocess.run(
-                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                cwd=str(snapshot_chrome_dir),
-                capture_output=True,
-                text=True,
-                timeout=120,
-                env=env
-            )
-            self.assertEqual(nav_result.returncode, 0, f"Navigation failed: {nav_result.stderr}")
-
-            dns_output = dns_dir / 'dns.jsonl'
-            for _ in range(30):
-                if dns_output.exists() and dns_output.stat().st_size > 0:
-                    break
-                time.sleep(1)
-
-            if result.poll() is None:
-                result.terminate()
-                try:
-                    stdout, stderr = result.communicate(timeout=5)
-                except subprocess.TimeoutExpired:
-                    result.kill()
-                    stdout, stderr = result.communicate()
-            else:
-                stdout, stderr = result.communicate()
-
-            self.assertNotIn('Traceback', stderr)
-
-            self.assertTrue(dns_output.exists(), "dns.jsonl not created")
-            content = dns_output.read_text().strip()
-            self.assertTrue(content, "DNS output should not be empty")
-
-            records = []
-            for line in content.split('\n'):
-                line = line.strip()
-                if not line:
-                    continue
-                try:
-                    records.append(json.loads(line))
-                except json.JSONDecodeError:
-                    pass
-
-            self.assertTrue(records, "No DNS records parsed")
-            has_ip_record = any(r.get('hostname') and r.get('ip') for r in records)
-            self.assertTrue(has_ip_record, f"No DNS record with hostname + ip: {records}")
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/dom/config.json b/archivebox/plugins/dom/config.json
deleted file mode 100644
index 7863e873..00000000
--- a/archivebox/plugins/dom/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "DOM_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_DOM", "USE_DOM"],
-      "description": "Enable DOM capture"
-    },
-    "DOM_TIMEOUT": {
-      "type": "integer",
-      "default": 60,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for DOM capture in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/dom/on_Snapshot__53_dom.js b/archivebox/plugins/dom/on_Snapshot__53_dom.js
deleted file mode 100644
index db8a2420..00000000
--- a/archivebox/plugins/dom/on_Snapshot__53_dom.js
+++ /dev/null
@@ -1,184 +0,0 @@
-#!/usr/bin/env node
-/**
- * Dump the DOM of a URL using Chrome/Puppeteer.
- *
- * Requires a Chrome session (from chrome plugin) and connects to it via CDP.
- *
- * Usage: on_Snapshot__53_dom.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes dom/output.html
- *
- * Environment variables:
- *     DOM_ENABLED: Enable DOM extraction (default: true)
- */
-
-const fs = require('fs');
-const path = require('path');
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const {
-    getEnvBool,
-    parseArgs,
-    readCdpUrl,
-} = require('../chrome/chrome_utils.js');
-
-// Check if DOM is enabled BEFORE requiring puppeteer
-if (!getEnvBool('DOM_ENABLED', true)) {
-    console.error('Skipping DOM (DOM_ENABLED=False)');
-    // Temporary failure (config disabled) - NO JSONL emission
-    process.exit(0);
-}
-
-// Now safe to require puppeteer
-const puppeteer = require('puppeteer-core');
-
-// Extractor metadata
-const PLUGIN_NAME = 'dom';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'output.html';
-const CHROME_SESSION_DIR = '../chrome';
-
-// Check if staticfile extractor already downloaded this URL
-const STATICFILE_DIR = '../staticfile';
-function hasStaticFileOutput() {
-    if (!fs.existsSync(STATICFILE_DIR)) return false;
-    const stdoutPath = path.join(STATICFILE_DIR, 'stdout.log');
-    if (!fs.existsSync(stdoutPath)) return false;
-    const stdout = fs.readFileSync(stdoutPath, 'utf8');
-    for (const line of stdout.split('\n')) {
-        const trimmed = line.trim();
-        if (!trimmed.startsWith('{')) continue;
-        try {
-            const record = JSON.parse(trimmed);
-            if (record.type === 'ArchiveResult' && record.status === 'succeeded') {
-                return true;
-            }
-        } catch (e) {}
-    }
-    return false;
-}
-
-// Wait for chrome tab to be fully loaded
-async function waitForChromeTabLoaded(timeoutMs = 60000) {
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(navigationFile)) {
-            return true;
-        }
-        // Wait 100ms before checking again
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-
-    return false;
-}
-
-async function dumpDom(url) {
-    // Output directory is current directory (hook already runs in output dir)
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-
-    let browser = null;
-    let page = null;
-
-    try {
-        // Connect to existing Chrome session (required)
-        const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
-        if (!cdpUrl) {
-            return { success: false, error: 'No Chrome session found (chrome plugin must run first)' };
-        }
-
-        browser = await puppeteer.connect({
-            browserWSEndpoint: cdpUrl,
-            defaultViewport: null,
-        });
-
-        // Get existing pages or create new one
-        const pages = await browser.pages();
-        page = pages.find(p => p.url().startsWith('http')) || pages[0];
-
-        if (!page) {
-            page = await browser.newPage();
-        }
-
-        // Get the full DOM content
-        const domContent = await page.content();
-
-        if (domContent && domContent.length > 100) {
-            fs.writeFileSync(outputPath, domContent, 'utf8');
-            return { success: true, output: outputPath };
-        } else {
-            return { success: false, error: 'DOM content too short or empty' };
-        }
-
-    } catch (e) {
-        return { success: false, error: `${e.name}: ${e.message}` };
-    } finally {
-        if (browser) {
-            browser.disconnect();
-        }
-    }
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__53_dom.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    try {
-        // Check if staticfile extractor already handled this (permanent skip)
-        if (hasStaticFileOutput()) {
-            console.error(`Skipping DOM - staticfile extractor already downloaded this`);
-            // Permanent skip - emit ArchiveResult with status='skipped'
-            console.log(JSON.stringify({
-                type: 'ArchiveResult',
-                status: 'skipped',
-                output_str: 'staticfile already handled',
-            }));
-            process.exit(0);
-        }
-
-        const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
-        if (!cdpUrl) {
-            throw new Error('No Chrome session found (chrome plugin must run first)');
-        }
-
-        // Wait for page to be fully loaded
-        const pageLoaded = await waitForChromeTabLoaded(60000);
-        if (!pageLoaded) {
-            throw new Error('Page not loaded after 60s (chrome_navigate must complete first)');
-        }
-
-        const result = await dumpDom(url);
-
-        if (result.success) {
-            // Success - emit ArchiveResult
-            const size = fs.statSync(result.output).size;
-            console.error(`DOM saved (${size} bytes)`);
-            console.log(JSON.stringify({
-                type: 'ArchiveResult',
-                status: 'succeeded',
-                output_str: result.output,
-            }));
-            process.exit(0);
-        } else {
-            // Transient error - emit NO JSONL
-            console.error(`ERROR: ${result.error}`);
-            process.exit(1);
-        }
-    } catch (e) {
-        // Transient error - emit NO JSONL
-        console.error(`ERROR: ${e.name}: ${e.message}`);
-        process.exit(1);
-    }
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/dom/templates/card.html b/archivebox/plugins/dom/templates/card.html
deleted file mode 100644
index 88f126df..00000000
--- a/archivebox/plugins/dom/templates/card.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<!-- DOM thumbnail - scaled down iframe preview of captured DOM HTML -->
-<div class="extractor-thumbnail dom-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fff;">
-    <iframe src="{{ output_path }}"
-            style="width: 400%; height: 400px; transform: scale(0.25); transform-origin: top left; pointer-events: none; border: none;"
-            loading="lazy"
-            sandbox="allow-same-origin">
-    </iframe>
-</div>
diff --git a/archivebox/plugins/dom/templates/icon.html b/archivebox/plugins/dom/templates/icon.html
deleted file mode 100644
index 56efac8d..00000000
--- a/archivebox/plugins/dom/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--dom" title="DOM"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M8 9l-3 3 3 3"/><path d="M16 9l3 3-3 3"/><path d="M10 20l4-16"/></svg></span>
diff --git a/archivebox/plugins/dom/tests/test_dom.py b/archivebox/plugins/dom/tests/test_dom.py
deleted file mode 100644
index 2d98d873..00000000
--- a/archivebox/plugins/dom/tests/test_dom.py
+++ /dev/null
@@ -1,185 +0,0 @@
-"""
-Integration tests for dom plugin
-
-Tests verify:
-1. Hook script exists
-2. Dependencies installed via chrome validation hooks
-3. Verify deps with abx-pkg
-4. DOM extraction works on https://example.com
-5. JSONL output is correct
-6. Filesystem output contains actual page content
-7. Config options work
-"""
-
-import json
-import os
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
-    run_hook_and_parse,
-    LIB_DIR,
-    NODE_MODULES_DIR,
-    PLUGINS_ROOT,
-    chrome_session,
-)
-
-
-PLUGIN_DIR = get_plugin_dir(__file__)
-DOM_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_dom.*')
-NPM_PROVIDER_HOOK = get_hook_script(PLUGINS_ROOT / 'npm', 'on_Binary__install_using_npm_provider.py')
-TEST_URL = 'https://example.com'
-
-
-def test_hook_script_exists():
-    """Verify on_Snapshot hook exists."""
-    assert DOM_HOOK.exists(), f"Hook not found: {DOM_HOOK}"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify dependencies are available via abx-pkg after hook installation."""
-    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
-
-    EnvProvider.model_rebuild()
-
-    # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
-    node_loaded = node_binary.load()
-    assert node_loaded and node_loaded.abspath, "Node.js required for dom plugin"
-
-
-def test_extracts_dom_from_example_com():
-    """Test full workflow: extract DOM from real example.com via hook."""
-    # Prerequisites checked by earlier test
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        with chrome_session(tmpdir, test_url=TEST_URL) as (_process, _pid, snapshot_chrome_dir, env):
-            dom_dir = snapshot_chrome_dir.parent / 'dom'
-            dom_dir.mkdir(exist_ok=True)
-
-            # Run DOM extraction hook
-            result = subprocess.run(
-                ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
-                cwd=dom_dir,
-                capture_output=True,
-                text=True,
-                timeout=120,
-                env=env
-            )
-
-        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Verify filesystem output (hook writes directly to working dir)
-        dom_file = dom_dir / 'output.html'
-        assert dom_file.exists(), f"output.html not created. Files: {list(tmpdir.iterdir())}"
-
-        # Verify HTML content contains REAL example.com text
-        html_content = dom_file.read_text(errors='ignore')
-        assert len(html_content) > 200, f"HTML content too short: {len(html_content)} bytes"
-        assert '<html' in html_content.lower(), "Missing <html> tag"
-        assert 'example domain' in html_content.lower(), "Missing 'Example Domain' in HTML"
-        assert ('this domain' in html_content.lower() or
-                'illustrative examples' in html_content.lower()), \
-            "Missing example.com description text"
-
-
-def test_config_save_dom_false_skips():
-    """Test that DOM_ENABLED=False exits without emitting JSONL."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        env = os.environ.copy()
-        env['DOM_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-
-        # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping DOM' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
-
-
-def test_staticfile_present_skips():
-    """Test that dom skips when staticfile already downloaded."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Create directory structure like real ArchiveBox:
-        # tmpdir/
-        #   staticfile/  <- staticfile extractor output
-        #   dom/         <- dom extractor runs here, looks for ../staticfile
-        staticfile_dir = tmpdir / 'staticfile'
-        staticfile_dir.mkdir()
-        (staticfile_dir / 'stdout.log').write_text('{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n')
-
-        dom_dir = tmpdir / 'dom'
-        dom_dir.mkdir()
-
-        result = subprocess.run(
-            ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=teststatic'],
-            cwd=dom_dir,  # Run from dom subdirectory
-            capture_output=True,
-            text=True,
-            timeout=30
-        ,
-            env=get_test_env())
-
-        assert result.returncode == 0, "Should exit 0 when permanently skipping"
-
-        # Permanent skip - should emit ArchiveResult with status='skipped'
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should emit ArchiveResult JSONL for permanent skip"
-        assert result_json['status'] == 'skipped', f"Should have status='skipped': {result_json}"
-        assert 'staticfile' in result_json.get('output_str', '').lower(), "Should mention staticfile in output_str"
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/env/on_Binary__15_env_install.py b/archivebox/plugins/env/on_Binary__15_env_install.py
deleted file mode 100644
index 35b3a9ca..00000000
--- a/archivebox/plugins/env/on_Binary__15_env_install.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/env python3
-"""
-Check if a binary is already available in the system PATH.
-
-This is the simplest "provider" - it doesn't install anything,
-it just discovers binaries that are already installed.
-
-Usage: on_Binary__install_using_env_provider.py --binary-id=<uuid> --machine-id=<uuid> --name=<name>
-Output: Binary JSONL record to stdout if binary found in PATH
-
-Environment variables:
-    MACHINE_ID: Machine UUID (set by orchestrator)
-"""
-
-import json
-import os
-import sys
-
-import rich_click as click
-from abx_pkg import Binary, EnvProvider
-
-
-@click.command()
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--binary-id', required=True, help="Dependency UUID")
-@click.option('--name', required=True, help="Binary name to find")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--overrides', default=None, help="JSON-encoded overrides dict (unused)")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None):
-    """Check if binary is available in PATH and record it."""
-
-    # Check if env provider is allowed
-    if binproviders != '*' and 'env' not in binproviders.split(','):
-        click.echo(f"env provider not allowed for {name}", err=True)
-        sys.exit(0)  # Not an error, just skip
-
-    # Use abx-pkg EnvProvider to find binary
-    provider = EnvProvider()
-    try:
-        binary = Binary(name=name, binproviders=[provider]).load()
-    except Exception as e:
-        click.echo(f"{name} not found in PATH: {e}", err=True)
-        sys.exit(1)
-
-    if not binary.abspath:
-        click.echo(f"{name} not found in PATH", err=True)
-        sys.exit(1)
-
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    # Output Binary JSONL record to stdout
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'env',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
-    }
-    print(json.dumps(record))
-
-    # Log human-readable info to stderr
-    click.echo(f"Found {name} at {binary.abspath}", err=True)
-    click.echo(f"  version: {binary.version}", err=True)
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/env/templates/icon.html b/archivebox/plugins/env/templates/icon.html
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/env/tests/test_env_provider.py b/archivebox/plugins/env/tests/test_env_provider.py
deleted file mode 100644
index 2bffcfca..00000000
--- a/archivebox/plugins/env/tests/test_env_provider.py
+++ /dev/null
@@ -1,159 +0,0 @@
-"""
-Tests for the env binary provider plugin.
-
-Tests the real env provider hook with actual system binaries.
-"""
-
-import json
-import os
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-from django.test import TestCase
-
-
-# Get the path to the env provider hook
-PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_env_install.py'), None)
-
-
-class TestEnvProviderHook(TestCase):
-    """Test the env binary provider hook."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = tempfile.mkdtemp()
-
-    def tearDown(self):
-        """Clean up."""
-        import shutil
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_hook_script_exists(self):
-        """Hook script should exist."""
-        self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")
-
-    def test_hook_finds_python(self):
-        """Hook should find python3 binary in PATH."""
-        env = os.environ.copy()
-        env['DATA_DIR'] = self.temp_dir
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=python3',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-            ],
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        # Should succeed and output JSONL
-        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
-
-        # Parse JSONL output
-        for line in result.stdout.split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'Binary' and record.get('name') == 'python3':
-                        self.assertEqual(record['binprovider'], 'env')
-                        self.assertTrue(record['abspath'])
-                        self.assertTrue(Path(record['abspath']).exists())
-                        return
-                except json.JSONDecodeError:
-                    continue
-
-        self.fail("No Binary JSONL record found in output")
-
-    def test_hook_finds_bash(self):
-        """Hook should find bash binary in PATH."""
-        env = os.environ.copy()
-        env['DATA_DIR'] = self.temp_dir
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=bash',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-            ],
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        # Should succeed and output JSONL
-        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
-
-        # Parse JSONL output
-        for line in result.stdout.split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'Binary' and record.get('name') == 'bash':
-                        self.assertEqual(record['binprovider'], 'env')
-                        self.assertTrue(record['abspath'])
-                        return
-                except json.JSONDecodeError:
-                    continue
-
-        self.fail("No Binary JSONL record found in output")
-
-    def test_hook_fails_for_missing_binary(self):
-        """Hook should fail for binary not in PATH."""
-        env = os.environ.copy()
-        env['DATA_DIR'] = self.temp_dir
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=nonexistent_binary_xyz123',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-            ],
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        # Should fail with exit code 1
-        self.assertEqual(result.returncode, 1)
-        self.assertIn('not found', result.stderr.lower())
-
-    def test_hook_skips_when_env_not_allowed(self):
-        """Hook should skip when env not in allowed binproviders."""
-        env = os.environ.copy()
-        env['DATA_DIR'] = self.temp_dir
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=python3',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--binproviders=pip,apt',  # env not allowed
-            ],
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        # Should exit cleanly (code 0) when env not allowed
-        self.assertEqual(result.returncode, 0)
-        self.assertIn('env provider not allowed', result.stderr)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/favicon/config.json b/archivebox/plugins/favicon/config.json
deleted file mode 100644
index 4c67e18f..00000000
--- a/archivebox/plugins/favicon/config.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "FAVICON_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_FAVICON", "USE_FAVICON"],
-      "description": "Enable favicon downloading"
-    },
-    "FAVICON_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for favicon fetch in seconds"
-    },
-    "FAVICON_USER_AGENT": {
-      "type": "string",
-      "default": "",
-      "x-fallback": "USER_AGENT",
-      "description": "User agent string"
-    }
-  }
-}
diff --git a/archivebox/plugins/favicon/on_Snapshot__11_favicon.bg.py b/archivebox/plugins/favicon/on_Snapshot__11_favicon.bg.py
deleted file mode 100644
index fc4604f4..00000000
--- a/archivebox/plugins/favicon/on_Snapshot__11_favicon.bg.py
+++ /dev/null
@@ -1,153 +0,0 @@
-#!/usr/bin/env python3
-"""
-Extract favicon from a URL.
-
-Usage: on_Snapshot__favicon.bg.py --url=<url> --snapshot-id=<uuid>
-Output: Writes favicon.ico to $PWD
-
-Environment variables:
-    FAVICON_TIMEOUT: Timeout in seconds (default: 30)
-    USER_AGENT: User agent string
-
-    # Fallback to ARCHIVING_CONFIG values if FAVICON_* not set:
-    TIMEOUT: Fallback timeout
-
-Note: This extractor uses the 'requests' library which is bundled with ArchiveBox.
-      It can run standalone if requests is installed: pip install requests
-"""
-
-import json
-import os
-import re
-import sys
-from pathlib import Path
-from urllib.parse import urljoin, urlparse
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'favicon'
-OUTPUT_DIR = '.'
-OUTPUT_FILE = 'favicon.ico'
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def get_favicon(url: str) -> tuple[bool, str | None, str]:
-    """
-    Fetch favicon from URL.
-
-    Returns: (success, output_path, error_message)
-    """
-    try:
-        import requests
-    except ImportError:
-        return False, None, 'requests library not installed'
-
-    timeout = get_env_int('FAVICON_TIMEOUT') or get_env_int('TIMEOUT', 30)
-    user_agent = get_env('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
-    headers = {'User-Agent': user_agent}
-
-    # Build list of possible favicon URLs
-    parsed = urlparse(url)
-    base_url = f"{parsed.scheme}://{parsed.netloc}"
-
-    favicon_urls = [
-        urljoin(base_url, '/favicon.ico'),
-        urljoin(base_url, '/favicon.png'),
-        urljoin(base_url, '/apple-touch-icon.png'),
-    ]
-
-    # Try to extract favicon URL from HTML link tags
-    try:
-        response = requests.get(url, timeout=timeout, headers=headers)
-        if response.ok:
-            # Look for <link rel="icon" href="...">
-            for match in re.finditer(
-                r'<link[^>]+rel=["\'](?:shortcut )?icon["\'][^>]+href=["\']([^"\']+)["\']',
-                response.text,
-                re.I
-            ):
-                favicon_urls.insert(0, urljoin(url, match.group(1)))
-
-            # Also check reverse order: href before rel
-            for match in re.finditer(
-                r'<link[^>]+href=["\']([^"\']+)["\'][^>]+rel=["\'](?:shortcut )?icon["\']',
-                response.text,
-                re.I
-            ):
-                favicon_urls.insert(0, urljoin(url, match.group(1)))
-    except Exception:
-        pass  # Continue with default favicon URLs
-
-    # Try each URL until we find one that works
-    for favicon_url in favicon_urls:
-        try:
-            response = requests.get(favicon_url, timeout=15, headers=headers)
-            if response.ok and len(response.content) > 0:
-                Path(OUTPUT_FILE).write_bytes(response.content)
-                return True, OUTPUT_FILE, ''
-        except Exception:
-            continue
-
-    # Try Google's favicon service as fallback
-    try:
-        google_url = f'https://www.google.com/s2/favicons?domain={parsed.netloc}'
-        response = requests.get(google_url, timeout=15, headers=headers)
-        if response.ok and len(response.content) > 0:
-            Path(OUTPUT_FILE).write_bytes(response.content)
-            return True, OUTPUT_FILE, ''
-    except Exception:
-        pass
-
-    return False, None, 'No favicon found'
-
-
-@click.command()
-@click.option('--url', required=True, help='URL to extract favicon from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Extract favicon from a URL."""
-
-    output = None
-    status = 'failed'
-    error = ''
-
-    try:
-        # Run extraction
-        success, output, error = get_favicon(url)
-        if success:
-            status = 'succeeded'
-        else:
-            status = 'failed'
-
-    except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
-
-    if error:
-        print(f'ERROR: {error}', file=sys.stderr)
-
-    # Output clean JSONL (no RESULT_JSON= prefix)
-    result = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output or error or '',
-    }
-    print(json.dumps(result))
-
-    sys.exit(0 if status == 'succeeded' else 1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/favicon/templates/card.html b/archivebox/plugins/favicon/templates/card.html
deleted file mode 100644
index c5df1617..00000000
--- a/archivebox/plugins/favicon/templates/card.html
+++ /dev/null
@@ -1,9 +0,0 @@
-<!-- Favicon thumbnail - small favicon preview -->
-<div class="extractor-thumbnail favicon-thumbnail" style="width: 100%; height: 100px; display: flex; align-items: center; justify-content: center; background: #fff;">
-    {% if output_path %}
-        <img src="{{ output_path }}"
-             alt="Favicon"
-             style="width: 30px; height: 30px; max-width: 30px; max-height: 30px; object-fit: contain;"
-             loading="lazy">
-    {% endif %}
-</div>
diff --git a/archivebox/plugins/favicon/templates/icon.html b/archivebox/plugins/favicon/templates/icon.html
deleted file mode 100644
index 7ba648b3..00000000
--- a/archivebox/plugins/favicon/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--favicon" title="Favicon"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M12 3l2.5 5.5 6 .5-4.5 3.8 1.5 5.7L12 15.5 6.5 18.5 8 12.8 3.5 9l6-.5z"/></svg></span>
diff --git a/archivebox/plugins/favicon/tests/test_favicon.py b/archivebox/plugins/favicon/tests/test_favicon.py
deleted file mode 100644
index 4434d1a8..00000000
--- a/archivebox/plugins/favicon/tests/test_favicon.py
+++ /dev/null
@@ -1,293 +0,0 @@
-"""
-Integration tests for favicon plugin
-
-Tests verify:
-1. Plugin script exists
-2. requests library is available
-3. Favicon extraction works for real example.com
-4. Output file is actual image data
-5. Tries multiple favicon URLs
-6. Falls back to Google's favicon service
-7. Config options work (TIMEOUT, USER_AGENT)
-8. Handles failures gracefully
-"""
-
-import json
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_plugin_dir,
-    get_hook_script,
-    parse_jsonl_output,
-)
-
-
-PLUGIN_DIR = get_plugin_dir(__file__)
-FAVICON_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_favicon.*')
-TEST_URL = 'https://example.com'
-
-
-def test_hook_script_exists():
-    """Verify hook script exists."""
-    assert FAVICON_HOOK.exists(), f"Hook script not found: {FAVICON_HOOK}"
-
-
-def test_requests_library_available():
-    """Test that requests library is available."""
-    result = subprocess.run(
-        [sys.executable, '-c', 'import requests; print(requests.__version__)'],
-        capture_output=True,
-        text=True
-    )
-
-    if result.returncode != 0:
-        pass
-
-    assert len(result.stdout.strip()) > 0, "Should report requests version"
-
-
-def test_extracts_favicon_from_example_com():
-    """Test full workflow: extract favicon from real example.com.
-
-    Note: example.com doesn't have a favicon and Google's service may also fail,
-    so we test that the extraction completes and reports appropriate status.
-    """
-
-    # Check requests is available
-    check_result = subprocess.run(
-        [sys.executable, '-c', 'import requests'],
-        capture_output=True
-    )
-    if check_result.returncode != 0:
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Run favicon extraction
-        result = subprocess.run(
-            [sys.executable, str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=60
-        )
-
-        # May succeed (if Google service works) or fail (if no favicon)
-        assert result.returncode in (0, 1), "Should complete extraction attempt"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-
-        # If it succeeded, verify the favicon file
-        if result_json['status'] == 'succeeded':
-            favicon_file = tmpdir / 'favicon.ico'
-            assert favicon_file.exists(), "favicon.ico not created"
-
-            # Verify file is not empty and contains actual image data
-            file_size = favicon_file.stat().st_size
-            assert file_size > 0, "Favicon file should not be empty"
-            assert file_size < 1024 * 1024, f"Favicon file suspiciously large: {file_size} bytes"
-
-            # Check for common image magic bytes
-            favicon_data = favicon_file.read_bytes()
-            # ICO, PNG, GIF, JPEG, or WebP
-            is_image = (
-                favicon_data[:4] == b'\x00\x00\x01\x00' or  # ICO
-                favicon_data[:8] == b'\x89PNG\r\n\x1a\n' or  # PNG
-                favicon_data[:3] == b'GIF' or  # GIF
-                favicon_data[:2] == b'\xff\xd8' or  # JPEG
-                favicon_data[8:12] == b'WEBP'  # WebP
-            )
-            assert is_image, "Favicon file should be a valid image format"
-        else:
-            # Failed as expected
-            assert result_json['status'] == 'failed', f"Should report failure: {result_json}"
-
-
-def test_config_timeout_honored():
-    """Test that TIMEOUT config is respected."""
-
-    check_result = subprocess.run(
-        [sys.executable, '-c', 'import requests'],
-        capture_output=True
-    )
-    if check_result.returncode != 0:
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set very short timeout (but example.com should still succeed)
-        import os
-        env = os.environ.copy()
-        env['TIMEOUT'] = '5'
-
-        result = subprocess.run(
-            [sys.executable, str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'testtimeout'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        # Should complete (success or fail, but not hang)
-        assert result.returncode in (0, 1), "Should complete without hanging"
-
-
-def test_config_user_agent():
-    """Test that USER_AGENT config is used."""
-
-    check_result = subprocess.run(
-        [sys.executable, '-c', 'import requests'],
-        capture_output=True
-    )
-    if check_result.returncode != 0:
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set custom user agent
-        import os
-        env = os.environ.copy()
-        env['USER_AGENT'] = 'TestBot/1.0'
-
-        result = subprocess.run(
-            [sys.executable, str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'testua'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=60
-        )
-
-        # Should succeed (example.com doesn't block)
-        if result.returncode == 0:
-            # Parse clean JSONL output
-            result_json = None
-            for line in result.stdout.strip().split('\n'):
-                line = line.strip()
-                if line.startswith('{'):
-                    pass
-                    try:
-                        record = json.loads(line)
-                        if record.get('type') == 'ArchiveResult':
-                            result_json = record
-                            break
-                    except json.JSONDecodeError:
-                        pass
-
-            if result_json:
-                assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-
-def test_handles_https_urls():
-    """Test that HTTPS URLs work correctly."""
-
-    check_result = subprocess.run(
-        [sys.executable, '-c', 'import requests'],
-        capture_output=True
-    )
-    if check_result.returncode != 0:
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        result = subprocess.run(
-            [sys.executable, str(FAVICON_HOOK), '--url', 'https://example.org', '--snapshot-id', 'testhttps'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=60
-        )
-
-        if result.returncode == 0:
-            favicon_file = tmpdir / 'favicon.ico'
-            if favicon_file.exists():
-                assert favicon_file.stat().st_size > 0
-
-
-def test_handles_missing_favicon_gracefully():
-    """Test that favicon plugin handles sites without favicons gracefully.
-
-    Note: The plugin falls back to Google's favicon service, which generates
-    a generic icon even if the site doesn't have one, so extraction usually succeeds.
-    """
-
-    check_result = subprocess.run(
-        [sys.executable, '-c', 'import requests'],
-        capture_output=True
-    )
-    if check_result.returncode != 0:
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Try a URL that likely doesn't have a favicon
-        result = subprocess.run(
-            [sys.executable, str(FAVICON_HOOK), '--url', 'https://example.com/nonexistent', '--snapshot-id', 'test404'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=60
-        )
-
-        # May succeed (Google fallback) or fail gracefully
-        assert result.returncode in (0, 1), "Should complete (may succeed or fail)"
-
-        if result.returncode != 0:
-            combined = result.stdout + result.stderr
-            assert 'No favicon found' in combined or 'ERROR=' in combined
-
-
-def test_reports_missing_requests_library():
-    """Test that script reports error when requests library is missing."""
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Run with PYTHONPATH cleared to simulate missing requests
-        import os
-        env = os.environ.copy()
-        # Keep only minimal PATH, clear PYTHONPATH
-        env['PYTHONPATH'] = '/nonexistent'
-
-        result = subprocess.run(
-            [sys.executable, '-S', str(FAVICON_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env
-        )
-
-        # Should fail and report missing requests
-        if result.returncode != 0:
-            combined = result.stdout + result.stderr
-            # May report missing requests or other import errors
-            assert 'requests' in combined.lower() or 'import' in combined.lower() or 'ERROR=' in combined
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/forumdl/config.json b/archivebox/plugins/forumdl/config.json
deleted file mode 100644
index 9e9ea10a..00000000
--- a/archivebox/plugins/forumdl/config.json
+++ /dev/null
@@ -1,51 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "FORUMDL_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_FORUMDL", "USE_FORUMDL"],
-      "description": "Enable forum downloading with forum-dl"
-    },
-    "FORUMDL_BINARY": {
-      "type": "string",
-      "default": "forum-dl",
-      "description": "Path to forum-dl binary"
-    },
-    "FORUMDL_TIMEOUT": {
-      "type": "integer",
-      "default": 3600,
-      "minimum": 30,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for forum downloads in seconds"
-    },
-    "FORUMDL_OUTPUT_FORMAT": {
-      "type": "string",
-      "default": "jsonl",
-      "enum": ["jsonl", "warc", "mbox", "maildir", "mh", "mmdf", "babyl"],
-      "description": "Output format for forum downloads"
-    },
-    "FORUMDL_CHECK_SSL_VALIDITY": {
-      "type": "boolean",
-      "default": true,
-      "x-fallback": "CHECK_SSL_VALIDITY",
-      "description": "Whether to verify SSL certificates"
-    },
-    "FORUMDL_ARGS": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["FORUMDL_DEFAULT_ARGS"],
-      "description": "Default forum-dl arguments"
-    },
-    "FORUMDL_ARGS_EXTRA": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["FORUMDL_EXTRA_ARGS"],
-      "description": "Extra arguments to append to forum-dl command"
-    }
-  }
-}
diff --git a/archivebox/plugins/forumdl/forum-dl-wrapper.py b/archivebox/plugins/forumdl/forum-dl-wrapper.py
deleted file mode 100755
index 2b53ca99..00000000
--- a/archivebox/plugins/forumdl/forum-dl-wrapper.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env python3
-"""
-Wrapper for forum-dl that applies Pydantic v2 compatibility patches.
-
-This wrapper fixes forum-dl 0.3.0's incompatibility with Pydantic v2 by monkey-patching
-the JsonlWriter class to use model_dump_json() instead of the deprecated json(models_as_dict=False).
-"""
-
-import sys
-
-# Apply Pydantic v2 compatibility patch BEFORE importing forum_dl
-try:
-    from forum_dl.writers.jsonl import JsonlWriter
-    from pydantic import BaseModel
-
-    # Check if we're using Pydantic v2
-    if hasattr(BaseModel, 'model_dump_json'):
-        def _patched_serialize_entry(self, entry):
-            """Use Pydantic v2's model_dump_json() instead of deprecated json(models_as_dict=False)"""
-            return entry.model_dump_json()
-
-        JsonlWriter._serialize_entry = _patched_serialize_entry
-except (ImportError, AttributeError):
-    # forum-dl not installed or already compatible - no patch needed
-    pass
-
-# Now import and run forum-dl's main function
-from forum_dl import main
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/archivebox/plugins/forumdl/on_Crawl__25_forumdl_install.py b/archivebox/plugins/forumdl/on_Crawl__25_forumdl_install.py
deleted file mode 100755
index b30ca715..00000000
--- a/archivebox/plugins/forumdl/on_Crawl__25_forumdl_install.py
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit forum-dl Binary dependency for the crawl.
-"""
-
-import json
-import os
-import sys
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def output_binary(name: str, binproviders: str, overrides: dict | None = None):
-    """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
-    }
-    if overrides:
-        record['overrides'] = overrides
-    print(json.dumps(record))
-
-
-def main():
-    forumdl_enabled = get_env_bool('FORUMDL_ENABLED', True)
-
-    if not forumdl_enabled:
-        sys.exit(0)
-
-    output_binary(
-        name='forum-dl',
-        binproviders='pip,env',
-        overrides={
-            'pip': {
-                'packages': [
-                    '--no-deps',
-                    '--prefer-binary',
-                    'forum-dl',
-                    'chardet==5.2.0',
-                    'pydantic',
-                    'pydantic-core',
-                    'typing-extensions',
-                    'annotated-types',
-                    'typing-inspection',
-                    'beautifulsoup4',
-                    'soupsieve',
-                    'lxml',
-                    'requests',
-                    'urllib3',
-                    'certifi',
-                    'idna',
-                    'charset-normalizer',
-                    'tenacity',
-                    'python-dateutil',
-                    'six',
-                    'html2text',
-                    'warcio',
-                ]
-            }
-        },
-    )
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/forumdl/on_Snapshot__04_forumdl.bg.py b/archivebox/plugins/forumdl/on_Snapshot__04_forumdl.bg.py
deleted file mode 100755
index d19e7e16..00000000
--- a/archivebox/plugins/forumdl/on_Snapshot__04_forumdl.bg.py
+++ /dev/null
@@ -1,266 +0,0 @@
-#!/usr/bin/env python3
-"""
-Download forum content from a URL using forum-dl.
-
-Usage: on_Snapshot__04_forumdl.bg.py --url=<url> --snapshot-id=<uuid>
-Output: Downloads forum content to $PWD/
-
-Environment variables:
-    FORUMDL_ENABLED: Enable forum downloading (default: True)
-    FORUMDL_BINARY: Path to forum-dl binary (default: forum-dl)
-    FORUMDL_TIMEOUT: Timeout in seconds (x-fallback: TIMEOUT)
-    FORUMDL_OUTPUT_FORMAT: Output format (default: jsonl)
-    FORUMDL_CHECK_SSL_VALIDITY: Whether to verify SSL certs (x-fallback: CHECK_SSL_VALIDITY)
-    FORUMDL_ARGS: Default forum-dl arguments (JSON array)
-    FORUMDL_ARGS_EXTRA: Extra arguments to append (JSON array)
-"""
-
-import json
-import os
-import shutil
-import subprocess
-import sys
-import threading
-from pathlib import Path
-
-import rich_click as click
-
-
-# Monkey patch forum-dl for Pydantic v2 compatibility
-# forum-dl 0.3.0 uses deprecated json(models_as_dict=False) which doesn't work in Pydantic v2
-try:
-    from forum_dl.writers.jsonl import JsonlWriter
-    from pydantic import BaseModel
-
-    # Check if we're using Pydantic v2 (has model_dump_json)
-    if hasattr(BaseModel, 'model_dump_json'):
-        # Patch JsonlWriter to use Pydantic v2 API
-        original_serialize = JsonlWriter._serialize_entry
-
-        def _patched_serialize_entry(self, entry):
-            # Use Pydantic v2's model_dump_json() instead of deprecated json(models_as_dict=False)
-            return entry.model_dump_json()
-
-        JsonlWriter._serialize_entry = _patched_serialize_entry
-except (ImportError, AttributeError):
-    # forum-dl not installed or already compatible
-    pass
-
-
-# Extractor metadata
-PLUGIN_NAME = 'forumdl'
-BIN_NAME = 'forum-dl'
-BIN_PROVIDERS = 'pip,env'
-OUTPUT_DIR = '.'
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
-    """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
-    if not val:
-        return default if default is not None else []
-    try:
-        result = json.loads(val)
-        if isinstance(result, list):
-            return [str(item) for item in result]
-        return default if default is not None else []
-    except json.JSONDecodeError:
-        return default if default is not None else []
-
-
-def get_binary_shebang(binary_path: str) -> str | None:
-    """Return interpreter from shebang line if present (e.g., /path/to/python)."""
-    try:
-        with open(binary_path, 'r', encoding='utf-8') as f:
-            first_line = f.readline().strip()
-            if first_line.startswith('#!'):
-                return first_line[2:].strip().split(' ')[0]
-    except Exception:
-        pass
-    return None
-
-
-def resolve_binary_path(binary: str) -> str | None:
-    """Resolve binary to an absolute path if possible."""
-    if not binary:
-        return None
-    if Path(binary).is_file():
-        return binary
-    return shutil.which(binary)
-
-
-
-def save_forum(url: str, binary: str) -> tuple[bool, str | None, str]:
-    """
-    Download forum using forum-dl.
-
-    Returns: (success, output_path, error_message)
-    """
-    # Get config from env (with FORUMDL_ prefix, x-fallback handled by config loader)
-    timeout = get_env_int('FORUMDL_TIMEOUT') or get_env_int('TIMEOUT', 3600)
-    check_ssl = get_env_bool('FORUMDL_CHECK_SSL_VALIDITY', True) if get_env('FORUMDL_CHECK_SSL_VALIDITY') else get_env_bool('CHECK_SSL_VALIDITY', True)
-    forumdl_args = get_env_array('FORUMDL_ARGS', [])
-    forumdl_args_extra = get_env_array('FORUMDL_ARGS_EXTRA', [])
-    output_format = get_env('FORUMDL_OUTPUT_FORMAT', 'jsonl')
-
-    # Output directory is current directory (hook already runs in output dir)
-    output_dir = Path(OUTPUT_DIR)
-
-    # Build output filename based on format
-    if output_format == 'warc':
-        output_file = output_dir / 'forum.warc.gz'
-    elif output_format == 'jsonl':
-        output_file = output_dir / 'forum.jsonl'
-    elif output_format == 'maildir':
-        output_file = output_dir / 'forum'  # maildir is a directory
-    elif output_format in ('mbox', 'mh', 'mmdf', 'babyl'):
-        output_file = output_dir / f'forum.{output_format}'
-    else:
-        output_file = output_dir / f'forum.{output_format}'
-
-    # Use our Pydantic v2 compatible wrapper if available, otherwise fall back to binary
-    wrapper_path = Path(__file__).parent / 'forum-dl-wrapper.py'
-    resolved_binary = resolve_binary_path(binary) or binary
-    if wrapper_path.exists():
-        forumdl_python = get_binary_shebang(resolved_binary) or sys.executable
-        cmd = [forumdl_python, str(wrapper_path), *forumdl_args, '-f', output_format, '-o', str(output_file)]
-    else:
-        cmd = [resolved_binary, *forumdl_args, '-f', output_format, '-o', str(output_file)]
-
-    if not check_ssl:
-        cmd.append('--no-check-certificate')
-
-    if forumdl_args_extra:
-        cmd.extend(forumdl_args_extra)
-
-    cmd.append(url)
-
-    try:
-        print(f'[forumdl] Starting download (timeout={timeout}s)', file=sys.stderr)
-        output_lines: list[str] = []
-        process = subprocess.Popen(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            bufsize=1,
-        )
-
-        def _read_output() -> None:
-            if not process.stdout:
-                return
-            for line in process.stdout:
-                output_lines.append(line)
-                sys.stderr.write(line)
-
-        reader = threading.Thread(target=_read_output, daemon=True)
-        reader.start()
-
-        try:
-            process.wait(timeout=timeout)
-        except subprocess.TimeoutExpired:
-            process.kill()
-            reader.join(timeout=1)
-            return False, None, f'Timed out after {timeout} seconds'
-
-        reader.join(timeout=1)
-        combined_output = ''.join(output_lines)
-
-        # Check if output file was created
-        if output_file.exists() and output_file.stat().st_size > 0:
-            return True, str(output_file), ''
-        else:
-            stderr = combined_output
-
-            # These are NOT errors - page simply has no downloadable forum content
-            stderr_lower = stderr.lower()
-            if 'unsupported url' in stderr_lower:
-                return True, None, ''  # Not a forum site - success, no output
-            if 'no content' in stderr_lower:
-                return True, None, ''  # No forum found - success, no output
-            if 'extractornotfounderror' in stderr_lower:
-                return True, None, ''  # No forum extractor for this URL - success, no output
-            if process.returncode == 0:
-                return True, None, ''  # forum-dl exited cleanly, just no forum - success
-
-            # These ARE errors - something went wrong
-            if '404' in stderr:
-                return False, None, '404 Not Found'
-            if '403' in stderr:
-                return False, None, '403 Forbidden'
-            if 'unable to extract' in stderr_lower:
-                return False, None, 'Unable to extract forum info'
-
-            return False, None, f'forum-dl error: {stderr}'
-
-    except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
-    except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
-
-
-@click.command()
-@click.option('--url', required=True, help='URL to download forum from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Download forum content from a URL using forum-dl."""
-
-    output = None
-    status = 'failed'
-    error = ''
-
-    try:
-        # Check if forum-dl is enabled
-        if not get_env_bool('FORUMDL_ENABLED', True):
-            print('Skipping forum-dl (FORUMDL_ENABLED=False)', file=sys.stderr)
-            # Temporary failure (config disabled) - NO JSONL emission
-            sys.exit(0)
-
-        # Get binary from environment
-        binary = get_env('FORUMDL_BINARY', 'forum-dl')
-
-        # Run extraction
-        success, output, error = save_forum(url, binary)
-
-        if success:
-            # Success - emit ArchiveResult
-            result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
-            }
-            print(json.dumps(result))
-            sys.exit(0)
-        else:
-            # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
-            sys.exit(1)
-
-    except Exception as e:
-        # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/forumdl/templates/card.html b/archivebox/plugins/forumdl/templates/card.html
deleted file mode 100644
index 24000949..00000000
--- a/archivebox/plugins/forumdl/templates/card.html
+++ /dev/null
@@ -1,7 +0,0 @@
-<!-- Forum thumbnail - shows icon placeholder -->
-<div class="extractor-thumbnail forumdl-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #1a1a1a; display: flex; align-items: center; justify-content: center;">
-    <div style="display: flex; flex-direction: column; align-items: center; color: #888; font-size: 12px;">
-        <span style="font-size: 32px;">💬</span>
-        <span>Forum</span>
-    </div>
-</div>
diff --git a/archivebox/plugins/forumdl/templates/full.html b/archivebox/plugins/forumdl/templates/full.html
deleted file mode 100644
index 85413866..00000000
--- a/archivebox/plugins/forumdl/templates/full.html
+++ /dev/null
@@ -1,147 +0,0 @@
-<!-- Fullscreen forum view - renders JSONL forum posts -->
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Forum Thread</title>
-    <style>
-        body {
-            margin: 0;
-            padding: 20px;
-            background: #0d1117;
-            color: #c9d1d9;
-            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
-            line-height: 1.6;
-        }
-        .header {
-            max-width: 1000px;
-            margin: 0 auto 30px;
-            text-align: center;
-            padding: 20px;
-            border-bottom: 1px solid #30363d;
-        }
-        .icon {
-            font-size: 48px;
-            margin-bottom: 10px;
-        }
-        h1 {
-            margin: 0;
-            font-size: 28px;
-            color: #f0f6fc;
-        }
-        .container {
-            max-width: 1000px;
-            margin: 0 auto;
-        }
-        .post {
-            background: #161b22;
-            border: 1px solid #30363d;
-            border-radius: 6px;
-            margin-bottom: 16px;
-            padding: 16px;
-            transition: border-color 0.2s;
-        }
-        .post:hover {
-            border-color: #58a6ff;
-        }
-        .post-header {
-            display: flex;
-            justify-content: space-between;
-            align-items: center;
-            margin-bottom: 12px;
-            padding-bottom: 12px;
-            border-bottom: 1px solid #21262d;
-        }
-        .post-author {
-            font-weight: 600;
-            color: #58a6ff;
-            font-size: 14px;
-        }
-        .post-date {
-            color: #8b949e;
-            font-size: 12px;
-        }
-        .post-title {
-            margin: 0 0 12px 0;
-            font-size: 18px;
-            font-weight: 600;
-            color: #f0f6fc;
-        }
-        .post-content {
-            color: #c9d1d9;
-            word-wrap: break-word;
-        }
-        .post-content img {
-            max-width: 100%;
-            height: auto;
-            border-radius: 4px;
-        }
-        .post-content a {
-            color: #58a6ff;
-            text-decoration: none;
-        }
-        .post-content a:hover {
-            text-decoration: underline;
-        }
-        .loading {
-            text-align: center;
-            padding: 40px;
-            color: #8b949e;
-        }
-    </style>
-</head>
-<body>
-    <div class="header">
-        <div class="icon">💬</div>
-        <h1>Forum Thread</h1>
-    </div>
-    <div class="container">
-        <div id="forum-posts" class="loading">Loading posts...</div>
-    </div>
-    <script>
-        (async function() {
-            try {
-                const response = await fetch('{{ output_path }}');
-                const text = await response.text();
-                const posts = text.trim().split('\n').filter(line => line).map(line => JSON.parse(line));
-                const container = document.getElementById('forum-posts');
-                container.innerHTML = '';
-                container.className = '';
-
-                posts.forEach(post => {
-                    const postDiv = document.createElement('div');
-                    postDiv.className = 'post';
-
-                    const author = post.author || 'Anonymous';
-                    const date = post.date ? new Date(post.date).toLocaleString() : '';
-                    const title = post.title || '';
-                    const content = post.content || post.body || '';
-
-                    postDiv.innerHTML = `
-                        <div class="post-header">
-                            <span class="post-author">${escapeHtml(author)}</span>
-                            <span class="post-date">${escapeHtml(date)}</span>
-                        </div>
-                        ${title ? `<h2 class="post-title">${escapeHtml(title)}</h2>` : ''}
-                        <div class="post-content">${content}</div>
-                    `;
-                    container.appendChild(postDiv);
-                });
-
-                if (posts.length === 0) {
-                    container.innerHTML = '<div class="loading">No posts found</div>';
-                }
-            } catch(e) {
-                document.getElementById('forum-posts').innerHTML = '<div class="loading">Error loading posts: ' + e.message + '</div>';
-            }
-        })();
-
-        function escapeHtml(text) {
-            const div = document.createElement('div');
-            div.textContent = text;
-            return div.innerHTML;
-        }
-    </script>
-</body>
-</html>
diff --git a/archivebox/plugins/forumdl/templates/icon.html b/archivebox/plugins/forumdl/templates/icon.html
deleted file mode 100644
index 01cace0d..00000000
--- a/archivebox/plugins/forumdl/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--forumdl" title="Forum"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M4 5h16v10H7l-3 3V5z"/></svg></span>
diff --git a/archivebox/plugins/forumdl/tests/test_forumdl.py b/archivebox/plugins/forumdl/tests/test_forumdl.py
deleted file mode 100644
index 18a692c9..00000000
--- a/archivebox/plugins/forumdl/tests/test_forumdl.py
+++ /dev/null
@@ -1,317 +0,0 @@
-"""
-Integration tests for forumdl plugin
-
-Tests verify:
-    pass
-1. Hook script exists
-2. Dependencies installed via validation hooks
-3. Verify deps with abx-pkg
-4. Forum extraction works on forum URLs
-5. JSONL output is correct
-6. Config options work
-7. Handles non-forum URLs gracefully
-"""
-
-import json
-import os
-import subprocess
-import sys
-import tempfile
-import time
-import uuid
-from pathlib import Path
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
-FORUMDL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_forumdl.*'), None)
-TEST_URL = 'https://example.com'
-
-# Module-level cache for binary path
-_forumdl_binary_path = None
-_forumdl_lib_root = None
-
-def get_forumdl_binary_path():
-    """Get the installed forum-dl binary path from cache or by running installation."""
-    global _forumdl_binary_path
-    if _forumdl_binary_path:
-        return _forumdl_binary_path
-
-    # Try to find forum-dl binary using abx-pkg
-    from abx_pkg import Binary, PipProvider, EnvProvider, BinProviderOverrides
-
-    try:
-        binary = Binary(
-            name='forum-dl',
-            binproviders=[PipProvider(), EnvProvider()]
-        ).load()
-
-        if binary and binary.abspath:
-            _forumdl_binary_path = str(binary.abspath)
-            return _forumdl_binary_path
-    except Exception:
-        pass
-
-    # If not found, try to install via pip using the crawl hook overrides
-    pip_hook = PLUGINS_ROOT / 'pip' / 'on_Binary__11_pip_install.py'
-    crawl_hook = PLUGIN_DIR / 'on_Crawl__25_forumdl_install.py'
-    if pip_hook.exists():
-        binary_id = str(uuid.uuid4())
-        machine_id = str(uuid.uuid4())
-        overrides = None
-
-        if crawl_hook.exists():
-            crawl_result = subprocess.run(
-                [sys.executable, str(crawl_hook)],
-                capture_output=True,
-                text=True,
-                timeout=30,
-            )
-            for crawl_line in crawl_result.stdout.strip().split('\n'):
-                if crawl_line.strip().startswith('{'):
-                    try:
-                        crawl_record = json.loads(crawl_line)
-                        if crawl_record.get('type') == 'Binary' and crawl_record.get('name') == 'forum-dl':
-                            overrides = crawl_record.get('overrides')
-                            break
-                    except json.JSONDecodeError:
-                        continue
-
-        # Create a persistent temp LIB_DIR for the pip provider
-        import platform
-        global _forumdl_lib_root
-        if not _forumdl_lib_root:
-            _forumdl_lib_root = tempfile.mkdtemp(prefix='forumdl-lib-')
-        machine = platform.machine().lower()
-        system = platform.system().lower()
-        if machine in ('arm64', 'aarch64'):
-            machine = 'arm64'
-        elif machine in ('x86_64', 'amd64'):
-            machine = 'x86_64'
-        machine_type = f"{machine}-{system}"
-        lib_dir = Path(_forumdl_lib_root) / 'lib' / machine_type
-        lib_dir.mkdir(parents=True, exist_ok=True)
-        env = os.environ.copy()
-        env['LIB_DIR'] = str(lib_dir)
-        env['DATA_DIR'] = str(Path(_forumdl_lib_root) / 'data')
-
-        cmd = [
-            sys.executable, str(pip_hook),
-            '--binary-id', binary_id,
-            '--machine-id', machine_id,
-            '--name', 'forum-dl'
-        ]
-        if overrides:
-            cmd.append(f'--overrides={json.dumps(overrides)}')
-
-        install_result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            timeout=300,
-            env=env,
-        )
-
-        # Parse Binary from pip installation
-        for install_line in install_result.stdout.strip().split('\n'):
-            if install_line.strip():
-                try:
-                    install_record = json.loads(install_line)
-                    if install_record.get('type') == 'Binary' and install_record.get('name') == 'forum-dl':
-                        _forumdl_binary_path = install_record.get('abspath')
-                        return _forumdl_binary_path
-                except json.JSONDecodeError:
-                    pass
-
-    return None
-
-
-def test_hook_script_exists():
-    """Verify on_Snapshot hook exists."""
-    assert FORUMDL_HOOK.exists(), f"Hook not found: {FORUMDL_HOOK}"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify forum-dl is installed by calling the REAL installation hooks."""
-    binary_path = get_forumdl_binary_path()
-    if not binary_path:
-        assert False, (
-            "forum-dl installation failed. Install hook should install forum-dl automatically. "
-            "Note: forum-dl has a dependency on cchardet which may not compile on Python 3.14+ "
-            "due to removed longintrepr.h header."
-        )
-    assert Path(binary_path).is_file(), f"Binary path must be a valid file: {binary_path}"
-
-
-def test_handles_non_forum_url():
-    """Test that forum-dl extractor handles non-forum URLs gracefully via hook."""
-    import os
-
-    binary_path = get_forumdl_binary_path()
-    if not binary_path:
-        pass
-    assert Path(binary_path).is_file(), f"Binary must be a valid file: {binary_path}"
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        env = os.environ.copy()
-        env['FORUMDL_BINARY'] = binary_path
-
-        # Run forum-dl extraction hook on non-forum URL
-        result = subprocess.run(
-            [sys.executable, str(FORUMDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=60
-        )
-
-        # Should exit 0 even for non-forum URL (graceful handling)
-        assert result.returncode == 0, f"Should handle non-forum URL gracefully: {result.stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed even for non-forum URL: {result_json}"
-
-
-def test_config_save_forumdl_false_skips():
-    """Test that FORUMDL_ENABLED=False exits without emitting JSONL."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        env['FORUMDL_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            [sys.executable, str(FORUMDL_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-
-        # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
-
-
-def test_config_timeout():
-    """Test that FORUMDL_TIMEOUT config is respected."""
-    import os
-
-    binary_path = get_forumdl_binary_path()
-    if not binary_path:
-        pass
-    assert Path(binary_path).is_file(), f"Binary must be a valid file: {binary_path}"
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        env['FORUMDL_BINARY'] = binary_path
-        env['FORUMDL_TIMEOUT'] = '5'
-
-        start_time = time.time()
-        result = subprocess.run(
-            [sys.executable, str(FORUMDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=10  # Should complete in 5s, use 10s as safety margin
-        )
-        elapsed_time = time.time() - start_time
-
-        assert result.returncode == 0, f"Should complete without hanging: {result.stderr}"
-        # Allow 1 second overhead for subprocess startup and Python interpreter
-        assert elapsed_time <= 6.0, f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
-
-
-def test_real_forum_url():
-    """Test that forum-dl extracts content from a real HackerNews thread with jsonl output.
-
-    Uses our Pydantic v2 compatible wrapper to fix forum-dl 0.3.0's incompatibility.
-    """
-    import os
-
-    binary_path = get_forumdl_binary_path()
-    assert binary_path, "forum-dl binary not available"
-    assert Path(binary_path).is_file(), f"Binary must be a valid file: {binary_path}"
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Use HackerNews - one of the most reliable forum-dl extractors
-        forum_url = 'https://news.ycombinator.com/item?id=1'
-
-        env = os.environ.copy()
-        env['FORUMDL_BINARY'] = binary_path
-        env['FORUMDL_TIMEOUT'] = '60'
-        env['FORUMDL_OUTPUT_FORMAT'] = 'jsonl'  # Use jsonl format
-        # HTML output could be added via: env['FORUMDL_ARGS_EXTRA'] = json.dumps(['--files-output', './files'])
-
-        start_time = time.time()
-        result = subprocess.run(
-            [sys.executable, str(FORUMDL_HOOK), '--url', forum_url, '--snapshot-id', 'testforum'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=90
-        )
-        elapsed_time = time.time() - start_time
-
-        # Should succeed with our Pydantic v2 wrapper
-        assert result.returncode == 0, f"Should extract forum successfully: {result.stderr}"
-
-        # Parse JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Check that forum files were downloaded
-        output_files = list(tmpdir.glob('**/*'))
-        forum_files = [f for f in output_files if f.is_file()]
-
-        assert len(forum_files) > 0, f"Should have downloaded at least one forum file. Files: {output_files}"
-
-        # Verify the JSONL file has content
-        jsonl_file = tmpdir / 'forum.jsonl'
-        assert jsonl_file.exists(), "Should have created forum.jsonl"
-        assert jsonl_file.stat().st_size > 0, "forum.jsonl should not be empty"
-
-        print(f"Successfully extracted {len(forum_files)} file(s) in {elapsed_time:.2f}s")
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/gallerydl/config.json b/archivebox/plugins/gallerydl/config.json
deleted file mode 100644
index 522a4b22..00000000
--- a/archivebox/plugins/gallerydl/config.json
+++ /dev/null
@@ -1,54 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "GALLERYDL_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_GALLERYDL", "USE_GALLERYDL"],
-      "description": "Enable gallery downloading with gallery-dl"
-    },
-    "GALLERYDL_BINARY": {
-      "type": "string",
-      "default": "gallery-dl",
-      "description": "Path to gallery-dl binary"
-    },
-    "GALLERYDL_TIMEOUT": {
-      "type": "integer",
-      "default": 3600,
-      "minimum": 30,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for gallery downloads in seconds"
-    },
-    "GALLERYDL_COOKIES_FILE": {
-      "type": "string",
-      "default": "",
-      "x-fallback": "COOKIES_FILE",
-      "description": "Path to cookies file"
-    },
-    "GALLERYDL_CHECK_SSL_VALIDITY": {
-      "type": "boolean",
-      "default": true,
-      "x-fallback": "CHECK_SSL_VALIDITY",
-      "description": "Whether to verify SSL certificates"
-    },
-    "GALLERYDL_ARGS": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [
-        "--write-metadata",
-        "--write-info-json"
-      ],
-      "x-aliases": ["GALLERYDL_DEFAULT_ARGS"],
-      "description": "Default gallery-dl arguments"
-    },
-    "GALLERYDL_ARGS_EXTRA": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["GALLERYDL_EXTRA_ARGS"],
-      "description": "Extra arguments to append to gallery-dl command"
-    }
-  }
-}
diff --git a/archivebox/plugins/gallerydl/on_Crawl__20_gallerydl_install.py b/archivebox/plugins/gallerydl/on_Crawl__20_gallerydl_install.py
deleted file mode 100755
index 06d95f4d..00000000
--- a/archivebox/plugins/gallerydl/on_Crawl__20_gallerydl_install.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit gallery-dl Binary dependency for the crawl.
-"""
-
-import json
-import os
-import sys
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def output_binary(name: str, binproviders: str):
-    """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
-    }
-    print(json.dumps(record))
-
-
-def main():
-    gallerydl_enabled = get_env_bool('GALLERYDL_ENABLED', True)
-
-    if not gallerydl_enabled:
-        sys.exit(0)
-
-    output_binary(name='gallery-dl', binproviders='pip,brew,apt,env')
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/gallerydl/on_Snapshot__03_gallerydl.bg.py b/archivebox/plugins/gallerydl/on_Snapshot__03_gallerydl.bg.py
deleted file mode 100755
index fc5d951c..00000000
--- a/archivebox/plugins/gallerydl/on_Snapshot__03_gallerydl.bg.py
+++ /dev/null
@@ -1,261 +0,0 @@
-#!/usr/bin/env python3
-"""
-Download image galleries from a URL using gallery-dl.
-
-Usage: on_Snapshot__03_gallerydl.bg.py --url=<url> --snapshot-id=<uuid>
-Output: Downloads gallery images to $PWD/gallerydl/
-
-Environment variables:
-    GALLERYDL_ENABLED: Enable gallery-dl gallery extraction (default: True)
-    GALLERYDL_BINARY: Path to gallery-dl binary (default: gallery-dl)
-    GALLERYDL_TIMEOUT: Timeout in seconds (x-fallback: TIMEOUT)
-    GALLERYDL_COOKIES_FILE: Path to cookies file (x-fallback: COOKIES_FILE)
-    GALLERYDL_CHECK_SSL_VALIDITY: Whether to verify SSL certs (x-fallback: CHECK_SSL_VALIDITY)
-    GALLERYDL_ARGS: Default gallery-dl arguments (JSON array)
-    GALLERYDL_ARGS_EXTRA: Extra arguments to append (JSON array)
-"""
-
-import json
-import os
-import subprocess
-import sys
-import threading
-from pathlib import Path
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'gallerydl'
-BIN_NAME = 'gallery-dl'
-BIN_PROVIDERS = 'pip,env'
-OUTPUT_DIR = '.'
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
-    """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
-    if not val:
-        return default if default is not None else []
-    try:
-        result = json.loads(val)
-        if isinstance(result, list):
-            return [str(item) for item in result]
-        return default if default is not None else []
-    except json.JSONDecodeError:
-        return default if default is not None else []
-
-
-STATICFILE_DIR = '../staticfile'
-
-def has_staticfile_output() -> bool:
-    """Check if staticfile extractor already downloaded this URL."""
-    staticfile_dir = Path(STATICFILE_DIR)
-    if not staticfile_dir.exists():
-        return False
-    stdout_log = staticfile_dir / 'stdout.log'
-    if not stdout_log.exists():
-        return False
-    for line in stdout_log.read_text(errors='ignore').splitlines():
-        line = line.strip()
-        if not line.startswith('{'):
-            continue
-        try:
-            record = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-        if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded':
-            return True
-    return False
-
-
-def save_gallery(url: str, binary: str) -> tuple[bool, str | None, str]:
-    """
-    Download gallery using gallery-dl.
-
-    Returns: (success, output_path, error_message)
-    """
-    # Get config from env (with GALLERYDL_ prefix, x-fallback handled by config loader)
-    timeout = get_env_int('GALLERYDL_TIMEOUT') or get_env_int('TIMEOUT', 3600)
-    check_ssl = get_env_bool('GALLERYDL_CHECK_SSL_VALIDITY', True) if get_env('GALLERYDL_CHECK_SSL_VALIDITY') else get_env_bool('CHECK_SSL_VALIDITY', True)
-    gallerydl_args = get_env_array('GALLERYDL_ARGS', [])
-    gallerydl_args_extra = get_env_array('GALLERYDL_ARGS_EXTRA', [])
-    cookies_file = get_env('GALLERYDL_COOKIES_FILE') or get_env('COOKIES_FILE', '')
-
-    # Output directory is current directory (hook already runs in output dir)
-    output_dir = Path(OUTPUT_DIR)
-
-    # Build command
-    # Use -D for exact directory (flat structure) instead of -d (nested structure)
-    cmd = [
-        binary,
-        *gallerydl_args,
-        '-D', str(output_dir),
-    ]
-
-    if not check_ssl:
-        cmd.append('--no-check-certificate')
-
-    if cookies_file and Path(cookies_file).exists():
-        cmd.extend(['-C', cookies_file])
-
-    if gallerydl_args_extra:
-        cmd.extend(gallerydl_args_extra)
-
-    cmd.append(url)
-
-    try:
-        print(f'[gallerydl] Starting download (timeout={timeout}s)', file=sys.stderr)
-        output_lines: list[str] = []
-        process = subprocess.Popen(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            bufsize=1,
-        )
-
-        def _read_output() -> None:
-            if not process.stdout:
-                return
-            for line in process.stdout:
-                output_lines.append(line)
-                sys.stderr.write(line)
-
-        reader = threading.Thread(target=_read_output, daemon=True)
-        reader.start()
-
-        try:
-            process.wait(timeout=timeout)
-        except subprocess.TimeoutExpired:
-            process.kill()
-            reader.join(timeout=1)
-            return False, None, f'Timed out after {timeout} seconds'
-
-        reader.join(timeout=1)
-        combined_output = ''.join(output_lines)
-
-        # Check if any gallery files were downloaded (search recursively)
-        gallery_extensions = (
-            '.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg',
-            '.mp4', '.webm', '.mkv', '.avi', '.mov', '.flv',
-            '.json', '.txt', '.zip',
-        )
-
-        downloaded_files = [
-            f for f in output_dir.rglob('*')
-            if f.is_file() and f.suffix.lower() in gallery_extensions
-        ]
-
-        if downloaded_files:
-            # Return first image file, or first file if no images
-            image_files = [
-                f for f in downloaded_files
-                if f.suffix.lower() in ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp')
-            ]
-            output = str(image_files[0]) if image_files else str(downloaded_files[0])
-            return True, output, ''
-        else:
-            stderr = combined_output
-
-            # These are NOT errors - page simply has no downloadable gallery
-            # Return success with no output (legitimate "nothing to download")
-            stderr_lower = stderr.lower()
-            if 'unsupported url' in stderr_lower:
-                return True, None, ''  # Not a gallery site - success, no output
-            if 'no results' in stderr_lower:
-                return True, None, ''  # No gallery found - success, no output
-            if process.returncode == 0:
-                return True, None, ''  # gallery-dl exited cleanly, just no gallery - success
-
-            # These ARE errors - something went wrong
-            if '404' in stderr:
-                return False, None, '404 Not Found'
-            if '403' in stderr:
-                return False, None, '403 Forbidden'
-            if 'unable to extract' in stderr_lower:
-                return False, None, 'Unable to extract gallery info'
-
-            return False, None, f'gallery-dl error: {stderr}'
-
-    except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
-    except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
-
-
-@click.command()
-@click.option('--url', required=True, help='URL to download gallery from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Download image gallery from a URL using gallery-dl."""
-
-    output = None
-    status = 'failed'
-    error = ''
-
-    try:
-        # Check if gallery-dl is enabled
-        if not get_env_bool('GALLERYDL_ENABLED', True):
-            print('Skipping gallery-dl (GALLERYDL_ENABLED=False)', file=sys.stderr)
-            # Temporary failure (config disabled) - NO JSONL emission
-            sys.exit(0)
-
-        # Check if staticfile extractor already handled this (permanent skip)
-        if has_staticfile_output():
-            print(f'Skipping gallery-dl - staticfile extractor already downloaded this', file=sys.stderr)
-            print(json.dumps({
-                'type': 'ArchiveResult',
-                'status': 'skipped',
-                'output_str': 'staticfile already handled',
-            }))
-            sys.exit(0)
-
-        # Get binary from environment
-        binary = get_env('GALLERYDL_BINARY', 'gallery-dl')
-
-        # Run extraction
-        success, output, error = save_gallery(url, binary)
-
-        if success:
-            # Success - emit ArchiveResult
-            result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
-            }
-            print(json.dumps(result))
-            sys.exit(0)
-        else:
-            # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
-            sys.exit(1)
-
-    except Exception as e:
-        # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/gallerydl/templates/card.html b/archivebox/plugins/gallerydl/templates/card.html
deleted file mode 100644
index 32ea0fe0..00000000
--- a/archivebox/plugins/gallerydl/templates/card.html
+++ /dev/null
@@ -1,11 +0,0 @@
-<!-- Gallery thumbnail - shows first image or placeholder -->
-<div class="extractor-thumbnail gallerydl-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #1a1a1a; display: flex; align-items: center; justify-content: center;">
-    <img src="{{ output_path }}"
-         style="width: 100%; height: 100px; object-fit: contain;"
-         alt="Gallery thumbnail"
-         onerror="this.style.display='none'; this.nextElementSibling.style.display='flex';">
-    <div style="display: none; flex-direction: column; align-items: center; color: #888; font-size: 12px;">
-        <span style="font-size: 32px;">🖼️</span>
-        <span>Gallery</span>
-    </div>
-</div>
diff --git a/archivebox/plugins/gallerydl/templates/full.html b/archivebox/plugins/gallerydl/templates/full.html
deleted file mode 100644
index bf06ceb4..00000000
--- a/archivebox/plugins/gallerydl/templates/full.html
+++ /dev/null
@@ -1,28 +0,0 @@
-<!-- Fullscreen gallery view - shows image in full size -->
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Gallery</title>
-    <style>
-        body {
-            margin: 0;
-            padding: 0;
-            background: #000;
-            display: flex;
-            align-items: center;
-            justify-content: center;
-            min-height: 100vh;
-        }
-        img {
-            max-width: 100%;
-            max-height: 100vh;
-            object-fit: contain;
-        }
-    </style>
-</head>
-<body>
-    <img src="{{ output_path }}" alt="Gallery image">
-</body>
-</html>
diff --git a/archivebox/plugins/gallerydl/templates/icon.html b/archivebox/plugins/gallerydl/templates/icon.html
deleted file mode 100644
index a8ef89e7..00000000
--- a/archivebox/plugins/gallerydl/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--gallerydl" title="Gallery"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="5" width="18" height="14" rx="2"/><circle cx="8" cy="10" r="1.5" fill="currentColor" stroke="none"/><path d="M21 17l-5-5-5 5"/></svg></span>
diff --git a/archivebox/plugins/gallerydl/tests/test_gallerydl.py b/archivebox/plugins/gallerydl/tests/test_gallerydl.py
deleted file mode 100644
index 7feedb1e..00000000
--- a/archivebox/plugins/gallerydl/tests/test_gallerydl.py
+++ /dev/null
@@ -1,190 +0,0 @@
-"""
-Integration tests for gallerydl plugin
-
-Tests verify:
-    pass
-1. Hook script exists
-2. Dependencies installed via validation hooks
-3. Verify deps with abx-pkg
-4. Gallery extraction works on gallery URLs
-5. JSONL output is correct
-6. Config options work
-7. Handles non-gallery URLs gracefully
-"""
-
-import json
-import subprocess
-import sys
-import tempfile
-import time
-from pathlib import Path
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
-GALLERYDL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_gallerydl.*'), None)
-TEST_URL = 'https://example.com'
-
-def test_hook_script_exists():
-    """Verify on_Snapshot hook exists."""
-    assert GALLERYDL_HOOK.exists(), f"Hook not found: {GALLERYDL_HOOK}"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify gallery-dl is available via abx-pkg."""
-    from abx_pkg import Binary, PipProvider, EnvProvider, BinProviderOverrides
-
-    missing_binaries = []
-
-    # Verify gallery-dl is available
-    gallerydl_binary = Binary(name='gallery-dl', binproviders=[PipProvider(), EnvProvider()])
-    gallerydl_loaded = gallerydl_binary.load()
-    if not (gallerydl_loaded and gallerydl_loaded.abspath):
-        missing_binaries.append('gallery-dl')
-
-    if missing_binaries:
-        pass
-
-
-def test_handles_non_gallery_url():
-    """Test that gallery-dl extractor handles non-gallery URLs gracefully via hook."""
-    # Prerequisites checked by earlier test
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Run gallery-dl extraction hook on non-gallery URL
-        result = subprocess.run(
-            [sys.executable, str(GALLERYDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=60
-        )
-
-        # Should exit 0 even for non-gallery URL
-        assert result.returncode == 0, f"Should handle non-gallery URL gracefully: {result.stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-
-def test_config_save_gallery_dl_false_skips():
-    """Test that GALLERYDL_ENABLED=False exits without emitting JSONL."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        env['GALLERYDL_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            [sys.executable, str(GALLERYDL_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-
-        # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
-
-
-def test_config_timeout():
-    """Test that GALLERY_DL_TIMEOUT config is respected."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        env['GALLERY_DL_TIMEOUT'] = '5'
-
-        start_time = time.time()
-        result = subprocess.run(
-            [sys.executable, str(GALLERYDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=10  # Should complete in 5s, use 10s as safety margin
-        )
-        elapsed_time = time.time() - start_time
-
-        assert result.returncode == 0, f"Should complete without hanging: {result.stderr}"
-        # Allow 1 second overhead for subprocess startup and Python interpreter
-        assert elapsed_time <= 6.0, f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
-
-
-def test_real_gallery_url():
-    """Test that gallery-dl can extract images from a real Flickr gallery URL."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Use a real Flickr photo page
-        gallery_url = 'https://www.flickr.com/photos/gregorydolivet/55002388567/in/explore-2025-12-25/'
-
-        env = os.environ.copy()
-        env['GALLERY_DL_TIMEOUT'] = '60'  # Give it time to download
-
-        start_time = time.time()
-        result = subprocess.run(
-            [sys.executable, str(GALLERYDL_HOOK), '--url', gallery_url, '--snapshot-id', 'testflickr'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=90
-        )
-        elapsed_time = time.time() - start_time
-
-        # Should succeed
-        assert result.returncode == 0, f"Should extract gallery successfully: {result.stderr}"
-
-        # Parse JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Check that some files were downloaded
-        output_files = list(tmpdir.glob('**/*'))
-        image_files = [f for f in output_files if f.is_file() and f.suffix.lower() in ('.jpg', '.jpeg', '.png', '.gif', '.webp')]
-
-        assert len(image_files) > 0, f"Should have downloaded at least one image. Files: {output_files}"
-
-        print(f"Successfully extracted {len(image_files)} image(s) in {elapsed_time:.2f}s")
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/git/config.json b/archivebox/plugins/git/config.json
deleted file mode 100644
index da0a3b02..00000000
--- a/archivebox/plugins/git/config.json
+++ /dev/null
@@ -1,44 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "GIT_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_GIT", "USE_GIT"],
-      "description": "Enable git repository cloning"
-    },
-    "GIT_BINARY": {
-      "type": "string",
-      "default": "git",
-      "description": "Path to git binary"
-    },
-    "GIT_TIMEOUT": {
-      "type": "integer",
-      "default": 120,
-      "minimum": 10,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for git operations in seconds"
-    },
-    "GIT_DOMAINS": {
-      "type": "string",
-      "default": "github.com,gitlab.com,bitbucket.org,gist.github.com,codeberg.org,gitea.com,git.sr.ht",
-      "description": "Comma-separated list of domains to treat as git repositories"
-    },
-    "GIT_ARGS": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": ["clone", "--depth=1", "--recursive"],
-      "x-aliases": ["GIT_DEFAULT_ARGS"],
-      "description": "Default git arguments"
-    },
-    "GIT_ARGS_EXTRA": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["GIT_EXTRA_ARGS"],
-      "description": "Extra arguments to append to git command"
-    }
-  }
-}
diff --git a/archivebox/plugins/git/on_Crawl__05_git_install.py b/archivebox/plugins/git/on_Crawl__05_git_install.py
deleted file mode 100755
index e090d546..00000000
--- a/archivebox/plugins/git/on_Crawl__05_git_install.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit git Binary dependency for the crawl.
-"""
-
-import json
-import os
-import sys
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def output_binary(name: str, binproviders: str):
-    """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
-    }
-    print(json.dumps(record))
-
-
-def main():
-    git_enabled = get_env_bool('GIT_ENABLED', True)
-
-    if not git_enabled:
-        sys.exit(0)
-
-    output_binary(name='git', binproviders='apt,brew,env')
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/git/on_Snapshot__05_git.bg.py b/archivebox/plugins/git/on_Snapshot__05_git.bg.py
deleted file mode 100644
index c124ddbe..00000000
--- a/archivebox/plugins/git/on_Snapshot__05_git.bg.py
+++ /dev/null
@@ -1,145 +0,0 @@
-#!/usr/bin/env python3
-"""
-Clone a git repository from a URL.
-
-Usage: on_Snapshot__05_git.bg.py --url=<url> --snapshot-id=<uuid>
-Output: Clones repository to $PWD/repo
-
-Environment variables:
-    GIT_BINARY: Path to git binary
-    GIT_TIMEOUT: Timeout in seconds (default: 120)
-    GIT_ARGS: Default git arguments (JSON array, default: ["clone", "--depth=1", "--recursive"])
-    GIT_ARGS_EXTRA: Extra arguments to append (JSON array, default: [])
-
-    # Fallback to ARCHIVING_CONFIG values if GIT_* not set:
-    TIMEOUT: Fallback timeout
-"""
-
-import json
-import os
-import subprocess
-import sys
-from pathlib import Path
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'git'
-BIN_NAME = 'git'
-BIN_PROVIDERS = 'apt,brew,env'
-OUTPUT_DIR = '.'
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
-    """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
-    if not val:
-        return default if default is not None else []
-    try:
-        result = json.loads(val)
-        if isinstance(result, list):
-            return [str(item) for item in result]
-        return default if default is not None else []
-    except json.JSONDecodeError:
-        return default if default is not None else []
-
-
-def is_git_url(url: str) -> bool:
-    """Check if URL looks like a git repository."""
-    git_patterns = [
-        '.git',
-        'github.com',
-        'gitlab.com',
-        'bitbucket.org',
-        'git://',
-        'ssh://git@',
-    ]
-    return any(p in url.lower() for p in git_patterns)
-
-
-def clone_git(url: str, binary: str) -> tuple[bool, str | None, str]:
-    """
-    Clone git repository.
-
-    Returns: (success, output_path, error_message)
-    """
-    timeout = get_env_int('GIT_TIMEOUT') or get_env_int('TIMEOUT', 120)
-    git_args = get_env_array('GIT_ARGS', ["clone", "--depth=1", "--recursive"])
-    git_args_extra = get_env_array('GIT_ARGS_EXTRA', [])
-
-    cmd = [binary, *git_args, *git_args_extra, url, OUTPUT_DIR]
-
-    try:
-        result = subprocess.run(cmd, timeout=timeout)
-
-        if result.returncode == 0 and Path(OUTPUT_DIR).is_dir():
-            return True, OUTPUT_DIR, ''
-        else:
-            return False, None, f'git clone failed (exit={result.returncode})'
-
-    except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
-    except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
-
-
-@click.command()
-@click.option('--url', required=True, help='Git repository URL')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Clone a git repository from a URL."""
-
-    output = None
-    status = 'failed'
-    error = ''
-
-    try:
-        # Check if URL looks like a git repo
-        if not is_git_url(url):
-            print(f'Skipping git clone for non-git URL: {url}', file=sys.stderr)
-            print(json.dumps({
-                'type': 'ArchiveResult',
-                'status': 'skipped',
-                'output_str': 'Not a git URL',
-            }))
-            sys.exit(0)
-
-        # Get binary from environment
-        binary = get_env('GIT_BINARY', 'git')
-
-        # Run extraction
-        success, output, error = clone_git(url, binary)
-        status = 'succeeded' if success else 'failed'
-
-    except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
-
-    if error:
-        print(f'ERROR: {error}', file=sys.stderr)
-
-    # Output clean JSONL (no RESULT_JSON= prefix)
-    result = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output or error or '',
-    }
-    print(json.dumps(result))
-
-    sys.exit(0 if status == 'succeeded' else 1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/git/templates/card.html b/archivebox/plugins/git/templates/card.html
deleted file mode 100644
index 3148d5b9..00000000
--- a/archivebox/plugins/git/templates/card.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<!-- Git thumbnail - shows git repository icon and info -->
-<div class="extractor-thumbnail git-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #f6f8fa; display: flex; flex-direction: column; align-items: center; justify-content: center; padding: 10px;">
-    <span style="font-size: 32px;">📂</span>
-    <span style="font-size: 11px; color: #586069; margin-top: 4px;">Git Repository</span>
-</div>
diff --git a/archivebox/plugins/git/templates/icon.html b/archivebox/plugins/git/templates/icon.html
deleted file mode 100644
index e16f0231..00000000
--- a/archivebox/plugins/git/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--git" title="Git"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="6" cy="6" r="2"/><circle cx="6" cy="18" r="2"/><circle cx="18" cy="12" r="2"/><path d="M8 6h5a3 3 0 0 1 3 3v1"/><path d="M8 18h5a3 3 0 0 0 3-3v-1"/></svg></span>
diff --git a/archivebox/plugins/git/tests/test_git.py b/archivebox/plugins/git/tests/test_git.py
deleted file mode 100644
index c7449495..00000000
--- a/archivebox/plugins/git/tests/test_git.py
+++ /dev/null
@@ -1,130 +0,0 @@
-"""
-Integration tests for git plugin
-
-Tests verify:
-    pass
-1. Validate hook checks for git binary
-2. Verify deps with abx-pkg
-3. Standalone git extractor execution
-"""
-
-import json
-import shutil
-import subprocess
-import sys
-import tempfile
-import time
-from pathlib import Path
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-GIT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_git.*'), None)
-TEST_URL = 'https://github.com/ArchiveBox/abx-pkg.git'
-
-def test_hook_script_exists():
-    assert GIT_HOOK.exists()
-
-def test_verify_deps_with_abx_pkg():
-    """Verify git is available via abx-pkg."""
-    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
-
-    git_binary = Binary(name='git', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
-    git_loaded = git_binary.load()
-
-    assert git_loaded and git_loaded.abspath, "git is required for git plugin tests"
-
-def test_reports_missing_git():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = {'PATH': '/nonexistent'}
-        result = subprocess.run(
-            [sys.executable, str(GIT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
-            cwd=tmpdir, capture_output=True, text=True, env=env
-        )
-        if result.returncode != 0:
-            combined = result.stdout + result.stderr
-            assert 'DEPENDENCY_NEEDED' in combined or 'git' in combined.lower() or 'ERROR=' in combined
-
-def test_handles_non_git_url():
-    assert shutil.which('git'), "git binary not available"
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        result = subprocess.run(
-            [sys.executable, str(GIT_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
-            cwd=tmpdir, capture_output=True, text=True, timeout=30
-        )
-        # Should fail or skip for non-git URL
-        assert result.returncode in (0, 1)
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        if result_json:
-            # Should report failure or skip for non-git URL
-            assert result_json['status'] in ['failed', 'skipped'], f"Should fail or skip: {result_json}"
-
-
-def test_real_git_repo():
-    """Test that git can clone a real GitHub repository."""
-    import os
-
-    assert shutil.which('git'), "git binary not available"
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Use a real but small GitHub repository
-        git_url = 'https://github.com/ArchiveBox/abx-pkg'
-
-        env = os.environ.copy()
-        env['GIT_TIMEOUT'] = '120'  # Give it time to clone
-
-        start_time = time.time()
-        result = subprocess.run(
-            [sys.executable, str(GIT_HOOK), '--url', git_url, '--snapshot-id', 'testgit'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=180
-        )
-        elapsed_time = time.time() - start_time
-
-        # Should succeed
-        assert result.returncode == 0, f"Should clone repository successfully: {result.stderr}"
-
-        # Parse JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Check that the git repo was cloned
-        git_dirs = list(tmpdir.glob('**/.git'))
-        assert len(git_dirs) > 0, f"Should have cloned a git repository. Contents: {list(tmpdir.rglob('*'))}"
-
-        print(f"Successfully cloned repository in {elapsed_time:.2f}s")
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/hashes/config.json b/archivebox/plugins/hashes/config.json
deleted file mode 100644
index b57db14a..00000000
--- a/archivebox/plugins/hashes/config.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "HASHES_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_HASHES", "USE_HASHES"],
-      "description": "Enable merkle tree hash generation"
-    },
-    "HASHES_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for merkle tree generation in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/hashes/on_Snapshot__93_hashes.py b/archivebox/plugins/hashes/on_Snapshot__93_hashes.py
deleted file mode 100755
index 2738d85f..00000000
--- a/archivebox/plugins/hashes/on_Snapshot__93_hashes.py
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/usr/bin/env python3
-"""
-Create a hashed Merkle tree of all archived outputs.
-
-This plugin runs after all extractors complete (priority 93) and generates
-a cryptographic Merkle hash tree of all files in the snapshot directory.
-
-Output: hashes.json containing root_hash, tree structure, file list, metadata
-
-Usage: on_Snapshot__93_hashes.py --url=<url> --snapshot-id=<uuid>
-
-Environment variables:
-    SAVE_HASHES: Enable hash merkle tree generation (default: true)
-    DATA_DIR: ArchiveBox data directory
-    ARCHIVE_DIR: Archive output directory
-"""
-
-import os
-import sys
-import json
-import hashlib
-from pathlib import Path
-from datetime import datetime, timezone
-from typing import Dict, List, Optional, Tuple, Any
-
-import click
-
-
-def sha256_file(filepath: Path) -> str:
-    """Compute SHA256 hash of a file."""
-    h = hashlib.sha256()
-    try:
-        with open(filepath, 'rb') as f:
-            while chunk := f.read(65536):
-                h.update(chunk)
-        return h.hexdigest()
-    except (OSError, PermissionError):
-        return '0' * 64
-
-
-def sha256_data(data: bytes) -> str:
-    """Compute SHA256 hash of raw data."""
-    return hashlib.sha256(data).hexdigest()
-
-
-def collect_files(snapshot_dir: Path, exclude_dirs: Optional[List[str]] = None) -> List[Tuple[Path, str, int]]:
-    """Recursively collect all files in snapshot directory."""
-    exclude_dirs = exclude_dirs or ['hashes', '.git', '__pycache__']
-    files = []
-
-    for root, dirs, filenames in os.walk(snapshot_dir):
-        dirs[:] = [d for d in dirs if d not in exclude_dirs]
-
-        for filename in filenames:
-            filepath = Path(root) / filename
-            rel_path = filepath.relative_to(snapshot_dir)
-
-            if filepath.is_symlink():
-                continue
-
-            file_hash = sha256_file(filepath)
-            file_size = filepath.stat().st_size if filepath.exists() else 0
-            files.append((rel_path, file_hash, file_size))
-
-    files.sort(key=lambda x: str(x[0]))
-    return files
-
-
-def build_merkle_tree(file_hashes: List[str]) -> Tuple[str, List[List[str]]]:
-    """Build a Merkle tree from a list of leaf hashes."""
-    if not file_hashes:
-        return sha256_data(b''), [[]]
-
-    tree_levels = [file_hashes.copy()]
-
-    while len(tree_levels[-1]) > 1:
-        current_level = tree_levels[-1]
-        next_level = []
-
-        for i in range(0, len(current_level), 2):
-            left = current_level[i]
-            if i + 1 < len(current_level):
-                right = current_level[i + 1]
-                combined = left + right
-            else:
-                combined = left + left
-
-            parent_hash = sha256_data(combined.encode('utf-8'))
-            next_level.append(parent_hash)
-
-        tree_levels.append(next_level)
-
-    root_hash = tree_levels[-1][0]
-    return root_hash, tree_levels
-
-
-def create_hashes(snapshot_dir: Path) -> Dict[str, Any]:
-    """Create a complete Merkle hash tree of all files in snapshot directory."""
-    files = collect_files(snapshot_dir)
-    file_hashes = [file_hash for _, file_hash, _ in files]
-    root_hash, tree_levels = build_merkle_tree(file_hashes)
-    total_size = sum(size for _, _, size in files)
-
-    file_list = [
-        {'path': str(path), 'hash': file_hash, 'size': size}
-        for path, file_hash, size in files
-    ]
-
-    return {
-        'root_hash': root_hash,
-        'tree_levels': tree_levels,
-        'files': file_list,
-        'metadata': {
-            'timestamp': datetime.now(timezone.utc).isoformat(),
-            'file_count': len(files),
-            'total_size': total_size,
-            'tree_depth': len(tree_levels),
-        },
-    }
-
-
-@click.command()
-@click.option('--url', required=True, help='URL being archived')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Generate Merkle tree of all archived outputs."""
-    status = 'failed'
-    output = None
-    error = ''
-    root_hash = None
-    file_count = 0
-
-    try:
-        # Check if enabled
-        save_hashes = os.getenv('HASHES_ENABLED', 'true').lower() in ('true', '1', 'yes', 'on')
-
-        if not save_hashes:
-            status = 'skipped'
-            click.echo(json.dumps({'status': status, 'output': 'HASHES_ENABLED=false'}))
-            sys.exit(0)
-
-        # Working directory is the extractor output dir (e.g., <snapshot>/hashes/)
-        # Parent is the snapshot directory
-        output_dir = Path.cwd()
-        snapshot_dir = output_dir.parent
-
-        if not snapshot_dir.exists():
-            raise FileNotFoundError(f'Snapshot directory not found: {snapshot_dir}')
-
-        # Ensure output directory exists
-        output_dir.mkdir(exist_ok=True)
-        output_path = output_dir / 'hashes.json'
-
-        # Generate Merkle tree
-        merkle_data = create_hashes(snapshot_dir)
-
-        # Write output
-        with open(output_path, 'w', encoding='utf-8') as f:
-            json.dump(merkle_data, f, indent=2)
-
-        status = 'succeeded'
-        output = 'hashes.json'
-        root_hash = merkle_data['root_hash']
-        file_count = merkle_data['metadata']['file_count']
-
-    except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
-        click.echo(f'Error: {error}', err=True)
-
-    # Print JSON result for hook runner
-    result = {
-        'status': status,
-        'output': output,
-        'error': error or None,
-        'root_hash': root_hash,
-        'file_count': file_count,
-    }
-    click.echo(json.dumps(result))
-
-    sys.exit(0 if status in ('succeeded', 'skipped') else 1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/hashes/templates/icon.html b/archivebox/plugins/hashes/templates/icon.html
deleted file mode 100644
index 211930f0..00000000
--- a/archivebox/plugins/hashes/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--hashes" title="Authenticity Hashes"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="5" r="2"/><circle cx="6" cy="18" r="2"/><circle cx="18" cy="18" r="2"/><path d="M12 7v6"/><path d="M12 13l-4 3"/><path d="M12 13l4 3"/></svg></span>
diff --git a/archivebox/plugins/hashes/tests/test_hashes.py b/archivebox/plugins/hashes/tests/test_hashes.py
deleted file mode 100644
index 0eb7d7f1..00000000
--- a/archivebox/plugins/hashes/tests/test_hashes.py
+++ /dev/null
@@ -1,157 +0,0 @@
-"""
-Tests for the hashes plugin.
-
-Tests the real merkle tree generation with actual files.
-"""
-
-import json
-import os
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-from django.test import TestCase
-
-
-# Get the path to the hashes hook
-PLUGIN_DIR = Path(__file__).parent.parent
-HASHES_HOOK = PLUGIN_DIR / 'on_Snapshot__93_hashes.py'
-
-
-class TestHashesPlugin(TestCase):
-    """Test the hashes plugin."""
-
-    def test_hashes_hook_exists(self):
-        """Hashes hook script should exist."""
-        self.assertTrue(HASHES_HOOK.exists(), f"Hook not found: {HASHES_HOOK}")
-
-    def test_hashes_generates_tree_for_files(self):
-        """Hashes hook should generate merkle tree for files in snapshot directory."""
-        with tempfile.TemporaryDirectory() as temp_dir:
-            # Create a mock snapshot directory structure
-            snapshot_dir = Path(temp_dir) / 'snapshot'
-            snapshot_dir.mkdir()
-
-            # Create output directory for hashes
-            output_dir = snapshot_dir / 'hashes'
-            output_dir.mkdir()
-
-            # Create some test files
-            (snapshot_dir / 'index.html').write_text('<html><body>Test</body></html>')
-            (snapshot_dir / 'screenshot.png').write_bytes(b'\x89PNG\r\n\x1a\n' + b'\x00' * 100)
-
-            subdir = snapshot_dir / 'media'
-            subdir.mkdir()
-            (subdir / 'video.mp4').write_bytes(b'\x00\x00\x00\x18ftypmp42')
-
-            # Run the hook from the output directory
-            env = os.environ.copy()
-            env['HASHES_ENABLED'] = 'true'
-
-            result = subprocess.run(
-                [
-                    sys.executable, str(HASHES_HOOK),
-                    '--url=https://example.com',
-                    '--snapshot-id=test-snapshot',
-                ],
-                capture_output=True,
-                text=True,
-                cwd=str(output_dir),  # Hook expects to run from output dir
-                env=env,
-                timeout=30
-            )
-
-            # Should succeed
-            self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
-
-            # Check output file exists
-            output_file = output_dir / 'hashes.json'
-            self.assertTrue(output_file.exists(), "hashes.json not created")
-
-            # Parse and verify output
-            with open(output_file) as f:
-                data = json.load(f)
-
-            self.assertIn('root_hash', data)
-            self.assertIn('files', data)
-            self.assertIn('metadata', data)
-
-            # Should have indexed our test files
-            file_paths = [f['path'] for f in data['files']]
-            self.assertIn('index.html', file_paths)
-            self.assertIn('screenshot.png', file_paths)
-
-            # Verify metadata
-            self.assertGreater(data['metadata']['file_count'], 0)
-            self.assertGreater(data['metadata']['total_size'], 0)
-
-    def test_hashes_skips_when_disabled(self):
-        """Hashes hook should skip when HASHES_ENABLED=false."""
-        with tempfile.TemporaryDirectory() as temp_dir:
-            snapshot_dir = Path(temp_dir) / 'snapshot'
-            snapshot_dir.mkdir()
-            output_dir = snapshot_dir / 'hashes'
-            output_dir.mkdir()
-
-            env = os.environ.copy()
-            env['HASHES_ENABLED'] = 'false'
-
-            result = subprocess.run(
-                [
-                    sys.executable, str(HASHES_HOOK),
-                    '--url=https://example.com',
-                    '--snapshot-id=test-snapshot',
-                ],
-                capture_output=True,
-                text=True,
-                cwd=str(output_dir),
-                env=env,
-                timeout=30
-            )
-
-            # Should succeed (exit 0) but skip
-            self.assertEqual(result.returncode, 0)
-            self.assertIn('skipped', result.stdout)
-
-    def test_hashes_handles_empty_directory(self):
-        """Hashes hook should handle empty snapshot directory."""
-        with tempfile.TemporaryDirectory() as temp_dir:
-            snapshot_dir = Path(temp_dir) / 'snapshot'
-            snapshot_dir.mkdir()
-            output_dir = snapshot_dir / 'hashes'
-            output_dir.mkdir()
-
-            env = os.environ.copy()
-            env['HASHES_ENABLED'] = 'true'
-
-            result = subprocess.run(
-                [
-                    sys.executable, str(HASHES_HOOK),
-                    '--url=https://example.com',
-                    '--snapshot-id=test-snapshot',
-                ],
-                capture_output=True,
-                text=True,
-                cwd=str(output_dir),
-                env=env,
-                timeout=30
-            )
-
-            # Should succeed even with empty directory
-            self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
-
-            # Check output file exists
-            output_file = output_dir / 'hashes.json'
-            self.assertTrue(output_file.exists())
-
-            with open(output_file) as f:
-                data = json.load(f)
-
-            # Should have empty file list
-            self.assertEqual(data['metadata']['file_count'], 0)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/headers/config.json b/archivebox/plugins/headers/config.json
deleted file mode 100644
index a0068f6e..00000000
--- a/archivebox/plugins/headers/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "HEADERS_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_HEADERS", "USE_HEADERS"],
-      "description": "Enable HTTP headers capture"
-    },
-    "HEADERS_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for headers capture in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/headers/on_Snapshot__27_headers.bg.js b/archivebox/plugins/headers/on_Snapshot__27_headers.bg.js
deleted file mode 100644
index 7ca72994..00000000
--- a/archivebox/plugins/headers/on_Snapshot__27_headers.bg.js
+++ /dev/null
@@ -1,247 +0,0 @@
-#!/usr/bin/env node
-/**
- * Capture original request + response headers for the main navigation.
- *
- * This hook sets up CDP listeners BEFORE chrome_navigate loads the page,
- * then waits for navigation to complete. It records the first top-level
- * request headers and the corresponding response headers (with :status).
- *
- * Usage: on_Snapshot__27_headers.bg.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes headers.json
- */
-
-const fs = require('fs');
-const path = require('path');
-
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const puppeteer = require('puppeteer-core');
-
-// Import shared utilities from chrome_utils.js
-const {
-    getEnvBool,
-    getEnvInt,
-    parseArgs,
-    connectToPage,
-    waitForPageLoaded,
-} = require('../chrome/chrome_utils.js');
-
-const PLUGIN_NAME = 'headers';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'headers.json';
-const CHROME_SESSION_DIR = '../chrome';
-const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
-
-let browser = null;
-let page = null;
-let client = null;
-let shuttingDown = false;
-let headersWritten = false;
-
-let requestId = null;
-let requestUrl = null;
-let requestHeaders = null;
-let responseHeaders = null;
-let responseStatus = null;
-let responseStatusText = null;
-let responseUrl = null;
-let originalUrl = null;
-
-function getFinalUrl() {
-    const finalUrlFile = path.join(CHROME_SESSION_DIR, 'final_url.txt');
-    if (fs.existsSync(finalUrlFile)) {
-        return fs.readFileSync(finalUrlFile, 'utf8').trim();
-    }
-    return page ? page.url() : null;
-}
-
-function writeHeadersFile() {
-    if (headersWritten) return;
-    if (!responseHeaders) return;
-
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-    const responseHeadersWithStatus = {
-        ...(responseHeaders || {}),
-    };
-
-    if (responseStatus !== null && responseStatus !== undefined &&
-        responseHeadersWithStatus[':status'] === undefined) {
-        responseHeadersWithStatus[':status'] = String(responseStatus);
-    }
-
-    const record = {
-        url: requestUrl || originalUrl,
-        final_url: getFinalUrl(),
-        status: responseStatus !== undefined ? responseStatus : null,
-        request_headers: requestHeaders || {},
-        response_headers: responseHeadersWithStatus,
-        headers: responseHeadersWithStatus, // backwards compatibility
-    };
-
-    if (responseStatusText) {
-        record.statusText = responseStatusText;
-    }
-    if (responseUrl) {
-        record.response_url = responseUrl;
-    }
-
-    fs.writeFileSync(outputPath, JSON.stringify(record, null, 2));
-    headersWritten = true;
-}
-
-async function setupListener(url) {
-    const timeout = getEnvInt('HEADERS_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
-    const pidFile = path.join(CHROME_SESSION_DIR, 'chrome.pid');
-
-    if (!fs.existsSync(cdpFile) || !fs.existsSync(targetIdFile) || !fs.existsSync(pidFile)) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-    try {
-        const pid = parseInt(fs.readFileSync(pidFile, 'utf8').trim(), 10);
-        if (!pid || Number.isNaN(pid)) throw new Error('Invalid pid');
-        process.kill(pid, 0);
-    } catch (e) {
-        throw new Error(CHROME_SESSION_REQUIRED_ERROR);
-    }
-
-    const { browser, page } = await connectToPage({
-        chromeSessionDir: CHROME_SESSION_DIR,
-        timeoutMs: timeout,
-        puppeteer,
-    });
-
-    client = await page.target().createCDPSession();
-    await client.send('Network.enable');
-
-    client.on('Network.requestWillBeSent', (params) => {
-        try {
-            if (requestId && !responseHeaders && params.redirectResponse && params.requestId === requestId) {
-                responseHeaders = params.redirectResponse.headers || {};
-                responseStatus = params.redirectResponse.status || null;
-                responseStatusText = params.redirectResponse.statusText || null;
-                responseUrl = params.redirectResponse.url || null;
-                writeHeadersFile();
-            }
-
-            if (requestId) return;
-            if (params.type && params.type !== 'Document') return;
-            if (!params.request || !params.request.url) return;
-            if (!params.request.url.startsWith('http')) return;
-
-            requestId = params.requestId;
-            requestUrl = params.request.url;
-            requestHeaders = params.request.headers || {};
-        } catch (e) {
-            // Ignore errors
-        }
-    });
-
-    client.on('Network.responseReceived', (params) => {
-        try {
-            if (!requestId || params.requestId !== requestId || responseHeaders) return;
-            const response = params.response || {};
-            responseHeaders = response.headers || {};
-            responseStatus = response.status || null;
-            responseStatusText = response.statusText || null;
-            responseUrl = response.url || null;
-            writeHeadersFile();
-        } catch (e) {
-            // Ignore errors
-        }
-    });
-
-    return { browser, page };
-}
-
-function emitResult(status = 'succeeded', outputStr = OUTPUT_FILE) {
-    if (shuttingDown) return;
-    shuttingDown = true;
-
-    console.log(JSON.stringify({
-        type: 'ArchiveResult',
-        status,
-        output_str: outputStr,
-    }));
-}
-
-async function handleShutdown(signal) {
-    console.error(`\nReceived ${signal}, emitting final results...`);
-    if (!headersWritten) {
-        writeHeadersFile();
-    }
-    if (headersWritten) {
-        emitResult('succeeded', OUTPUT_FILE);
-    } else {
-        emitResult('failed', 'No headers captured');
-    }
-
-    if (browser) {
-        try {
-            browser.disconnect();
-        } catch (e) {}
-    }
-    process.exit(headersWritten ? 0 : 1);
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__27_headers.bg.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    originalUrl = url;
-
-    if (!getEnvBool('HEADERS_ENABLED', true)) {
-        console.error('Skipping (HEADERS_ENABLED=False)');
-        console.log(JSON.stringify({type: 'ArchiveResult', status: 'skipped', output_str: 'HEADERS_ENABLED=False'}));
-        process.exit(0);
-    }
-
-    try {
-        // Set up listeners BEFORE navigation
-        const connection = await setupListener(url);
-        browser = connection.browser;
-        page = connection.page;
-
-        // Register signal handlers for graceful shutdown
-        process.on('SIGTERM', () => handleShutdown('SIGTERM'));
-        process.on('SIGINT', () => handleShutdown('SIGINT'));
-
-        // Wait for chrome_navigate to complete (non-fatal)
-        try {
-            const timeout = getEnvInt('HEADERS_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
-            await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 200);
-        } catch (e) {
-            console.error(`WARN: ${e.message}`);
-        }
-
-        // Keep alive until SIGTERM
-        await new Promise(() => {});
-        return;
-
-    } catch (e) {
-        const errorMessage = (e && e.message)
-            ? `${e.name || 'Error'}: ${e.message}`
-            : String(e || 'Unknown error');
-        console.error(`ERROR: ${errorMessage}`);
-
-        console.log(JSON.stringify({
-            type: 'ArchiveResult',
-            status: 'failed',
-            output_str: errorMessage,
-        }));
-        process.exit(1);
-    }
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/headers/templates/icon.html b/archivebox/plugins/headers/templates/icon.html
deleted file mode 100644
index f693e709..00000000
--- a/archivebox/plugins/headers/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--headers" title="Headers"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="4" cy="7" r="1" fill="currentColor" stroke="none"/><circle cx="4" cy="12" r="1" fill="currentColor" stroke="none"/><circle cx="4" cy="17" r="1" fill="currentColor" stroke="none"/><path d="M7 7h13"/><path d="M7 12h13"/><path d="M7 17h13"/></svg></span>
diff --git a/archivebox/plugins/headers/tests/test_headers.py b/archivebox/plugins/headers/tests/test_headers.py
deleted file mode 100644
index 09ec86fb..00000000
--- a/archivebox/plugins/headers/tests/test_headers.py
+++ /dev/null
@@ -1,409 +0,0 @@
-"""
-Integration tests for headers plugin
-
-Tests verify:
-    pass
-1. Plugin script exists and is executable
-2. Node.js is available
-3. Headers extraction works for real example.com
-4. Output JSON contains actual HTTP headers
-5. Config options work (TIMEOUT, USER_AGENT)
-"""
-
-import json
-import shutil
-import subprocess
-import tempfile
-import time
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    CHROME_NAVIGATE_HOOK,
-    get_test_env,
-    chrome_session,
-)
-
-PLUGIN_DIR = Path(__file__).parent.parent
-HEADERS_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_headers.*'), None)
-TEST_URL = 'https://example.com'
-
-def normalize_root_url(url: str) -> str:
-    return url.rstrip('/')
-
-def run_headers_capture(headers_dir, snapshot_chrome_dir, env, url, snapshot_id):
-    hook_proc = subprocess.Popen(
-        ['node', str(HEADERS_HOOK), f'--url={url}', f'--snapshot-id={snapshot_id}'],
-        cwd=headers_dir,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-        env=env,
-    )
-
-    nav_result = subprocess.run(
-        ['node', str(CHROME_NAVIGATE_HOOK), f'--url={url}', f'--snapshot-id={snapshot_id}'],
-        cwd=snapshot_chrome_dir,
-        capture_output=True,
-        text=True,
-        timeout=120,
-        env=env,
-    )
-
-    headers_file = headers_dir / 'headers.json'
-    for _ in range(60):
-        if headers_file.exists() and headers_file.stat().st_size > 0:
-            break
-        time.sleep(1)
-
-    if hook_proc.poll() is None:
-        hook_proc.terminate()
-        try:
-            stdout, stderr = hook_proc.communicate(timeout=5)
-        except subprocess.TimeoutExpired:
-            hook_proc.kill()
-            stdout, stderr = hook_proc.communicate()
-    else:
-        stdout, stderr = hook_proc.communicate()
-
-    return hook_proc.returncode, stdout, stderr, nav_result, headers_file
-
-
-def test_hook_script_exists():
-    """Verify hook script exists."""
-    assert HEADERS_HOOK.exists(), f"Hook script not found: {HEADERS_HOOK}"
-
-
-def test_node_is_available():
-    """Test that Node.js is available on the system."""
-    result = subprocess.run(
-        ['which', 'node'],
-        capture_output=True,
-        text=True
-    )
-
-    if result.returncode != 0:
-        pass
-
-    binary_path = result.stdout.strip()
-    assert Path(binary_path).exists(), f"Binary should exist at {binary_path}"
-
-    # Test that node is executable and get version
-    result = subprocess.run(
-        ['node', '--version'],
-        capture_output=True,
-        text=True,
-        timeout=10
-    ,
-            env=get_test_env())
-    assert result.returncode == 0, f"node not executable: {result.stderr}"
-    assert result.stdout.startswith('v'), f"Unexpected node version format: {result.stdout}"
-
-
-def test_extracts_headers_from_example_com():
-    """Test full workflow: extract headers from real example.com."""
-
-    # Check node is available
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
-            headers_dir.mkdir(exist_ok=True)
-
-            result = run_headers_capture(
-                headers_dir,
-                snapshot_chrome_dir,
-                env,
-                TEST_URL,
-                'test789',
-            )
-
-        hook_code, stdout, stderr, nav_result, headers_file = result
-        assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-        assert hook_code == 0, f"Extraction failed: {stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Verify output file exists (hook writes to current directory)
-        assert headers_file.exists(), "headers.json not created"
-
-        # Verify headers JSON contains REAL example.com response
-        headers_data = json.loads(headers_file.read_text())
-
-        assert 'url' in headers_data, "Should have url field"
-        assert normalize_root_url(headers_data['url']) == normalize_root_url(TEST_URL), f"URL should be {TEST_URL}"
-
-        assert 'status' in headers_data, "Should have status field"
-        assert headers_data['status'] in [200, 301, 302], \
-            f"Should have valid HTTP status, got {headers_data['status']}"
-
-        assert 'request_headers' in headers_data, "Should have request_headers field"
-        assert isinstance(headers_data['request_headers'], dict), "Request headers should be a dict"
-
-        assert 'response_headers' in headers_data, "Should have response_headers field"
-        assert isinstance(headers_data['response_headers'], dict), "Response headers should be a dict"
-        assert len(headers_data['response_headers']) > 0, "Response headers dict should not be empty"
-
-        assert 'headers' in headers_data, "Should have headers field"
-        assert isinstance(headers_data['headers'], dict), "Headers should be a dict"
-
-        # Verify common HTTP headers are present
-        headers_lower = {k.lower(): v for k, v in headers_data['response_headers'].items()}
-        assert 'content-type' in headers_lower or 'content-length' in headers_lower, \
-            "Should have at least one common HTTP header"
-
-        assert headers_data['response_headers'].get(':status') == str(headers_data['status']), \
-            "Response headers should include :status pseudo header"
-
-
-def test_headers_output_structure():
-    """Test that headers plugin produces correctly structured output."""
-
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
-            headers_dir.mkdir(exist_ok=True)
-
-            result = run_headers_capture(
-                headers_dir,
-                snapshot_chrome_dir,
-                env,
-                TEST_URL,
-                'testformat',
-            )
-
-        hook_code, stdout, stderr, nav_result, headers_file = result
-        assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-        assert hook_code == 0, f"Extraction failed: {stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Verify output structure
-        assert headers_file.exists(), "Output headers.json not created"
-
-        output_data = json.loads(headers_file.read_text())
-
-        # Verify all required fields are present
-        assert 'url' in output_data, "Output should have url field"
-        assert 'status' in output_data, "Output should have status field"
-        assert 'request_headers' in output_data, "Output should have request_headers field"
-        assert 'response_headers' in output_data, "Output should have response_headers field"
-        assert 'headers' in output_data, "Output should have headers field"
-
-        # Verify data types
-        assert isinstance(output_data['status'], int), "Status should be integer"
-        assert isinstance(output_data['request_headers'], dict), "Request headers should be dict"
-        assert isinstance(output_data['response_headers'], dict), "Response headers should be dict"
-        assert isinstance(output_data['headers'], dict), "Headers should be dict"
-
-        # Verify example.com returns expected headers
-        assert normalize_root_url(output_data['url']) == normalize_root_url(TEST_URL)
-        assert output_data['status'] in [200, 301, 302]
-
-
-def test_fails_without_chrome_session():
-    """Test that headers plugin fails when chrome session is missing."""
-
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Run headers extraction
-        result = subprocess.run(
-            ['node', str(HEADERS_HOOK), f'--url={TEST_URL}', '--snapshot-id=testhttp'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=60
-        ,
-            env=get_test_env())
-
-        assert result.returncode != 0, "Should fail without chrome session"
-        assert 'No Chrome session found (chrome plugin must run first)' in (result.stdout + result.stderr)
-
-
-def test_config_timeout_honored():
-    """Test that TIMEOUT config is respected."""
-
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set very short timeout (but example.com should still succeed)
-        import os
-        env_override = os.environ.copy()
-        env_override['TIMEOUT'] = '5'
-
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
-            headers_dir.mkdir(exist_ok=True)
-            env.update(env_override)
-
-            result = run_headers_capture(
-                headers_dir,
-                snapshot_chrome_dir,
-                env,
-                TEST_URL,
-                'testtimeout',
-            )
-
-        # Should complete (success or fail, but not hang)
-        hook_code, _stdout, _stderr, nav_result, _headers_file = result
-        assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-        assert hook_code in (0, 1), "Should complete without hanging"
-
-
-def test_config_user_agent():
-    """Test that USER_AGENT config is used."""
-
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set custom user agent
-        import os
-        env_override = os.environ.copy()
-        env_override['USER_AGENT'] = 'TestBot/1.0'
-
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
-            headers_dir.mkdir(exist_ok=True)
-            env.update(env_override)
-
-            result = run_headers_capture(
-                headers_dir,
-                snapshot_chrome_dir,
-                env,
-                TEST_URL,
-                'testua',
-            )
-
-        # Should succeed (example.com doesn't block)
-        hook_code, stdout, _stderr, nav_result, _headers_file = result
-        assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-        if hook_code == 0:
-            # Parse clean JSONL output
-            result_json = None
-            for line in stdout.strip().split('\n'):
-                line = line.strip()
-                if line.startswith('{'):
-                    pass
-                    try:
-                        record = json.loads(line)
-                        if record.get('type') == 'ArchiveResult':
-                            result_json = record
-                            break
-                    except json.JSONDecodeError:
-                        pass
-
-            assert result_json, "Should have ArchiveResult JSONL output"
-            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-
-def test_handles_https_urls():
-    """Test that HTTPS URLs work correctly."""
-
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        with chrome_session(tmpdir, test_url='https://example.org', navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
-            headers_dir.mkdir(exist_ok=True)
-            result = run_headers_capture(
-                headers_dir,
-                snapshot_chrome_dir,
-                env,
-                'https://example.org',
-                'testhttps',
-            )
-
-        hook_code, _stdout, _stderr, nav_result, headers_file = result
-        assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-        if hook_code == 0:
-            if headers_file.exists():
-                output_data = json.loads(headers_file.read_text())
-                assert normalize_root_url(output_data['url']) == normalize_root_url('https://example.org')
-                assert output_data['status'] in [200, 301, 302]
-
-
-def test_handles_404_gracefully():
-    """Test that headers plugin handles 404s gracefully."""
-
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        with chrome_session(tmpdir, test_url='https://example.com/nonexistent-page-404', navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            headers_dir = snapshot_chrome_dir.parent / 'headers'
-            headers_dir.mkdir(exist_ok=True)
-            result = run_headers_capture(
-                headers_dir,
-                snapshot_chrome_dir,
-                env,
-                'https://example.com/nonexistent-page-404',
-                'test404',
-            )
-
-        # May succeed or fail depending on server behavior
-        # If it succeeds, verify 404 status is captured
-        hook_code, _stdout, _stderr, nav_result, headers_file = result
-        assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-        if hook_code == 0:
-            if headers_file.exists():
-                output_data = json.loads(headers_file.read_text())
-                assert output_data['status'] == 404, "Should capture 404 status"
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/htmltotext/config.json b/archivebox/plugins/htmltotext/config.json
deleted file mode 100644
index 7f9e644a..00000000
--- a/archivebox/plugins/htmltotext/config.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "HTMLTOTEXT_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_HTMLTOTEXT", "USE_HTMLTOTEXT"],
-      "description": "Enable HTML to text conversion"
-    },
-    "HTMLTOTEXT_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for HTML to text conversion in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/htmltotext/on_Snapshot__58_htmltotext.py b/archivebox/plugins/htmltotext/on_Snapshot__58_htmltotext.py
deleted file mode 100644
index 30134446..00000000
--- a/archivebox/plugins/htmltotext/on_Snapshot__58_htmltotext.py
+++ /dev/null
@@ -1,161 +0,0 @@
-#!/usr/bin/env python3
-"""
-Convert HTML to plain text for search indexing.
-
-This extractor reads HTML from other extractors (wget, singlefile, dom)
-and converts it to plain text for full-text search.
-
-Usage: on_Snapshot__htmltotext.py --url=<url> --snapshot-id=<uuid>
-Output: Writes htmltotext.txt to $PWD
-
-Environment variables:
-    TIMEOUT: Timeout in seconds (not used, but kept for consistency)
-
-Note: This extractor does not require any external binaries.
-      It uses Python's built-in html.parser module.
-"""
-
-import json
-import os
-import re
-import sys
-from html.parser import HTMLParser
-from pathlib import Path
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'htmltotext'
-OUTPUT_DIR = '.'
-OUTPUT_FILE = 'htmltotext.txt'
-
-
-class HTMLTextExtractor(HTMLParser):
-    """Extract text content from HTML, ignoring scripts/styles."""
-
-    def __init__(self):
-        super().__init__()
-        self.result = []
-        self.skip_tags = {'script', 'style', 'head', 'meta', 'link', 'noscript'}
-        self.current_tag = None
-
-    def handle_starttag(self, tag, attrs):
-        self.current_tag = tag.lower()
-
-    def handle_endtag(self, tag):
-        self.current_tag = None
-
-    def handle_data(self, data):
-        if self.current_tag not in self.skip_tags:
-            text = data.strip()
-            if text:
-                self.result.append(text)
-
-    def get_text(self) -> str:
-        return ' '.join(self.result)
-
-
-def html_to_text(html: str) -> str:
-    """Convert HTML to plain text."""
-    parser = HTMLTextExtractor()
-    try:
-        parser.feed(html)
-        return parser.get_text()
-    except Exception:
-        # Fallback: strip HTML tags with regex
-        text = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)
-        text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
-        text = re.sub(r'<[^>]+>', ' ', text)
-        text = re.sub(r'\s+', ' ', text)
-        return text.strip()
-
-
-def find_html_source() -> str | None:
-    """Find HTML content from other extractors in the snapshot directory."""
-    # Hooks run in snapshot_dir, sibling extractor outputs are in subdirectories
-    search_patterns = [
-        'singlefile/singlefile.html',
-        '*_singlefile/singlefile.html',
-        'singlefile/*.html',
-        '*_singlefile/*.html',
-        'dom/output.html',
-        '*_dom/output.html',
-        'dom/*.html',
-        '*_dom/*.html',
-        'wget/**/*.html',
-        '*_wget/**/*.html',
-        'wget/**/*.htm',
-        '*_wget/**/*.htm',
-    ]
-
-    for base in (Path.cwd(), Path.cwd().parent):
-        for pattern in search_patterns:
-            matches = list(base.glob(pattern))
-            for match in matches:
-                if match.is_file() and match.stat().st_size > 0:
-                    try:
-                        return match.read_text(errors='ignore')
-                    except Exception:
-                        continue
-
-    return None
-
-
-def extract_htmltotext(url: str) -> tuple[bool, str | None, str]:
-    """
-    Extract plain text from HTML sources.
-
-    Returns: (success, output_path, error_message)
-    """
-    # Find HTML source from other extractors
-    html_content = find_html_source()
-    if not html_content:
-        return False, None, 'No HTML source found (run singlefile, dom, or wget first)'
-
-    # Convert HTML to text
-    text = html_to_text(html_content)
-
-    if not text or len(text) < 10:
-        return False, None, 'No meaningful text extracted from HTML'
-
-    # Output directory is current directory (hook already runs in output dir)
-    output_dir = Path(OUTPUT_DIR)
-    output_path = output_dir / OUTPUT_FILE
-    output_path.write_text(text, encoding='utf-8')
-
-    return True, str(output_path), ''
-
-
-@click.command()
-@click.option('--url', required=True, help='URL that was archived')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Convert HTML to plain text for search indexing."""
-
-    try:
-        # Run extraction
-        success, output, error = extract_htmltotext(url)
-
-        if success:
-            # Success - emit ArchiveResult
-            result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
-            }
-            print(json.dumps(result))
-            sys.exit(0)
-        else:
-            # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
-            sys.exit(1)
-
-    except Exception as e:
-        # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/htmltotext/templates/icon.html b/archivebox/plugins/htmltotext/templates/icon.html
deleted file mode 100644
index d1c8c78d..00000000
--- a/archivebox/plugins/htmltotext/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--htmltotext" title="HTML to Text"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M4 7h16"/><path d="M4 12h12"/><path d="M4 17h14"/></svg></span>
diff --git a/archivebox/plugins/htmltotext/tests/test_htmltotext.py b/archivebox/plugins/htmltotext/tests/test_htmltotext.py
deleted file mode 100644
index 7d59fdd1..00000000
--- a/archivebox/plugins/htmltotext/tests/test_htmltotext.py
+++ /dev/null
@@ -1,84 +0,0 @@
-"""
-Integration tests for htmltotext plugin
-
-Tests verify standalone htmltotext extractor execution.
-"""
-
-import json
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-HTMLTOTEXT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_htmltotext.*'), None)
-TEST_URL = 'https://example.com'
-
-def test_hook_script_exists():
-    assert HTMLTOTEXT_HOOK.exists()
-
-def test_extracts_text_from_html():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        # Create HTML source
-        (tmpdir / 'singlefile').mkdir()
-        (tmpdir / 'singlefile' / 'singlefile.html').write_text('<html><body><h1>Example Domain</h1><p>This domain is for examples.</p></body></html>')
-
-        result = subprocess.run(
-            [sys.executable, str(HTMLTOTEXT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
-            cwd=tmpdir, capture_output=True, text=True, timeout=30
-        )
-
-        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Verify output file (hook writes to current directory)
-        output_file = tmpdir / 'htmltotext.txt'
-        assert output_file.exists(), f"htmltotext.txt not created. Files: {list(tmpdir.iterdir())}"
-        content = output_file.read_text()
-        assert len(content) > 0, "Content should not be empty"
-        assert 'Example Domain' in content, "Should contain text from HTML"
-
-def test_fails_gracefully_without_html():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        result = subprocess.run(
-            [sys.executable, str(HTMLTOTEXT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir, capture_output=True, text=True, timeout=30
-        )
-
-        # Should exit with non-zero or emit failure JSONL
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        if result_json:
-            # Should report failure or skip since no HTML source
-            assert result_json['status'] in ['failed', 'skipped'], f"Should fail or skip without HTML: {result_json}"
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/infiniscroll/config.json b/archivebox/plugins/infiniscroll/config.json
deleted file mode 100644
index 5954ff11..00000000
--- a/archivebox/plugins/infiniscroll/config.json
+++ /dev/null
@@ -1,51 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "INFINISCROLL_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_INFINISCROLL", "USE_INFINISCROLL"],
-      "description": "Enable infinite scroll page expansion"
-    },
-    "INFINISCROLL_TIMEOUT": {
-      "type": "integer",
-      "default": 120,
-      "minimum": 10,
-      "x-fallback": "TIMEOUT",
-      "description": "Maximum timeout for scrolling in seconds"
-    },
-    "INFINISCROLL_SCROLL_DELAY": {
-      "type": "integer",
-      "default": 2000,
-      "minimum": 500,
-      "description": "Delay between scrolls in milliseconds"
-    },
-    "INFINISCROLL_SCROLL_DISTANCE": {
-      "type": "integer",
-      "default": 1600,
-      "minimum": 100,
-      "description": "Distance to scroll per step in pixels"
-    },
-    "INFINISCROLL_SCROLL_LIMIT": {
-      "type": "integer",
-      "default": 10,
-      "minimum": 1,
-      "maximum": 100,
-      "description": "Maximum number of scroll steps"
-    },
-    "INFINISCROLL_MIN_HEIGHT": {
-      "type": "integer",
-      "default": 16000,
-      "minimum": 1000,
-      "description": "Minimum page height to scroll to in pixels"
-    },
-    "INFINISCROLL_EXPAND_DETAILS": {
-      "type": "boolean",
-      "default": true,
-      "description": "Expand <details> elements and click 'load more' buttons for comments"
-    }
-  }
-}
diff --git a/archivebox/plugins/infiniscroll/on_Snapshot__45_infiniscroll.js b/archivebox/plugins/infiniscroll/on_Snapshot__45_infiniscroll.js
deleted file mode 100755
index 8275d61c..00000000
--- a/archivebox/plugins/infiniscroll/on_Snapshot__45_infiniscroll.js
+++ /dev/null
@@ -1,427 +0,0 @@
-#!/usr/bin/env node
-/**
- * Scroll the page down to trigger infinite scroll / lazy loading.
- *
- * Scrolls down 1 page at a time, up to INFINISCROLL_SCROLL_LIMIT times,
- * ensuring at least INFINISCROLL_MIN_HEIGHT (default 16,000px) is reached.
- * Stops early if no new content loads after a scroll.
- *
- * Optionally expands <details> elements and clicks "load more" buttons.
- *
- * Usage: on_Snapshot__45_infiniscroll.js --url=<url> --snapshot-id=<uuid>
- * Output: JSONL with scroll stats (no files created)
- *
- * Environment variables:
- *     INFINISCROLL_ENABLED: Enable/disable (default: true)
- *     INFINISCROLL_TIMEOUT: Max timeout in seconds (default: 120)
- *     INFINISCROLL_SCROLL_DELAY: Delay between scrolls in ms (default: 2000)
- *     INFINISCROLL_SCROLL_DISTANCE: Pixels per scroll (default: 1600)
- *     INFINISCROLL_SCROLL_LIMIT: Max scroll iterations (default: 10)
- *     INFINISCROLL_MIN_HEIGHT: Min page height to reach in px (default: 16000)
- *     INFINISCROLL_EXPAND_DETAILS: Expand <details> and comments (default: true)
- */
-
-const fs = require('fs');
-const path = require('path');
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const {
-    getEnv,
-    getEnvBool,
-    getEnvInt,
-} = require('../chrome/chrome_utils.js');
-
-// Check if infiniscroll is enabled BEFORE requiring puppeteer
-if (!getEnvBool('INFINISCROLL_ENABLED', true)) {
-    console.error('Skipping infiniscroll (INFINISCROLL_ENABLED=False)');
-    process.exit(0);
-}
-
-const puppeteer = require('puppeteer-core');
-
-const PLUGIN_NAME = 'infiniscroll';
-const CHROME_SESSION_DIR = '../chrome';
-const CHROME_SESSION_REQUIRED_ERROR = 'No Chrome session found (chrome plugin must run first)';
-
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach(arg => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-function getCdpUrl() {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    if (fs.existsSync(cdpFile)) {
-        return fs.readFileSync(cdpFile, 'utf8').trim();
-    }
-    return null;
-}
-
-function getPageId() {
-    const targetIdFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
-    if (fs.existsSync(targetIdFile)) {
-        return fs.readFileSync(targetIdFile, 'utf8').trim();
-    }
-    return null;
-}
-
-async function waitForChromeTabLoaded(timeoutMs = 60000) {
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(navigationFile)) {
-            return true;
-        }
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-    return false;
-}
-
-function sleep(ms) {
-    return new Promise(resolve => setTimeout(resolve, ms));
-}
-
-/**
- * Expand <details> elements and click "load more" buttons for comments.
- * Based on archivebox.ts expandComments function.
- */
-async function expandDetails(page, options = {}) {
-    const {
-        timeout = 30000,
-        limit = 500,
-        delay = 500,
-    } = options;
-
-    const startTime = Date.now();
-
-    // First, expand all <details> elements
-    const detailsExpanded = await page.evaluate(() => {
-        let count = 0;
-        // Generic <details> elements
-        document.querySelectorAll('details:not([open])').forEach(el => {
-            el.open = true;
-            count++;
-        });
-        // Github README details sections
-        document.querySelectorAll('article details:not([open])').forEach(el => {
-            el.open = true;
-            count++;
-        });
-        // Github issue discussion hidden comments
-        document.querySelectorAll('div.js-discussion details:not(.details-overlay):not([open])').forEach(el => {
-            el.open = true;
-            count++;
-        });
-        // HedgeDoc/Markdown details sections
-        document.querySelectorAll('.markdown-body details:not([open])').forEach(el => {
-            el.open = true;
-            count++;
-        });
-        return count;
-    });
-
-    if (detailsExpanded > 0) {
-        console.error(`Expanded ${detailsExpanded} <details> elements`);
-    }
-
-    // Then click "load more" buttons for comments
-    const numExpanded = await page.evaluate(async ({ timeout, limit, delay }) => {
-        // Helper to find elements by XPath
-        function getElementsByXPath(xpath) {
-            const results = [];
-            const xpathResult = document.evaluate(
-                xpath,
-                document,
-                null,
-                XPathResult.ORDERED_NODE_ITERATOR_TYPE,
-                null
-            );
-            let node;
-            while ((node = xpathResult.iterateNext()) != null) {
-                results.push(node);
-            }
-            return results;
-        }
-
-        const wait = (ms) => new Promise(res => setTimeout(res, ms));
-
-        // Find all "load more" type buttons/links
-        const getLoadMoreLinks = () => [
-            // Reddit (new)
-            ...document.querySelectorAll('faceplate-partial[loading=action]'),
-            // Reddit (old) - show more replies
-            ...document.querySelectorAll('a[onclick^="return morechildren"]'),
-            // Reddit (old) - show hidden replies
-            ...document.querySelectorAll('a[onclick^="return togglecomment"]'),
-            // Twitter/X - show more replies
-            ...getElementsByXPath("//*[text()='Show more replies']"),
-            ...getElementsByXPath("//*[text()='Show replies']"),
-            // Generic "load more" / "show more" buttons
-            ...getElementsByXPath("//*[contains(text(),'Load more')]"),
-            ...getElementsByXPath("//*[contains(text(),'Show more')]"),
-            // Hacker News
-            ...document.querySelectorAll('a.morelink'),
-        ];
-
-        let expanded = 0;
-        let loadMoreLinks = getLoadMoreLinks();
-        const startTime = Date.now();
-
-        while (loadMoreLinks.length > 0) {
-            for (const link of loadMoreLinks) {
-                // Skip certain elements
-                if (link.slot === 'children') continue;
-
-                try {
-                    link.scrollIntoView({ behavior: 'smooth' });
-                    link.click();
-                    expanded++;
-                    await wait(delay);
-                } catch (e) {
-                    // Ignore click errors
-                }
-
-                // Check limits
-                if (expanded >= limit) return expanded;
-                if (Date.now() - startTime >= timeout) return expanded;
-            }
-
-            // Check for new load more links after clicking
-            await wait(delay);
-            loadMoreLinks = getLoadMoreLinks();
-        }
-
-        return expanded;
-    }, { timeout, limit, delay });
-
-    if (numExpanded > 0) {
-        console.error(`Clicked ${numExpanded} "load more" buttons`);
-    }
-
-    return {
-        detailsExpanded,
-        commentsExpanded: numExpanded,
-        total: detailsExpanded + numExpanded,
-    };
-}
-
-async function scrollDown(page, options = {}) {
-    const {
-        timeout = 120000,
-        scrollDelay = 2000,
-        scrollDistance = 1600,
-        scrollLimit = 10,
-        minHeight = 16000,
-    } = options;
-
-    const startTime = Date.now();
-
-    // Get page height using multiple methods (some pages use different scroll containers)
-    const getPageHeight = () => page.evaluate(() => {
-        return Math.max(
-            document.body.scrollHeight || 0,
-            document.body.offsetHeight || 0,
-            document.documentElement.scrollHeight || 0,
-            document.documentElement.offsetHeight || 0
-        );
-    });
-
-    const startingHeight = await getPageHeight();
-    let lastHeight = startingHeight;
-    let scrollCount = 0;
-    let scrollPosition = 0;
-
-    console.error(`Initial page height: ${startingHeight}px`);
-
-    // Scroll to top first
-    await page.evaluate(() => {
-        window.scrollTo({ top: 0, left: 0, behavior: 'smooth' });
-    });
-    await sleep(500);
-
-    while (scrollCount < scrollLimit) {
-        // Check timeout
-        const elapsed = Date.now() - startTime;
-        if (elapsed >= timeout) {
-            console.error(`Timeout reached after ${scrollCount} scrolls`);
-            break;
-        }
-
-        scrollPosition = (scrollCount + 1) * scrollDistance;
-        console.error(`Scrolling down ${scrollCount + 1}x ${scrollDistance}px... (${scrollPosition}/${lastHeight})`);
-
-        await page.evaluate((yOffset) => {
-            window.scrollTo({ top: yOffset, left: 0, behavior: 'smooth' });
-        }, scrollPosition);
-
-        scrollCount++;
-        await sleep(scrollDelay);
-
-        // Check if new content was added (infinite scroll detection)
-        const newHeight = await getPageHeight();
-        const addedPx = newHeight - lastHeight;
-
-        if (addedPx > 0) {
-            console.error(`Detected infini-scrolling: ${lastHeight}+${addedPx} => ${newHeight}`);
-        } else if (scrollPosition >= newHeight + scrollDistance) {
-            // Reached the bottom
-            if (scrollCount > 2) {
-                console.error(`Reached bottom of page at ${newHeight}px`);
-                break;
-            }
-        }
-
-        lastHeight = newHeight;
-
-        // Check if we've reached minimum height and can stop
-        if (lastHeight >= minHeight && scrollPosition >= lastHeight) {
-            console.error(`Reached minimum height target (${minHeight}px)`);
-            break;
-        }
-    }
-
-    // Scroll to absolute bottom
-    if (scrollPosition < lastHeight) {
-        await page.evaluate(() => {
-            window.scrollTo({ top: document.documentElement.scrollHeight, left: 0, behavior: 'smooth' });
-        });
-        await sleep(scrollDelay);
-    }
-
-    // Scroll back to top
-    console.error(`Reached bottom of page at ${lastHeight}px, scrolling back to top...`);
-    await page.evaluate(() => {
-        window.scrollTo({ top: 0, left: 0, behavior: 'smooth' });
-    });
-    await sleep(scrollDelay);
-
-    const totalElapsed = Date.now() - startTime;
-
-    return {
-        scrollCount,
-        finalHeight: lastHeight,
-        startingHeight,
-        elapsedMs: totalElapsed,
-    };
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__45_infiniscroll.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    const timeout = getEnvInt('INFINISCROLL_TIMEOUT', 120) * 1000;
-    const scrollDelay = getEnvInt('INFINISCROLL_SCROLL_DELAY', 2000);
-    const scrollDistance = getEnvInt('INFINISCROLL_SCROLL_DISTANCE', 1600);
-    const scrollLimit = getEnvInt('INFINISCROLL_SCROLL_LIMIT', 10);
-    const minHeight = getEnvInt('INFINISCROLL_MIN_HEIGHT', 16000);
-    const expandDetailsEnabled = getEnvBool('INFINISCROLL_EXPAND_DETAILS', true);
-
-    const cdpUrl = getCdpUrl();
-    if (!cdpUrl) {
-        console.error(CHROME_SESSION_REQUIRED_ERROR);
-        process.exit(1);
-    }
-
-    // Wait for page to be loaded
-    const pageLoaded = await waitForChromeTabLoaded(60000);
-    if (!pageLoaded) {
-        console.error('ERROR: Page not loaded after 60s (chrome_navigate must complete first)');
-        process.exit(1);
-    }
-
-    let browser = null;
-    try {
-        browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
-
-        const pages = await browser.pages();
-        if (pages.length === 0) {
-            throw new Error('No pages found in browser');
-        }
-
-        // Find the right page by target ID
-        const targetId = getPageId();
-        let page = null;
-        if (targetId) {
-            page = pages.find(p => {
-                const target = p.target();
-                return target && target._targetId === targetId;
-            });
-        }
-        if (!page) {
-            page = pages[pages.length - 1];
-        }
-
-        console.error(`Starting infinite scroll on ${url}`);
-
-        // Expand <details> and comments before scrolling (if enabled)
-        let expandResult = { total: 0, detailsExpanded: 0, commentsExpanded: 0 };
-        if (expandDetailsEnabled) {
-            console.error('Expanding <details> and comments...');
-            expandResult = await expandDetails(page, {
-                timeout: Math.min(timeout / 4, 30000),
-                limit: 500,
-                delay: scrollDelay / 4,
-            });
-        }
-
-        const result = await scrollDown(page, {
-            timeout,
-            scrollDelay,
-            scrollDistance,
-            scrollLimit,
-            minHeight,
-        });
-
-        // Expand again after scrolling (new content may have loaded)
-        if (expandDetailsEnabled) {
-            const expandResult2 = await expandDetails(page, {
-                timeout: Math.min(timeout / 4, 30000),
-                limit: 500,
-                delay: scrollDelay / 4,
-            });
-            expandResult.total += expandResult2.total;
-            expandResult.detailsExpanded += expandResult2.detailsExpanded;
-            expandResult.commentsExpanded += expandResult2.commentsExpanded;
-        }
-
-        browser.disconnect();
-
-        const elapsedSec = (result.elapsedMs / 1000).toFixed(1);
-        const finalHeightStr = result.finalHeight.toLocaleString();
-        const addedHeight = result.finalHeight - result.startingHeight;
-        const addedStr = addedHeight > 0 ? `+${addedHeight.toLocaleString()}px new content` : 'no new content';
-        const expandStr = expandResult.total > 0 ? `, expanded ${expandResult.total}` : '';
-        const outputStr = `scrolled to ${finalHeightStr}px (${addedStr}${expandStr}) over ${elapsedSec}s`;
-
-        console.error(`Success: ${outputStr}`);
-        console.log(JSON.stringify({
-            type: 'ArchiveResult',
-            status: 'succeeded',
-            output_str: outputStr,
-        }));
-        process.exit(0);
-
-    } catch (e) {
-        if (browser) browser.disconnect();
-        console.error(`ERROR: ${e.name}: ${e.message}`);
-        process.exit(1);
-    }
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/infiniscroll/templates/icon.html b/archivebox/plugins/infiniscroll/templates/icon.html
deleted file mode 100644
index 7de95bf4..00000000
--- a/archivebox/plugins/infiniscroll/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--infiniscroll" title="Infinite Scroll"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M12 5v9"/><path d="M8 10l4 4 4-4"/><circle cx="6" cy="19" r="1" fill="currentColor" stroke="none"/><circle cx="12" cy="19" r="1" fill="currentColor" stroke="none"/><circle cx="18" cy="19" r="1" fill="currentColor" stroke="none"/></svg></span>
diff --git a/archivebox/plugins/infiniscroll/tests/test_infiniscroll.py b/archivebox/plugins/infiniscroll/tests/test_infiniscroll.py
deleted file mode 100644
index a2c1cb58..00000000
--- a/archivebox/plugins/infiniscroll/tests/test_infiniscroll.py
+++ /dev/null
@@ -1,245 +0,0 @@
-"""
-Integration tests for infiniscroll plugin
-
-Tests verify:
-1. Hook script exists
-2. Dependencies installed via chrome validation hooks
-3. Verify deps with abx-pkg
-4. INFINISCROLL_ENABLED=False skips without JSONL
-5. Fails gracefully when no chrome session exists
-6. Full integration test: scrolls page and outputs stats
-7. Config options work (scroll limit, min height)
-"""
-
-import json
-import os
-import re
-import subprocess
-import time
-import tempfile
-from pathlib import Path
-
-import pytest
-
-# Import shared Chrome test helpers
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_test_env,
-    chrome_session,
-)
-
-
-PLUGIN_DIR = Path(__file__).parent.parent
-INFINISCROLL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_infiniscroll.*'), None)
-TEST_URL = 'https://www.singsing.movie/'
-
-
-def test_hook_script_exists():
-    """Verify on_Snapshot hook exists."""
-    assert INFINISCROLL_HOOK is not None, "Infiniscroll hook not found"
-    assert INFINISCROLL_HOOK.exists(), f"Hook not found: {INFINISCROLL_HOOK}"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify dependencies are available via abx-pkg after hook installation."""
-    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
-
-    EnvProvider.model_rebuild()
-
-    # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
-    node_loaded = node_binary.load()
-    assert node_loaded and node_loaded.abspath, "Node.js required for infiniscroll plugin"
-
-
-def test_config_infiniscroll_disabled_skips():
-    """Test that INFINISCROLL_ENABLED=False exits without emitting JSONL."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        env = get_test_env()
-        env['INFINISCROLL_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-disabled'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, got: {jsonl_lines}"
-
-
-def test_fails_gracefully_without_chrome_session():
-    """Test that hook fails gracefully when no chrome session exists."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        infiniscroll_dir = tmpdir / 'snapshot' / 'infiniscroll'
-        infiniscroll_dir.mkdir(parents=True, exist_ok=True)
-
-        result = subprocess.run(
-            ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-no-chrome'],
-            cwd=infiniscroll_dir,
-            capture_output=True,
-            text=True,
-            env=get_test_env(),
-            timeout=30
-        )
-
-        # Should fail (exit 1) when no chrome session
-        assert result.returncode != 0, "Should fail when no chrome session exists"
-        # Error could be about chrome/CDP not found, or puppeteer module missing
-        err_lower = result.stderr.lower()
-        assert any(x in err_lower for x in ['chrome', 'cdp', 'puppeteer', 'module']), \
-            f"Should mention chrome/CDP/puppeteer in error: {result.stderr}"
-
-
-def test_scrolls_page_and_outputs_stats():
-    """Integration test: scroll page and verify JSONL output format."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        with chrome_session(
-            Path(tmpdir),
-            crawl_id='test-infiniscroll',
-            snapshot_id='snap-infiniscroll',
-            test_url=TEST_URL,
-        ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-            # Create infiniscroll output directory (sibling to chrome)
-            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
-            infiniscroll_dir.mkdir()
-
-            # Run infiniscroll hook
-            env['INFINISCROLL_SCROLL_LIMIT'] = '3'  # Limit scrolls for faster test
-            env['INFINISCROLL_SCROLL_DELAY'] = '500'  # Faster scrolling
-            env['INFINISCROLL_MIN_HEIGHT'] = '1000'  # Lower threshold for test
-
-            result = subprocess.run(
-                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-infiniscroll'],
-                cwd=str(infiniscroll_dir),
-                capture_output=True,
-                text=True,
-                timeout=60,
-                env=env
-            )
-
-            assert result.returncode == 0, f"Infiniscroll failed: {result.stderr}\nStdout: {result.stdout}"
-
-            # Parse JSONL output
-            result_json = None
-            for line in result.stdout.strip().split('\n'):
-                line = line.strip()
-                if line.startswith('{'):
-                    try:
-                        record = json.loads(line)
-                        if record.get('type') == 'ArchiveResult':
-                            result_json = record
-                            break
-                    except json.JSONDecodeError:
-                        pass
-
-            assert result_json is not None, f"Should have ArchiveResult JSONL output. Stdout: {result.stdout}"
-            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-            # Verify output_str format: "scrolled to X,XXXpx (+Y,YYYpx new content) over Z.Zs"
-            output_str = result_json.get('output_str', '')
-            assert output_str.startswith('scrolled to'), f"output_str should start with 'scrolled to': {output_str}"
-            assert 'px' in output_str, f"output_str should contain pixel count: {output_str}"
-            assert re.search(r'over \d+(\.\d+)?s', output_str), f"output_str should contain duration: {output_str}"
-
-            # Verify no files created in output directory
-            output_files = list(infiniscroll_dir.iterdir())
-            assert len(output_files) == 0, f"Should not create any files, but found: {output_files}"
-
-
-def test_config_scroll_limit_honored():
-    """Test that INFINISCROLL_SCROLL_LIMIT config is respected."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        with chrome_session(
-            Path(tmpdir),
-            crawl_id='test-scroll-limit',
-            snapshot_id='snap-limit',
-            test_url=TEST_URL,
-        ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-
-            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
-            infiniscroll_dir.mkdir()
-
-            # Set scroll limit to 2 (use env from setup_chrome_session)
-            env['INFINISCROLL_SCROLL_LIMIT'] = '2'
-            env['INFINISCROLL_SCROLL_DELAY'] = '500'
-            env['INFINISCROLL_MIN_HEIGHT'] = '100000'  # High threshold so limit kicks in
-
-            result = subprocess.run(
-                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-limit'],
-                cwd=str(infiniscroll_dir),
-                capture_output=True,
-                text=True,
-                timeout=60,
-                env=env
-            )
-
-            assert result.returncode == 0, f"Infiniscroll failed: {result.stderr}"
-
-            # Parse output and verify scroll count
-            result_json = None
-            for line in result.stdout.strip().split('\n'):
-                if line.strip().startswith('{'):
-                    try:
-                        record = json.loads(line)
-                        if record.get('type') == 'ArchiveResult':
-                            result_json = record
-                            break
-                    except json.JSONDecodeError:
-                        pass
-
-            assert result_json is not None, "Should have JSONL output"
-            output_str = result_json.get('output_str', '')
-
-            # Verify output format and that it completed (scroll limit enforced internally)
-            assert output_str.startswith('scrolled to'), f"Should have valid output_str: {output_str}"
-            assert result_json['status'] == 'succeeded', f"Should succeed with scroll limit: {result_json}"
-
-
-
-def test_config_timeout_honored():
-    """Test that INFINISCROLL_TIMEOUT config is respected."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        with chrome_session(
-            Path(tmpdir),
-            crawl_id='test-timeout',
-            snapshot_id='snap-timeout',
-            test_url=TEST_URL,
-        ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-
-            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
-            infiniscroll_dir.mkdir()
-
-            # Set very short timeout (use env from setup_chrome_session)
-            env['INFINISCROLL_TIMEOUT'] = '3'  # 3 seconds
-            env['INFINISCROLL_SCROLL_DELAY'] = '2000'  # 2s delay - timeout should trigger
-            env['INFINISCROLL_SCROLL_LIMIT'] = '100'  # High limit
-            env['INFINISCROLL_MIN_HEIGHT'] = '100000'
-
-            start_time = time.time()
-            result = subprocess.run(
-                ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-timeout'],
-                cwd=str(infiniscroll_dir),
-                capture_output=True,
-                text=True,
-                timeout=30,
-                env=env
-            )
-            elapsed = time.time() - start_time
-
-            # Should complete within reasonable time (timeout + buffer)
-            assert elapsed < 15, f"Should respect timeout, took {elapsed:.1f}s"
-            assert result.returncode == 0, f"Should complete even with timeout: {result.stderr}"
-
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/istilldontcareaboutcookies/config.json b/archivebox/plugins/istilldontcareaboutcookies/config.json
deleted file mode 100644
index 44c488b0..00000000
--- a/archivebox/plugins/istilldontcareaboutcookies/config.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "ISTILLDONTCAREABOUTCOOKIES_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["USE_ISTILLDONTCAREABOUTCOOKIES"],
-      "description": "Enable I Still Don't Care About Cookies browser extension"
-    }
-  }
-}
diff --git a/archivebox/plugins/istilldontcareaboutcookies/on_Crawl__81_install_istilldontcareaboutcookies_extension.js b/archivebox/plugins/istilldontcareaboutcookies/on_Crawl__81_install_istilldontcareaboutcookies_extension.js
deleted file mode 100755
index ab29cdac..00000000
--- a/archivebox/plugins/istilldontcareaboutcookies/on_Crawl__81_install_istilldontcareaboutcookies_extension.js
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/env node
-/**
- * I Still Don't Care About Cookies Extension Plugin
- *
- * Installs and configures the "I still don't care about cookies" Chrome extension
- * for automatic cookie consent banner dismissal during page archiving.
- *
- * Extension: https://chromewebstore.google.com/detail/edibdbjcniadpccecjdfdjjppcpchdlm
- *
- * Priority: 81 - Must install before Chrome session starts at Crawl level
- * Hook: on_Crawl (runs once per crawl, not per snapshot)
- *
- * This extension automatically:
- * - Dismisses cookie consent popups
- * - Removes cookie banners
- * - Accepts necessary cookies to proceed with browsing
- * - Works on thousands of websites out of the box
- */
-
-const path = require('path');
-const fs = require('fs');
-
-// Import extension utilities
-const extensionUtils = require('../chrome/chrome_utils.js');
-
-// Extension metadata
-const EXTENSION = {
-    webstore_id: 'edibdbjcniadpccecjdfdjjppcpchdlm',
-    name: 'istilldontcareaboutcookies',
-};
-
-// Get extensions directory from environment or use default
-const EXTENSIONS_DIR = process.env.CHROME_EXTENSIONS_DIR ||
-    path.join(process.env.DATA_DIR || './data', 'personas', process.env.ACTIVE_PERSONA || 'Default', 'chrome_extensions');
-
-/**
- * Install the I Still Don't Care About Cookies extension
- */
-async function installCookiesExtension() {
-    console.log('[*] Installing I Still Don\'t Care About Cookies extension...');
-
-    // Install the extension
-    const extension = await extensionUtils.loadOrInstallExtension(EXTENSION, EXTENSIONS_DIR);
-
-    if (!extension) {
-        console.error('[❌] Failed to install I Still Don\'t Care About Cookies extension');
-        return null;
-    }
-
-    console.log('[+] I Still Don\'t Care About Cookies extension installed');
-    console.log('[+] Cookie banners will be automatically dismissed during archiving');
-
-    return extension;
-}
-
-/**
- * Note: This extension works out of the box with no configuration needed.
- * It automatically detects and dismisses cookie banners on page load.
- */
-
-/**
- * Main entry point - install extension before archiving
- */
-async function main() {
-    // Check if extension is already cached
-    const cacheFile = path.join(EXTENSIONS_DIR, 'istilldontcareaboutcookies.extension.json');
-
-    if (fs.existsSync(cacheFile)) {
-        try {
-            const cached = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
-            const manifestPath = path.join(cached.unpacked_path, 'manifest.json');
-
-            if (fs.existsSync(manifestPath)) {
-                console.log('[*] I Still Don\'t Care About Cookies extension already installed (using cache)');
-                return cached;
-            }
-        } catch (e) {
-            // Cache file corrupted, re-install
-            console.warn('[⚠️] Extension cache corrupted, re-installing...');
-        }
-    }
-
-    // Install extension
-    const extension = await installCookiesExtension();
-
-    // Export extension metadata for chrome plugin to load
-    if (extension) {
-        // Write extension info to a cache file that chrome plugin can read
-        await fs.promises.mkdir(EXTENSIONS_DIR, { recursive: true });
-        await fs.promises.writeFile(
-            cacheFile,
-            JSON.stringify(extension, null, 2)
-        );
-        console.log(`[+] Extension metadata written to ${cacheFile}`);
-    }
-
-    return extension;
-}
-
-// Export functions for use by other plugins
-module.exports = {
-    EXTENSION,
-    installCookiesExtension,
-};
-
-// Run if executed directly
-if (require.main === module) {
-    main().then(() => {
-        console.log('[✓] I Still Don\'t Care About Cookies extension setup complete');
-        process.exit(0);
-    }).catch(err => {
-        console.error('[❌] I Still Don\'t Care About Cookies extension setup failed:', err);
-        process.exit(1);
-    });
-}
diff --git a/archivebox/plugins/istilldontcareaboutcookies/templates/icon.html b/archivebox/plugins/istilldontcareaboutcookies/templates/icon.html
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py b/archivebox/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py
deleted file mode 100644
index 1371b5c7..00000000
--- a/archivebox/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py
+++ /dev/null
@@ -1,641 +0,0 @@
-"""
-Unit tests for istilldontcareaboutcookies plugin
-
-Tests invoke the plugin hook as an external process and verify outputs/side effects.
-"""
-
-import json
-import os
-import signal
-import subprocess
-import tempfile
-import time
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    setup_test_env,
-    get_test_env,
-    launch_chromium_session,
-    kill_chromium_session,
-    CHROME_LAUNCH_HOOK,
-    PLUGINS_ROOT,
-)
-
-
-PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_SCRIPT = next(PLUGIN_DIR.glob('on_Crawl__*_install_istilldontcareaboutcookies_extension.*'), None)
-
-
-def test_install_script_exists():
-    """Verify install script exists"""
-    assert INSTALL_SCRIPT.exists(), f"Install script not found: {INSTALL_SCRIPT}"
-
-
-def test_extension_metadata():
-    """Test that extension has correct metadata"""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        env["CHROME_EXTENSIONS_DIR"] = str(Path(tmpdir) / "chrome_extensions")
-
-        result = subprocess.run(
-            ["node", "-e", f"const ext = require('{INSTALL_SCRIPT}'); console.log(JSON.stringify(ext.EXTENSION))"],
-            capture_output=True,
-            text=True,
-            env=env
-        )
-
-        assert result.returncode == 0, f"Failed to load extension metadata: {result.stderr}"
-
-        metadata = json.loads(result.stdout)
-        assert metadata["webstore_id"] == "edibdbjcniadpccecjdfdjjppcpchdlm"
-        assert metadata["name"] == "istilldontcareaboutcookies"
-
-
-def test_install_creates_cache():
-    """Test that install creates extension cache"""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        ext_dir = Path(tmpdir) / "chrome_extensions"
-        ext_dir.mkdir(parents=True)
-
-        env = os.environ.copy()
-        env["CHROME_EXTENSIONS_DIR"] = str(ext_dir)
-
-        result = subprocess.run(
-            ["node", str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=60
-        )
-
-        # Check output mentions installation
-        assert "Installing" in result.stdout or "installed" in result.stdout or "istilldontcareaboutcookies" in result.stdout
-
-        # Check cache file was created
-        cache_file = ext_dir / "istilldontcareaboutcookies.extension.json"
-        assert cache_file.exists(), "Cache file should be created"
-
-        # Verify cache content
-        cache_data = json.loads(cache_file.read_text())
-        assert cache_data["webstore_id"] == "edibdbjcniadpccecjdfdjjppcpchdlm"
-        assert cache_data["name"] == "istilldontcareaboutcookies"
-
-
-def test_install_uses_existing_cache():
-    """Test that install uses existing cache when available"""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        ext_dir = Path(tmpdir) / "chrome_extensions"
-        ext_dir.mkdir(parents=True)
-
-        # Create fake cache
-        fake_extension_dir = ext_dir / "edibdbjcniadpccecjdfdjjppcpchdlm__istilldontcareaboutcookies"
-        fake_extension_dir.mkdir(parents=True)
-
-        manifest = {"version": "1.1.8", "name": "I still don't care about cookies"}
-        (fake_extension_dir / "manifest.json").write_text(json.dumps(manifest))
-
-        env = os.environ.copy()
-        env["CHROME_EXTENSIONS_DIR"] = str(ext_dir)
-
-        result = subprocess.run(
-            ["node", str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        # Should use cache or install successfully
-        assert result.returncode == 0
-
-
-def test_no_configuration_required():
-    """Test that extension works without any configuration"""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        ext_dir = Path(tmpdir) / "chrome_extensions"
-        ext_dir.mkdir(parents=True)
-
-        env = os.environ.copy()
-        env["CHROME_EXTENSIONS_DIR"] = str(ext_dir)
-        # No special env vars needed - works out of the box
-
-        result = subprocess.run(
-            ["node", str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=60
-        )
-
-        # Should not require any API keys or configuration
-        assert "API" not in (result.stdout + result.stderr) or result.returncode == 0
-
-
-TEST_URL = 'https://www.filmin.es/'
-
-
-def test_extension_loads_in_chromium():
-    """Verify extension loads in Chromium by visiting its options page.
-
-    Uses Chromium with --load-extension to load the extension, then navigates
-    to chrome-extension://<id>/options.html and checks that the extension name
-    appears in the page content.
-    """
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set up isolated env with proper directory structure
-        env = setup_test_env(tmpdir)
-        env.setdefault('CHROME_HEADLESS', 'true')
-
-        ext_dir = Path(env['CHROME_EXTENSIONS_DIR'])
-
-        # Step 1: Install the extension
-        result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
-            cwd=str(tmpdir),
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=60
-        )
-        assert result.returncode == 0, f"Extension install failed: {result.stderr}"
-
-        # Verify extension cache was created
-        cache_file = ext_dir / 'istilldontcareaboutcookies.extension.json'
-        assert cache_file.exists(), "Extension cache not created"
-        ext_data = json.loads(cache_file.read_text())
-        print(f"Extension installed: {ext_data.get('name')} v{ext_data.get('version')}")
-
-        # Step 2: Launch Chromium using the chrome hook (loads extensions automatically)
-        crawl_id = 'test-cookies'
-        crawl_dir = Path(env['CRAWLS_DIR']) / crawl_id
-        crawl_dir.mkdir(parents=True, exist_ok=True)
-        chrome_dir = crawl_dir / 'chrome'
-        chrome_dir.mkdir(parents=True, exist_ok=True)
-        env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
-
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=env
-        )
-
-        # Wait for Chromium to launch and CDP URL to be available
-        cdp_url = None
-        for i in range(20):
-            if chrome_launch_process.poll() is not None:
-                stdout, stderr = chrome_launch_process.communicate()
-                raise RuntimeError(f"Chromium launch failed:\nStdout: {stdout}\nStderr: {stderr}")
-            cdp_file = chrome_dir / 'cdp_url.txt'
-            if cdp_file.exists():
-                cdp_url = cdp_file.read_text().strip()
-                break
-            time.sleep(1)
-
-        assert cdp_url, "Chromium CDP URL not found after 20s"
-        print(f"Chromium launched with CDP URL: {cdp_url}")
-
-        # Check that extensions were loaded
-        extensions_file = chrome_dir / 'extensions.json'
-        if extensions_file.exists():
-            loaded_exts = json.loads(extensions_file.read_text())
-            print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
-
-        try:
-            # Step 3: Connect to Chromium and verify extension loaded via options page
-            test_script = f'''
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer-core');
-
-(async () => {{
-    const browser = await puppeteer.connect({{ browserWSEndpoint: '{cdp_url}' }});
-
-    // Wait for extension to initialize
-    await new Promise(r => setTimeout(r, 2000));
-
-    // Find extension targets to get the extension ID
-    const targets = browser.targets();
-    const extTargets = targets.filter(t =>
-        t.url().startsWith('chrome-extension://') ||
-        t.type() === 'service_worker' ||
-        t.type() === 'background_page'
-    );
-
-    // Filter out Chrome's built-in extensions
-    const builtinIds = ['nkeimhogjdpnpccoofpliimaahmaaome', 'fignfifoniblkonapihmkfakmlgkbkcf',
-                       'ahfgeienlihckogmohjhadlkjgocpleb', 'mhjfbmdgcfjbbpaeojofohoefgiehjai'];
-    const customExtTargets = extTargets.filter(t => {{
-        const url = t.url();
-        if (!url.startsWith('chrome-extension://')) return false;
-        const extId = url.split('://')[1].split('/')[0];
-        return !builtinIds.includes(extId);
-    }});
-
-    console.error('Custom extension targets found:', customExtTargets.length);
-    customExtTargets.forEach(t => console.error('  -', t.type(), t.url()));
-
-    if (customExtTargets.length === 0) {{
-        console.log(JSON.stringify({{ loaded: false, error: 'No custom extension targets found' }}));
-        browser.disconnect();
-        return;
-    }}
-
-    // Get the extension ID from the first custom extension target
-    const extUrl = customExtTargets[0].url();
-    const extId = extUrl.split('://')[1].split('/')[0];
-    console.error('Extension ID:', extId);
-
-    // Try to navigate to the extension's options.html page
-    const page = await browser.newPage();
-    const optionsUrl = 'chrome-extension://' + extId + '/options.html';
-    console.error('Navigating to options page:', optionsUrl);
-
-    try {{
-        await page.goto(optionsUrl, {{ waitUntil: 'domcontentloaded', timeout: 10000 }});
-        const pageContent = await page.content();
-        const pageTitle = await page.title();
-
-        // Check if extension name appears in the page
-        const hasExtensionName = pageContent.toLowerCase().includes('cookie') ||
-                                pageContent.toLowerCase().includes('idontcareaboutcookies') ||
-                                pageTitle.toLowerCase().includes('cookie');
-
-        console.log(JSON.stringify({{
-            loaded: true,
-            extensionId: extId,
-            optionsPageLoaded: true,
-            pageTitle: pageTitle,
-            hasExtensionName: hasExtensionName,
-            contentLength: pageContent.length
-        }}));
-    }} catch (e) {{
-        // options.html may not exist, but extension is still loaded
-        console.log(JSON.stringify({{
-            loaded: true,
-            extensionId: extId,
-            optionsPageLoaded: false,
-            error: e.message
-        }}));
-    }}
-
-    browser.disconnect();
-}})();
-'''
-            script_path = tmpdir / 'test_extension.js'
-            script_path.write_text(test_script)
-
-            result = subprocess.run(
-                ['node', str(script_path)],
-                cwd=str(tmpdir),
-                capture_output=True,
-                text=True,
-                env=env,
-                timeout=90
-            )
-
-            print(f"stderr: {result.stderr}")
-            print(f"stdout: {result.stdout}")
-
-            assert result.returncode == 0, f"Test failed: {result.stderr}"
-
-            output_lines = [l for l in result.stdout.strip().split('\n') if l.startswith('{')]
-            assert output_lines, f"No JSON output: {result.stdout}"
-
-            test_result = json.loads(output_lines[-1])
-            assert test_result.get('loaded'), \
-                f"Extension should be loaded in Chromium. Result: {test_result}"
-            print(f"Extension loaded successfully: {test_result}")
-
-        finally:
-            # Clean up Chromium
-            try:
-                chrome_launch_process.send_signal(signal.SIGTERM)
-                chrome_launch_process.wait(timeout=5)
-            except:
-                pass
-            chrome_pid_file = chrome_dir / 'chrome.pid'
-            if chrome_pid_file.exists():
-                try:
-                    chrome_pid = int(chrome_pid_file.read_text().strip())
-                    os.kill(chrome_pid, signal.SIGKILL)
-                except (OSError, ValueError):
-                    pass
-
-
-def check_cookie_consent_visibility(cdp_url: str, test_url: str, env: dict, script_dir: Path) -> dict:
-    """Check if cookie consent elements are visible on a page.
-
-    Returns dict with:
-        - visible: bool - whether any cookie consent element is visible
-        - selector: str - which selector matched (if visible)
-        - elements_found: list - all cookie-related elements found in DOM
-        - html_snippet: str - snippet of the page HTML for debugging
-    """
-    test_script = f'''
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer-core');
-
-(async () => {{
-    const browser = await puppeteer.connect({{ browserWSEndpoint: '{cdp_url}' }});
-
-    const page = await browser.newPage();
-    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
-    await page.setViewport({{ width: 1440, height: 900 }});
-
-    console.error('Navigating to {test_url}...');
-    await page.goto('{test_url}', {{ waitUntil: 'networkidle2', timeout: 30000 }});
-
-    // Wait for page to fully render and any cookie scripts to run
-    await new Promise(r => setTimeout(r, 3000));
-
-    // Check cookie consent visibility using multiple common selectors
-    const result = await page.evaluate(() => {{
-        // Common cookie consent selectors used by various consent management platforms
-        const selectors = [
-            // CookieYes
-            '.cky-consent-container', '.cky-popup-center', '.cky-overlay', '.cky-modal',
-            // OneTrust
-            '#onetrust-consent-sdk', '#onetrust-banner-sdk', '.onetrust-pc-dark-filter',
-            // Cookiebot
-            '#CybotCookiebotDialog', '#CybotCookiebotDialogBodyUnderlay',
-            // Generic cookie banners
-            '[class*="cookie-consent"]', '[class*="cookie-banner"]', '[class*="cookie-notice"]',
-            '[class*="cookie-popup"]', '[class*="cookie-modal"]', '[class*="cookie-dialog"]',
-            '[id*="cookie-consent"]', '[id*="cookie-banner"]', '[id*="cookie-notice"]',
-            '[id*="cookieconsent"]', '[id*="cookie-law"]',
-            // GDPR banners
-            '[class*="gdpr"]', '[id*="gdpr"]',
-            // Consent banners
-            '[class*="consent-banner"]', '[class*="consent-modal"]', '[class*="consent-popup"]',
-            // Privacy banners
-            '[class*="privacy-banner"]', '[class*="privacy-notice"]',
-            // Common frameworks
-            '.cc-window', '.cc-banner', '#cc-main',  // Cookie Consent by Insites
-            '.qc-cmp2-container',  // Quantcast
-            '.sp-message-container',  // SourcePoint
-        ];
-
-        const elementsFound = [];
-        let visibleElement = null;
-
-        for (const sel of selectors) {{
-            try {{
-                const elements = document.querySelectorAll(sel);
-                for (const el of elements) {{
-                    const style = window.getComputedStyle(el);
-                    const rect = el.getBoundingClientRect();
-                    const isVisible = style.display !== 'none' &&
-                                     style.visibility !== 'hidden' &&
-                                     style.opacity !== '0' &&
-                                     rect.width > 0 && rect.height > 0;
-
-                    elementsFound.push({{
-                        selector: sel,
-                        visible: isVisible,
-                        display: style.display,
-                        visibility: style.visibility,
-                        opacity: style.opacity,
-                        width: rect.width,
-                        height: rect.height
-                    }});
-
-                    if (isVisible && !visibleElement) {{
-                        visibleElement = {{ selector: sel, width: rect.width, height: rect.height }};
-                    }}
-                }}
-            }} catch (e) {{
-                // Invalid selector, skip
-            }}
-        }}
-
-        // Also grab a snippet of the HTML to help debug
-        const bodyHtml = document.body.innerHTML.slice(0, 2000);
-        const hasCookieKeyword = bodyHtml.toLowerCase().includes('cookie') ||
-                                  bodyHtml.toLowerCase().includes('consent') ||
-                                  bodyHtml.toLowerCase().includes('gdpr');
-
-        return {{
-            visible: visibleElement !== null,
-            selector: visibleElement ? visibleElement.selector : null,
-            elements_found: elementsFound,
-            has_cookie_keyword_in_html: hasCookieKeyword,
-            html_snippet: bodyHtml.slice(0, 500)
-        }};
-    }});
-
-    console.error('Cookie consent check result:', JSON.stringify({{
-        visible: result.visible,
-        selector: result.selector,
-        elements_found_count: result.elements_found.length
-    }}));
-
-    browser.disconnect();
-    console.log(JSON.stringify(result));
-}})();
-'''
-    script_path = script_dir / 'check_cookies.js'
-    script_path.write_text(test_script)
-
-    result = subprocess.run(
-        ['node', str(script_path)],
-        cwd=str(script_dir),
-        capture_output=True,
-        text=True,
-        env=env,
-        timeout=90
-    )
-
-    if result.returncode != 0:
-        raise RuntimeError(f"Cookie check script failed: {result.stderr}")
-
-    output_lines = [l for l in result.stdout.strip().split('\n') if l.startswith('{')]
-    if not output_lines:
-        raise RuntimeError(f"No JSON output from cookie check: {result.stdout}\nstderr: {result.stderr}")
-
-    return json.loads(output_lines[-1])
-
-
-def test_hides_cookie_consent_on_filmin():
-    """Live test: verify extension hides cookie consent popup on filmin.es.
-
-    This test runs TWO browser sessions:
-    1. WITHOUT extension - verifies cookie consent IS visible (baseline)
-    2. WITH extension - verifies cookie consent is HIDDEN
-
-    This ensures we're actually testing the extension's effect, not just
-    that a page happens to not have cookie consent.
-    """
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set up isolated env with proper directory structure
-        env_base = setup_test_env(tmpdir)
-        env_base['CHROME_HEADLESS'] = 'true'
-
-        ext_dir = Path(env_base['CHROME_EXTENSIONS_DIR'])
-
-        # ============================================================
-        # STEP 1: BASELINE - Run WITHOUT extension, verify cookie consent IS visible
-        # ============================================================
-        print("\n" + "="*60)
-        print("STEP 1: BASELINE TEST (no extension)")
-        print("="*60)
-
-        data_dir = Path(env_base['DATA_DIR'])
-
-        env_no_ext = env_base.copy()
-        env_no_ext['CHROME_EXTENSIONS_DIR'] = str(data_dir / 'personas' / 'Default' / 'empty_extensions')
-        (data_dir / 'personas' / 'Default' / 'empty_extensions').mkdir(parents=True, exist_ok=True)
-
-        # Launch baseline Chromium in crawls directory
-        baseline_crawl_id = 'baseline-no-ext'
-        baseline_crawl_dir = Path(env_base['CRAWLS_DIR']) / baseline_crawl_id
-        baseline_crawl_dir.mkdir(parents=True, exist_ok=True)
-        baseline_chrome_dir = baseline_crawl_dir / 'chrome'
-        env_no_ext['CRAWL_OUTPUT_DIR'] = str(baseline_crawl_dir)
-        baseline_process = None
-
-        try:
-            baseline_process, baseline_cdp_url = launch_chromium_session(
-                env_no_ext, baseline_chrome_dir, baseline_crawl_id
-            )
-            print(f"Baseline Chromium launched: {baseline_cdp_url}")
-
-            # Wait a moment for browser to be ready
-            time.sleep(2)
-
-            baseline_result = check_cookie_consent_visibility(
-                baseline_cdp_url, TEST_URL, env_no_ext, tmpdir
-            )
-
-            print(f"Baseline result: visible={baseline_result['visible']}, "
-                  f"elements_found={len(baseline_result['elements_found'])}")
-
-            if baseline_result['elements_found']:
-                print("Elements found in baseline:")
-                for el in baseline_result['elements_found'][:5]:  # Show first 5
-                    print(f"  - {el['selector']}: visible={el['visible']}, "
-                          f"display={el['display']}, size={el['width']}x{el['height']}")
-
-        finally:
-            if baseline_process:
-                kill_chromium_session(baseline_process, baseline_chrome_dir)
-
-        # Verify baseline shows cookie consent
-        if not baseline_result['visible']:
-            # If no cookie consent visible in baseline, we can't test the extension
-            # This could happen if:
-            # - The site changed and no longer shows cookie consent
-            # - Cookie consent is region-specific
-            # - Our selectors don't match this site
-            print("\nWARNING: No cookie consent visible in baseline!")
-            print(f"HTML has cookie keywords: {baseline_result.get('has_cookie_keyword_in_html')}")
-            print(f"HTML snippet: {baseline_result.get('html_snippet', '')[:200]}")
-
-            pytest.fail(
-                f"Cannot test extension: no cookie consent visible in baseline on {TEST_URL}. "
-                f"Elements found: {len(baseline_result['elements_found'])}. "
-                f"The site may have changed or cookie consent may be region-specific."
-            )
-
-        print(f"\n✓ Baseline confirmed: Cookie consent IS visible (selector: {baseline_result['selector']})")
-
-        # ============================================================
-        # STEP 2: Install the extension
-        # ============================================================
-        print("\n" + "="*60)
-        print("STEP 2: INSTALLING EXTENSION")
-        print("="*60)
-
-        env_with_ext = env_base.copy()
-        env_with_ext['CHROME_EXTENSIONS_DIR'] = str(ext_dir)
-
-        result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
-            cwd=str(tmpdir),
-            capture_output=True,
-            text=True,
-            env=env_with_ext,
-            timeout=60
-        )
-        assert result.returncode == 0, f"Extension install failed: {result.stderr}"
-
-        cache_file = ext_dir / 'istilldontcareaboutcookies.extension.json'
-        assert cache_file.exists(), "Extension cache not created"
-        ext_data = json.loads(cache_file.read_text())
-        print(f"Extension installed: {ext_data.get('name')} v{ext_data.get('version')}")
-
-        # ============================================================
-        # STEP 3: Run WITH extension, verify cookie consent is HIDDEN
-        # ============================================================
-        print("\n" + "="*60)
-        print("STEP 3: TEST WITH EXTENSION")
-        print("="*60)
-
-        # Launch extension test Chromium in crawls directory
-        ext_crawl_id = 'test-with-ext'
-        ext_crawl_dir = Path(env_base['CRAWLS_DIR']) / ext_crawl_id
-        ext_crawl_dir.mkdir(parents=True, exist_ok=True)
-        ext_chrome_dir = ext_crawl_dir / 'chrome'
-        env_with_ext['CRAWL_OUTPUT_DIR'] = str(ext_crawl_dir)
-        ext_process = None
-
-        try:
-            ext_process, ext_cdp_url = launch_chromium_session(
-                env_with_ext, ext_chrome_dir, ext_crawl_id
-            )
-            print(f"Extension Chromium launched: {ext_cdp_url}")
-
-            # Check that extension was loaded
-            extensions_file = ext_chrome_dir / 'extensions.json'
-            if extensions_file.exists():
-                loaded_exts = json.loads(extensions_file.read_text())
-                print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
-
-            # Wait for extension to initialize
-            time.sleep(3)
-
-            ext_result = check_cookie_consent_visibility(
-                ext_cdp_url, TEST_URL, env_with_ext, tmpdir
-            )
-
-            print(f"Extension result: visible={ext_result['visible']}, "
-                  f"elements_found={len(ext_result['elements_found'])}")
-
-            if ext_result['elements_found']:
-                print("Elements found with extension:")
-                for el in ext_result['elements_found'][:5]:
-                    print(f"  - {el['selector']}: visible={el['visible']}, "
-                          f"display={el['display']}, size={el['width']}x{el['height']}")
-
-        finally:
-            if ext_process:
-                kill_chromium_session(ext_process, ext_chrome_dir)
-
-        # ============================================================
-        # STEP 4: Compare results
-        # ============================================================
-        print("\n" + "="*60)
-        print("STEP 4: COMPARISON")
-        print("="*60)
-        print(f"Baseline (no extension): cookie consent visible = {baseline_result['visible']}")
-        print(f"With extension: cookie consent visible = {ext_result['visible']}")
-
-        assert baseline_result['visible'], \
-            "Baseline should show cookie consent (this shouldn't happen, we checked above)"
-
-        assert not ext_result['visible'], \
-            f"Cookie consent should be HIDDEN by extension.\n" \
-            f"Baseline showed consent at: {baseline_result['selector']}\n" \
-            f"But with extension, consent is still visible.\n" \
-            f"Elements still visible: {[e for e in ext_result['elements_found'] if e['visible']]}"
-
-        print("\n✓ SUCCESS: Extension correctly hides cookie consent!")
-        print(f"  - Baseline showed consent at: {baseline_result['selector']}")
-        print(f"  - Extension successfully hid it")
diff --git a/archivebox/plugins/mercury/config.json b/archivebox/plugins/mercury/config.json
deleted file mode 100644
index 039c38a7..00000000
--- a/archivebox/plugins/mercury/config.json
+++ /dev/null
@@ -1,40 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "MERCURY_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_MERCURY", "USE_MERCURY"],
-      "description": "Enable Mercury text extraction"
-    },
-    "MERCURY_BINARY": {
-      "type": "string",
-      "default": "postlight-parser",
-      "x-aliases": ["POSTLIGHT_PARSER_BINARY"],
-      "description": "Path to Mercury/Postlight parser binary"
-    },
-    "MERCURY_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for Mercury in seconds"
-    },
-    "MERCURY_ARGS": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["MERCURY_DEFAULT_ARGS"],
-      "description": "Default Mercury parser arguments"
-    },
-    "MERCURY_ARGS_EXTRA": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["MERCURY_EXTRA_ARGS"],
-      "description": "Extra arguments to append to Mercury parser command"
-    }
-  }
-}
diff --git a/archivebox/plugins/mercury/on_Crawl__40_mercury_install.py b/archivebox/plugins/mercury/on_Crawl__40_mercury_install.py
deleted file mode 100755
index 7ec64d8b..00000000
--- a/archivebox/plugins/mercury/on_Crawl__40_mercury_install.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit postlight-parser Binary dependency for the crawl.
-"""
-
-import json
-import os
-import sys
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def output_binary(name: str, binproviders: str):
-    """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'overrides': {
-            'npm': {
-                'packages': ['@postlight/parser'],
-            }
-        },
-        'machine_id': machine_id,
-    }
-    print(json.dumps(record))
-
-
-def main():
-    mercury_enabled = get_env_bool('MERCURY_ENABLED', True)
-
-    if not mercury_enabled:
-        sys.exit(0)
-
-    output_binary(name='postlight-parser', binproviders='npm,env')
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/mercury/on_Snapshot__57_mercury.py b/archivebox/plugins/mercury/on_Snapshot__57_mercury.py
deleted file mode 100644
index 1af0bdb6..00000000
--- a/archivebox/plugins/mercury/on_Snapshot__57_mercury.py
+++ /dev/null
@@ -1,200 +0,0 @@
-#!/usr/bin/env python3
-"""
-Extract article content using Postlight's Mercury Parser.
-
-Usage: on_Snapshot__mercury.py --url=<url> --snapshot-id=<uuid>
-Output: Creates mercury/ directory with content.html, content.txt, article.json
-
-Environment variables:
-    MERCURY_BINARY: Path to postlight-parser binary
-    MERCURY_TIMEOUT: Timeout in seconds (default: 60)
-    MERCURY_ARGS: Default Mercury arguments (JSON array)
-    MERCURY_ARGS_EXTRA: Extra arguments to append (JSON array)
-    TIMEOUT: Fallback timeout
-
-Note: Requires postlight-parser: npm install -g @postlight/parser
-"""
-
-import html
-import json
-import os
-import subprocess
-import sys
-from pathlib import Path
-from urllib.parse import urlparse
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'mercury'
-BIN_NAME = 'postlight-parser'
-BIN_PROVIDERS = 'npm,env'
-OUTPUT_DIR = '.'
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
-    """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
-    if not val:
-        return default if default is not None else []
-    try:
-        result = json.loads(val)
-        if isinstance(result, list):
-            return [str(item) for item in result]
-        return default if default is not None else []
-    except json.JSONDecodeError:
-        return default if default is not None else []
-
-
-def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
-    """
-    Extract article using Mercury Parser.
-
-    Returns: (success, output_path, error_message)
-    """
-    timeout = get_env_int('MERCURY_TIMEOUT') or get_env_int('TIMEOUT', 60)
-    mercury_args = get_env_array('MERCURY_ARGS', [])
-    mercury_args_extra = get_env_array('MERCURY_ARGS_EXTRA', [])
-
-    # Output directory is current directory (hook already runs in output dir)
-    output_dir = Path(OUTPUT_DIR)
-
-    try:
-        # Get text version
-        cmd_text = [binary, *mercury_args, *mercury_args_extra, url, '--format=text']
-        result_text = subprocess.run(cmd_text, stdout=subprocess.PIPE, timeout=timeout, text=True)
-        if result_text.stdout:
-            sys.stderr.write(result_text.stdout)
-            sys.stderr.flush()
-
-        if result_text.returncode != 0:
-            return False, None, f'postlight-parser failed (exit={result_text.returncode})'
-
-        try:
-            text_json = json.loads(result_text.stdout)
-        except json.JSONDecodeError:
-            return False, None, 'postlight-parser returned invalid JSON'
-
-        if text_json.get('failed'):
-            return False, None, 'Mercury was not able to extract article'
-
-        # Save text content
-        text_content = text_json.get('content', '')
-        (output_dir / 'content.txt').write_text(text_content, encoding='utf-8')
-
-        # Get HTML version
-        cmd_html = [binary, *mercury_args, *mercury_args_extra, url, '--format=html']
-        result_html = subprocess.run(cmd_html, stdout=subprocess.PIPE, timeout=timeout, text=True)
-        if result_html.stdout:
-            sys.stderr.write(result_html.stdout)
-            sys.stderr.flush()
-
-        try:
-            html_json = json.loads(result_html.stdout)
-        except json.JSONDecodeError:
-            html_json = {}
-
-        # Save HTML content and metadata
-        html_content = html_json.pop('content', '')
-        # Some sources return HTML-escaped markup inside the content blob.
-        # If it looks heavily escaped, unescape once so it renders properly.
-        if html_content:
-            escaped_count = html_content.count('&lt;') + html_content.count('&gt;')
-            tag_count = html_content.count('<')
-            if escaped_count and escaped_count > tag_count * 2:
-                html_content = html.unescape(html_content)
-        (output_dir / 'content.html').write_text(html_content, encoding='utf-8')
-
-        # Save article metadata
-        metadata = {k: v for k, v in text_json.items() if k != 'content'}
-        (output_dir / 'article.json').write_text(json.dumps(metadata, indent=2), encoding='utf-8')
-
-        # Link images/ to responses capture (if available)
-        try:
-            hostname = urlparse(url).hostname or ''
-            if hostname:
-                responses_images = (output_dir / '..' / 'responses' / 'image' / hostname / 'images').resolve()
-                link_path = output_dir / 'images'
-                if responses_images.exists() and responses_images.is_dir():
-                    if link_path.exists() or link_path.is_symlink():
-                        if link_path.is_symlink() or link_path.is_file():
-                            link_path.unlink()
-                        else:
-                            # Don't remove real directories
-                            responses_images = None
-                    if responses_images:
-                        rel_target = os.path.relpath(str(responses_images), str(output_dir))
-                        link_path.symlink_to(rel_target)
-        except Exception:
-            pass
-
-        return True, 'content.html', ''
-
-    except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
-    except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
-
-
-@click.command()
-@click.option('--url', required=True, help='URL to extract article from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Extract article content using Postlight's Mercury Parser."""
-
-    try:
-        # Check if mercury extraction is enabled
-        if not get_env_bool('MERCURY_ENABLED', True):
-            print('Skipping mercury (MERCURY_ENABLED=False)', file=sys.stderr)
-            # Temporary failure (config disabled) - NO JSONL emission
-            sys.exit(0)
-
-        # Get binary from environment
-        binary = get_env('MERCURY_BINARY', 'postlight-parser')
-
-        # Run extraction
-        success, output, error = extract_mercury(url, binary)
-
-        if success:
-            # Success - emit ArchiveResult
-            result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
-            }
-            print(json.dumps(result))
-            sys.exit(0)
-        else:
-            # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
-            sys.exit(1)
-
-    except Exception as e:
-        # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/mercury/templates/card.html b/archivebox/plugins/mercury/templates/card.html
deleted file mode 100644
index cf7cdb40..00000000
--- a/archivebox/plugins/mercury/templates/card.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<!-- Mercury thumbnail - shows Mercury parser extracted article content -->
-<div class="extractor-thumbnail mercury-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fefefe; padding: 8px; font-family: Georgia, serif; font-size: 11px; line-height: 1.4; color: #333;">
-    <iframe src="{{ output_path }}"
-            style="width: 100%; height: 300px; border: none; pointer-events: none;"
-            loading="lazy"
-            sandbox="allow-same-origin">
-    </iframe>
-</div>
diff --git a/archivebox/plugins/mercury/templates/icon.html b/archivebox/plugins/mercury/templates/icon.html
deleted file mode 100644
index bd17e0cf..00000000
--- a/archivebox/plugins/mercury/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--mercury" title="Mercury"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="5" width="18" height="14" rx="2"/><path d="M7 9h6"/><path d="M7 13h10"/><path d="M15 9h3"/></svg></span>
diff --git a/archivebox/plugins/mercury/tests/test_mercury.py b/archivebox/plugins/mercury/tests/test_mercury.py
deleted file mode 100644
index 242eb5db..00000000
--- a/archivebox/plugins/mercury/tests/test_mercury.py
+++ /dev/null
@@ -1,163 +0,0 @@
-"""
-Integration tests for mercury plugin
-
-Tests verify:
-1. Hook script exists
-2. Dependencies installed via validation hooks
-3. Verify deps with abx-pkg
-4. Mercury extraction works on https://example.com
-5. JSONL output is correct
-6. Filesystem output contains extracted content
-7. Config options work
-"""
-
-import json
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_plugin_dir,
-    get_hook_script,
-    PLUGINS_ROOT,
-)
-
-
-PLUGIN_DIR = get_plugin_dir(__file__)
-MERCURY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_mercury.*')
-TEST_URL = 'https://example.com'
-
-def test_hook_script_exists():
-    """Verify on_Snapshot hook exists."""
-    assert MERCURY_HOOK.exists(), f"Hook not found: {MERCURY_HOOK}"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify postlight-parser is available via abx-pkg."""
-    from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
-
-    # Verify postlight-parser is available
-    mercury_binary = Binary(
-        name='postlight-parser',
-        binproviders=[NpmProvider(), EnvProvider()],
-        overrides={'npm': {'packages': ['@postlight/parser']}}
-    )
-    mercury_loaded = mercury_binary.load()
-
-    # If validate hook found it (exit 0), this should succeed
-    # If validate hook didn't find it (exit 1), this may fail unless binprovider installed it
-    if mercury_loaded and mercury_loaded.abspath:
-        assert True, "postlight-parser is available"
-    else:
-        pass
-
-def test_extracts_with_mercury_parser():
-    """Test full workflow: extract with postlight-parser from real HTML via hook."""
-    # Prerequisites checked by earlier test
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Create HTML source that mercury can parse
-        (tmpdir / 'singlefile').mkdir()
-        (tmpdir / 'singlefile' / 'singlefile.html').write_text(
-            '<html><head><title>Test Article</title></head><body>'
-            '<article><h1>Example Article</h1><p>This is test content for mercury parser.</p></article>'
-            '</body></html>'
-        )
-
-        # Run mercury extraction hook
-        result = subprocess.run(
-            [sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=60
-        )
-
-        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Verify filesystem output (hook writes to current directory)
-        output_file = tmpdir / 'content.html'
-        assert output_file.exists(), "content.html not created"
-
-        content = output_file.read_text()
-        assert len(content) > 0, "Output should not be empty"
-
-def test_config_save_mercury_false_skips():
-    """Test that MERCURY_ENABLED=False exits without emitting JSONL."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        env['MERCURY_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            [sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-
-        # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
-
-
-def test_fails_gracefully_without_html():
-    """Test that mercury works even without HTML source (fetches URL directly)."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        result = subprocess.run(
-            [sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=30
-        )
-
-        # Mercury fetches URL directly with postlight-parser, doesn't need HTML source
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        # Mercury should succeed or fail based on network, not based on HTML source
-        assert result_json, "Should emit ArchiveResult"
-        assert result_json['status'] in ['succeeded', 'failed'], f"Should succeed or fail: {result_json}"
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/modalcloser/config.json b/archivebox/plugins/modalcloser/config.json
deleted file mode 100644
index 7e746087..00000000
--- a/archivebox/plugins/modalcloser/config.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "MODALCLOSER_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["CLOSE_MODALS", "AUTO_CLOSE_MODALS"],
-      "description": "Enable automatic modal and dialog closing"
-    },
-    "MODALCLOSER_TIMEOUT": {
-      "type": "integer",
-      "default": 1250,
-      "minimum": 100,
-      "description": "Delay before auto-closing dialogs (ms)"
-    },
-    "MODALCLOSER_POLL_INTERVAL": {
-      "type": "integer",
-      "default": 500,
-      "minimum": 100,
-      "description": "How often to check for CSS modals (ms)"
-    }
-  }
-}
diff --git a/archivebox/plugins/modalcloser/on_Snapshot__15_modalcloser.bg.js b/archivebox/plugins/modalcloser/on_Snapshot__15_modalcloser.bg.js
deleted file mode 100644
index 7f9e664b..00000000
--- a/archivebox/plugins/modalcloser/on_Snapshot__15_modalcloser.bg.js
+++ /dev/null
@@ -1,333 +0,0 @@
-#!/usr/bin/env node
-/**
- * Auto-close browser dialogs and CSS modals.
- *
- * Runs as a background script that sets up listeners BEFORE navigation,
- * so it catches modals that appear on page load.
- *
- * Handles:
- * - Browser dialogs (alert, confirm, prompt, beforeunload)
- * - Framework modals (Bootstrap, Tailwind, shadcn, Angular Material, jQuery UI, SweetAlert)
- * - Cookie consent banners, newsletter popups, age gates
- *
- * Usage: on_Snapshot__15_modalcloser.bg.js --url=<url> --snapshot-id=<uuid>
- * Output: JSONL with modal close stats (no files created)
- * Termination: Send SIGTERM to exit cleanly
- *
- * Environment variables:
- *     MODALCLOSER_ENABLED: Enable/disable (default: true)
- *     MODALCLOSER_TIMEOUT: Delay before auto-closing dialogs in ms (default: 1250)
- *     MODALCLOSER_POLL_INTERVAL: How often to check for CSS modals in ms (default: 500)
- */
-
-const fs = require('fs');
-const path = require('path');
-
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-// Import shared utilities from chrome_utils.js
-const {
-    getEnvBool,
-    getEnvInt,
-    parseArgs,
-    readCdpUrl,
-    readTargetId,
-} = require('../chrome/chrome_utils.js');
-
-// Check if modalcloser is enabled BEFORE requiring puppeteer
-if (!getEnvBool('MODALCLOSER_ENABLED', true)) {
-    console.error('Skipping modalcloser (MODALCLOSER_ENABLED=False)');
-    process.exit(0);
-}
-
-const puppeteer = require('puppeteer-core');
-
-const PLUGIN_NAME = 'modalcloser';
-const CHROME_SESSION_DIR = '../chrome';
-
-function sleep(ms) {
-    return new Promise(resolve => setTimeout(resolve, ms));
-}
-
-/**
- * Close CSS modals using framework-specific dismiss methods.
- * Returns the number of modals closed.
- */
-async function closeModals(page) {
-    return page.evaluate(() => {
-        let closed = 0;
-
-        // Bootstrap 4/5 - use Bootstrap's modal API
-        if (typeof bootstrap !== 'undefined' && bootstrap.Modal) {
-            document.querySelectorAll('.modal.show').forEach(el => {
-                try {
-                    const modal = bootstrap.Modal.getInstance(el);
-                    if (modal) { modal.hide(); closed++; }
-                } catch (e) {}
-            });
-        }
-
-        // Bootstrap 3 / jQuery - use jQuery modal API
-        if (typeof jQuery !== 'undefined' && jQuery.fn && jQuery.fn.modal) {
-            try {
-                const $modals = jQuery('.modal.in, .modal.show');
-                if ($modals.length > 0) {
-                    $modals.modal('hide');
-                    closed += $modals.length;
-                }
-            } catch (e) {}
-        }
-
-        // shadcn/Radix UI - fire escape key to dismiss
-        document.querySelectorAll('[data-radix-dialog-overlay], [data-state="open"][role="dialog"]').forEach(el => {
-            try {
-                el.dispatchEvent(new KeyboardEvent('keydown', { key: 'Escape', bubbles: true, cancelable: true }));
-                closed++;
-            } catch (e) {}
-        });
-
-        // Angular Material - click backdrop to dismiss
-        document.querySelectorAll('.cdk-overlay-backdrop').forEach(el => {
-            try {
-                el.click();
-                closed++;
-            } catch (e) {}
-        });
-
-        // Tailwind / Headless UI - dispatch escape key
-        document.querySelectorAll('[role="dialog"][aria-modal="true"]').forEach(el => {
-            try {
-                el.dispatchEvent(new KeyboardEvent('keydown', { key: 'Escape', bubbles: true, cancelable: true }));
-                closed++;
-            } catch (e) {}
-        });
-
-        // jQuery UI Dialog
-        if (typeof jQuery !== 'undefined' && jQuery.ui && jQuery.ui.dialog) {
-            try {
-                const $dialogs = jQuery('.ui-dialog-content');
-                if ($dialogs.length > 0) {
-                    $dialogs.dialog('close');
-                    closed += $dialogs.length;
-                }
-            } catch (e) {}
-        }
-
-        // SweetAlert2
-        if (typeof Swal !== 'undefined' && Swal.close) {
-            try { Swal.close(); closed++; } catch (e) {}
-        }
-
-        // SweetAlert 1
-        if (typeof swal !== 'undefined' && swal.close) {
-            try { swal.close(); closed++; } catch (e) {}
-        }
-
-        // Generic fallback - hide unrecognized modals with CSS
-        const genericSelectors = [
-            // CookieYes (cky)
-            '.cky-consent-container', '.cky-popup-center', '.cky-overlay', '.cky-modal', '#ckyPreferenceCenter',
-            // OneTrust
-            '#onetrust-consent-sdk', '#onetrust-banner-sdk', '.onetrust-pc-dark-filter', '#onetrust-pc-sdk',
-            // CookieBot
-            '#CybotCookiebotDialog', '#CybotCookiebotDialogBodyUnderlay', '#CookiebotWidget',
-            // Quantcast / CMP
-            '.qc-cmp-ui-container', '#qc-cmp2-container', '.qc-cmp2-summary-buttons',
-            // TrustArc / TrustE
-            '#truste-consent-track', '.truste-banner', '#truste-consent-content',
-            // Osano
-            '.osano-cm-window', '.osano-cm-dialog',
-            // Klaro
-            '.klaro .cookie-modal', '.klaro .cookie-notice',
-            // Tarteaucitron
-            '#tarteaucitronRoot', '#tarteaucitronAlertBig',
-            // Complianz (WordPress)
-            '.cmplz-cookiebanner', '#cmplz-cookiebanner-container',
-            // GDPR Cookie Consent (WordPress)
-            '#gdpr-cookie-consent-bar', '.gdpr-cookie-consent-popup',
-            // Cookie Notice (WordPress)
-            '#cookie-notice', '.cookie-notice-container',
-            // EU Cookie Law
-            '.eupopup', '#eu-cookie-law',
-            // Didomi
-            '#didomi-popup', '#didomi-host', '.didomi-popup-container',
-            // Usercentrics
-            '#usercentrics-root', '.uc-banner',
-            // Axeptio
-            '#axeptio_overlay', '#axeptio_btn',
-            // iubenda
-            '#iubenda-cs-banner', '.iubenda-cs-container',
-            // Termly
-            '.termly-consent-banner', '#termly-code-snippet-support',
-            // Borlabs Cookie (WordPress)
-            '#BorlabsCookieBox', '.BorlabsCookie',
-            // CookieFirst
-            '.cookiefirst-root', '#cookiefirst-root',
-            // CookieScript
-            '#cookiescript_injected', '.cookiescript_injected_wrapper',
-            // Civic Cookie Control
-            '#ccc', '#ccc-overlay',
-            // Generic patterns
-            '#cookie-consent', '.cookie-banner', '.cookie-notice',
-            '#cookieConsent', '.cookie-consent', '.cookies-banner',
-            '[class*="cookie"][class*="banner"]', '[class*="cookie"][class*="notice"]',
-            '[class*="cookie"][class*="popup"]', '[class*="cookie"][class*="modal"]',
-            '[class*="consent"][class*="banner"]', '[class*="consent"][class*="popup"]',
-            '[class*="gdpr"]', '[class*="privacy"][class*="banner"]',
-            // Modal overlays and backdrops
-            '.modal-overlay:not([style*="display: none"])',
-            '.modal-backdrop:not([style*="display: none"])',
-            '.overlay-visible',
-            // Popup overlays
-            '.popup-overlay', '.newsletter-popup', '.age-gate',
-            '.subscribe-popup', '.subscription-modal',
-            // Generic modal patterns
-            '[class*="modal"][class*="open"]:not(.modal-open)',
-            '[class*="modal"][class*="show"][class*="overlay"]',
-            '[class*="modal"][class*="visible"]',
-            '[class*="dialog"][class*="open"]',
-            '[class*="overlay"][class*="visible"]',
-            // Interstitials
-            '.interstitial', '.interstitial-wrapper',
-            '[class*="interstitial"]',
-        ];
-
-        genericSelectors.forEach(selector => {
-            try {
-                document.querySelectorAll(selector).forEach(el => {
-                    // Skip if already hidden
-                    const style = window.getComputedStyle(el);
-                    if (style.display === 'none' || style.visibility === 'hidden') return;
-
-                    el.style.display = 'none';
-                    el.style.visibility = 'hidden';
-                    el.style.opacity = '0';
-                    el.style.pointerEvents = 'none';
-                    closed++;
-                });
-            } catch (e) {}
-        });
-
-        // Remove body scroll lock (common pattern when modals are open)
-        try {
-            document.body.style.overflow = '';
-            document.body.style.position = '';
-            document.body.classList.remove('modal-open', 'overflow-hidden', 'no-scroll', 'scroll-locked');
-            document.documentElement.style.overflow = '';
-            document.documentElement.classList.remove('overflow-hidden', 'no-scroll');
-        } catch (e) {}
-
-        return closed;
-    });
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__15_modalcloser.bg.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    const dialogTimeout = getEnvInt('MODALCLOSER_TIMEOUT', 1250);
-    const pollInterval = getEnvInt('MODALCLOSER_POLL_INTERVAL', 500);
-
-    const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
-    if (!cdpUrl) {
-        console.error('No Chrome session found (chrome plugin must run first)');
-        process.exit(1);
-    }
-
-    let browser = null;
-    let dialogsClosed = 0;
-    let cssModalsClosed = 0;
-    let running = true;
-
-    // Handle SIGTERM for clean exit
-    process.on('SIGTERM', () => {
-        running = false;
-        const total = dialogsClosed + cssModalsClosed;
-        console.error(`Modalcloser exiting: closed ${dialogsClosed} dialogs, ${cssModalsClosed} CSS modals`);
-
-        const outputStr = total > 0
-            ? `closed ${total} modals (${dialogsClosed} dialogs, ${cssModalsClosed} CSS)`
-            : 'no modals detected';
-
-        console.log(JSON.stringify({
-            type: 'ArchiveResult',
-            status: 'succeeded',
-            output_str: outputStr,
-        }));
-
-        if (browser) browser.disconnect();
-        process.exit(0);
-    });
-
-    try {
-        browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
-
-        const pages = await browser.pages();
-        if (pages.length === 0) {
-            throw new Error('No pages found in browser');
-        }
-
-        // Find the right page by target ID
-        const targetId = readTargetId(CHROME_SESSION_DIR);
-        let page = null;
-        if (targetId) {
-            page = pages.find(p => {
-                const target = p.target();
-                return target && target._targetId === targetId;
-            });
-        }
-        if (!page) {
-            page = pages[pages.length - 1];
-        }
-
-        // console.error(`Modalcloser listening on ${url}`);
-
-        // Set up dialog handler (for JS alert/confirm/prompt/beforeunload)
-        page.on('dialog', async (dialog) => {
-            const type = dialog.type();
-            const message = dialog.message().substring(0, 100);
-            console.error(`Auto-closing dialog: ${type} - "${message}"`);
-
-            // Small delay before accepting (some pages expect a brief pause)
-            await sleep(dialogTimeout);
-            try {
-                await dialog.accept();
-                dialogsClosed++;
-            } catch (e) {
-                // Dialog may have been dismissed by page
-            }
-        });
-
-        // Poll for CSS modals
-        while (running) {
-            try {
-                const closed = await closeModals(page);
-                if (closed > 0) {
-                    console.error(`Closed ${closed} CSS modals`);
-                    cssModalsClosed += closed;
-                }
-            } catch (e) {
-                // Page may have navigated or been closed
-                if (!running) break;
-            }
-            await sleep(pollInterval);
-        }
-
-    } catch (e) {
-        if (browser) browser.disconnect();
-        console.error(`ERROR: ${e.name}: ${e.message}`);
-        process.exit(1);
-    }
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/modalcloser/templates/icon.html b/archivebox/plugins/modalcloser/templates/icon.html
deleted file mode 100644
index e58b588b..00000000
--- a/archivebox/plugins/modalcloser/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--modalcloser" title="Modal Closer"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="4" y="4" width="16" height="16" rx="3"/><path d="M9 9l6 6"/><path d="M15 9l-6 6"/></svg></span>
diff --git a/archivebox/plugins/modalcloser/tests/test_modalcloser.py b/archivebox/plugins/modalcloser/tests/test_modalcloser.py
deleted file mode 100644
index 53c62479..00000000
--- a/archivebox/plugins/modalcloser/tests/test_modalcloser.py
+++ /dev/null
@@ -1,454 +0,0 @@
-"""
-Integration tests for modalcloser plugin
-
-Tests verify:
-1. Hook script exists
-2. Dependencies installed via chrome validation hooks
-3. Verify deps with abx-pkg
-4. MODALCLOSER_ENABLED=False skips without JSONL
-5. Fails gracefully when no chrome session exists
-6. Background script runs and handles SIGTERM correctly
-7. Config options work (timeout, poll interval)
-8. Live test: hides cookie consent on filmin.es
-"""
-
-import json
-import os
-import signal
-import subprocess
-import time
-import tempfile
-from pathlib import Path
-
-import pytest
-
-# Import shared Chrome test helpers
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_test_env,
-    chrome_session,
-)
-
-
-PLUGIN_DIR = Path(__file__).parent.parent
-MODALCLOSER_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_modalcloser.*'), None)
-TEST_URL = 'https://www.singsing.movie/'
-COOKIE_CONSENT_TEST_URL = 'https://www.filmin.es/'
-
-
-def test_hook_script_exists():
-    """Verify on_Snapshot hook exists."""
-    assert MODALCLOSER_HOOK is not None, "Modalcloser hook not found"
-    assert MODALCLOSER_HOOK.exists(), f"Hook not found: {MODALCLOSER_HOOK}"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify dependencies are available via abx-pkg after hook installation."""
-    from abx_pkg import Binary, EnvProvider
-
-    EnvProvider.model_rebuild()
-
-    # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
-    node_loaded = node_binary.load()
-    assert node_loaded and node_loaded.abspath, "Node.js required for modalcloser plugin"
-
-
-def test_config_modalcloser_disabled_skips():
-    """Test that MODALCLOSER_ENABLED=False exits without emitting JSONL."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        env = get_test_env()
-        env['MODALCLOSER_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-disabled'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, got: {jsonl_lines}"
-
-
-def test_fails_gracefully_without_chrome_session():
-    """Test that hook fails gracefully when no chrome session exists."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        modalcloser_dir = tmpdir / 'snapshot' / 'modalcloser'
-        modalcloser_dir.mkdir(parents=True, exist_ok=True)
-
-        result = subprocess.run(
-            ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-no-chrome'],
-            cwd=modalcloser_dir,
-            capture_output=True,
-            text=True,
-            env=get_test_env(),
-            timeout=30
-        )
-
-        # Should fail (exit 1) when no chrome session
-        assert result.returncode != 0, "Should fail when no chrome session exists"
-        # Error could be about chrome/CDP not found, or puppeteer module missing
-        err_lower = result.stderr.lower()
-        assert any(x in err_lower for x in ['chrome', 'cdp', 'puppeteer', 'module']), \
-            f"Should mention chrome/CDP/puppeteer in error: {result.stderr}"
-
-
-def test_background_script_handles_sigterm():
-    """Test that background script runs and handles SIGTERM correctly."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        modalcloser_process = None
-        try:
-            with chrome_session(
-                Path(tmpdir),
-                crawl_id='test-modalcloser',
-                snapshot_id='snap-modalcloser',
-                test_url=TEST_URL,
-            ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-                # Create modalcloser output directory (sibling to chrome)
-                modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'
-                modalcloser_dir.mkdir()
-
-                # Run modalcloser as background process (use env from setup_chrome_session)
-                env['MODALCLOSER_POLL_INTERVAL'] = '200'  # Faster polling for test
-
-                modalcloser_process = subprocess.Popen(
-                    ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-modalcloser'],
-                    cwd=str(modalcloser_dir),
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    text=True,
-                    env=env
-                )
-
-                # Let it run for a bit
-                time.sleep(2)
-
-                # Verify it's still running (background script)
-                assert modalcloser_process.poll() is None, "Modalcloser should still be running as background process"
-
-                # Send SIGTERM
-                modalcloser_process.send_signal(signal.SIGTERM)
-                stdout, stderr = modalcloser_process.communicate(timeout=5)
-
-                assert modalcloser_process.returncode == 0, f"Should exit 0 on SIGTERM: {stderr}"
-
-                # Parse JSONL output
-                result_json = None
-                for line in stdout.strip().split('\n'):
-                    line = line.strip()
-                    if line.startswith('{'):
-                        try:
-                            record = json.loads(line)
-                            if record.get('type') == 'ArchiveResult':
-                                result_json = record
-                                break
-                        except json.JSONDecodeError:
-                            pass
-
-                assert result_json is not None, f"Should have ArchiveResult JSONL output. Stdout: {stdout}"
-                assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-                # Verify output_str format
-                output_str = result_json.get('output_str', '')
-                assert 'modal' in output_str.lower() or 'dialog' in output_str.lower(), \
-                    f"output_str should mention modals/dialogs: {output_str}"
-
-                # Verify no files created in output directory
-                output_files = list(modalcloser_dir.iterdir())
-                assert len(output_files) == 0, f"Should not create any files, but found: {output_files}"
-
-        finally:
-            if modalcloser_process and modalcloser_process.poll() is None:
-                modalcloser_process.kill()
-
-
-def test_dialog_handler_logs_dialogs():
-    """Test that dialog handler is set up correctly."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        modalcloser_process = None
-        try:
-            with chrome_session(
-                    Path(tmpdir),
-                    crawl_id='test-dialog',
-                    snapshot_id='snap-dialog',
-                    test_url=TEST_URL,
-            ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-
-                modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'
-                modalcloser_dir.mkdir()
-
-                # Use env from setup_chrome_session
-                env['MODALCLOSER_TIMEOUT'] = '100'  # Fast timeout for test
-                env['MODALCLOSER_POLL_INTERVAL'] = '200'
-
-                modalcloser_process = subprocess.Popen(
-                    ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-dialog'],
-                    cwd=str(modalcloser_dir),
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    text=True,
-                    env=env
-                )
-
-                # Let it run briefly
-                time.sleep(1.5)
-
-                # Verify it's running
-                assert modalcloser_process.poll() is None, "Should be running"
-
-                # Check stderr for "listening" message
-                # Note: Can't read stderr while process is running without blocking,
-                # so we just verify it exits cleanly
-                modalcloser_process.send_signal(signal.SIGTERM)
-                stdout, stderr = modalcloser_process.communicate(timeout=5)
-
-                assert 'listening' in stderr.lower() or 'modalcloser' in stderr.lower(), \
-                    f"Should log startup message: {stderr}"
-                assert modalcloser_process.returncode == 0, f"Should exit cleanly: {stderr}"
-
-        finally:
-            if modalcloser_process and modalcloser_process.poll() is None:
-                modalcloser_process.kill()
-
-
-def test_config_poll_interval():
-    """Test that MODALCLOSER_POLL_INTERVAL config is respected."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        chrome_launch_process = None
-        chrome_pid = None
-        modalcloser_process = None
-        try:
-            with chrome_session(
-                    Path(tmpdir),
-                    crawl_id='test-poll',
-                    snapshot_id='snap-poll',
-                    test_url=TEST_URL,
-            ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-
-                modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'
-                modalcloser_dir.mkdir()
-
-                # Set very short poll interval (use env from setup_chrome_session)
-                env['MODALCLOSER_POLL_INTERVAL'] = '100'  # 100ms
-
-                modalcloser_process = subprocess.Popen(
-                    ['node', str(MODALCLOSER_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-poll'],
-                    cwd=str(modalcloser_dir),
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    text=True,
-                    env=env
-                )
-
-                # Run for short time
-                time.sleep(1)
-
-                # Should still be running
-                assert modalcloser_process.poll() is None, "Should still be running"
-
-                # Clean exit
-                modalcloser_process.send_signal(signal.SIGTERM)
-                stdout, stderr = modalcloser_process.communicate(timeout=5)
-
-                assert modalcloser_process.returncode == 0, f"Should exit 0: {stderr}"
-
-                # Verify JSONL output exists
-                result_json = None
-                for line in stdout.strip().split('\n'):
-                    if line.strip().startswith('{'):
-                        try:
-                            record = json.loads(line)
-                            if record.get('type') == 'ArchiveResult':
-                                result_json = record
-                                break
-                        except json.JSONDecodeError:
-                            pass
-
-                assert result_json is not None, "Should have JSONL output"
-                assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        finally:
-            if modalcloser_process and modalcloser_process.poll() is None:
-                modalcloser_process.kill()
-
-
-def test_hides_cookie_consent_on_filmin():
-    """Live test: verify modalcloser hides cookie consent popup on filmin.es."""
-    # Create a test script that uses puppeteer directly
-    test_script = '''
-const puppeteer = require('puppeteer-core');
-
-async function closeModals(page) {
-    return page.evaluate(() => {
-        let closed = 0;
-
-        // Bootstrap 4/5
-        if (typeof bootstrap !== 'undefined' && bootstrap.Modal) {
-            document.querySelectorAll('.modal.show').forEach(el => {
-                try {
-                    const modal = bootstrap.Modal.getInstance(el);
-                    if (modal) { modal.hide(); closed++; }
-                } catch (e) {}
-            });
-        }
-
-        // Bootstrap 3 / jQuery
-        if (typeof jQuery !== 'undefined' && jQuery.fn && jQuery.fn.modal) {
-            try {
-                const $modals = jQuery('.modal.in, .modal.show');
-                if ($modals.length > 0) {
-                    $modals.modal('hide');
-                    closed += $modals.length;
-                }
-            } catch (e) {}
-        }
-
-        // Generic selectors including cookie consent
-        const genericSelectors = [
-            // CookieYes (cky) specific selectors
-            '.cky-consent-container',
-            '.cky-popup-center',
-            '.cky-overlay',
-            '.cky-modal',
-            '#ckyPreferenceCenter',
-            // Generic cookie consent
-            '#cookie-consent', '.cookie-banner', '.cookie-notice',
-            '#cookieConsent', '.cookie-consent', '.cookies-banner',
-            '[class*="cookie"][class*="banner"]',
-            '[class*="cookie"][class*="notice"]',
-            '[class*="consent"]',
-            '[class*="gdpr"]',
-            '.modal-overlay', '.modal-backdrop',
-            '.popup-overlay', '.newsletter-popup',
-        ];
-
-        genericSelectors.forEach(selector => {
-            try {
-                document.querySelectorAll(selector).forEach(el => {
-                    const style = window.getComputedStyle(el);
-                    if (style.display === 'none' || style.visibility === 'hidden') return;
-                    el.style.display = 'none';
-                    el.style.visibility = 'hidden';
-                    el.style.opacity = '0';
-                    el.style.pointerEvents = 'none';
-                    closed++;
-                });
-            } catch (e) {}
-        });
-
-        document.body.style.overflow = '';
-        document.body.classList.remove('modal-open', 'overflow-hidden', 'no-scroll');
-
-        return closed;
-    });
-}
-
-async function main() {
-    const browser = await puppeteer.launch({
-        headless: 'new',
-        executablePath: process.env.CHROME_BINARY || '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
-        args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-blink-features=AutomationControlled']
-    });
-
-    const page = await browser.newPage();
-    // Set real user agent to bypass headless detection
-    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
-    await page.setViewport({ width: 1440, height: 900 });
-
-    console.error('Navigating to filmin.es...');
-    await page.goto('https://www.filmin.es/', { waitUntil: 'networkidle2', timeout: 30000 });
-
-    // Wait for cookie consent to appear
-    await new Promise(r => setTimeout(r, 3000));
-
-    // Check BEFORE
-    const before = await page.evaluate(() => {
-        const el = document.querySelector('.cky-consent-container');
-        if (!el) return { found: false };
-        const style = window.getComputedStyle(el);
-        return { found: true, display: style.display, visibility: style.visibility };
-    });
-
-    console.error('Before:', JSON.stringify(before));
-
-    // Run modal closer
-    const closed = await closeModals(page);
-    console.error('Closed:', closed, 'modals');
-
-    // Check AFTER
-    const after = await page.evaluate(() => {
-        const el = document.querySelector('.cky-consent-container');
-        if (!el) return { found: false };
-        const style = window.getComputedStyle(el);
-        return { found: true, display: style.display, visibility: style.visibility };
-    });
-
-    console.error('After:', JSON.stringify(after));
-
-    await browser.close();
-
-    // Output result as JSON for Python to parse
-    const result = {
-        before_found: before.found,
-        before_visible: before.found && before.display !== 'none' && before.visibility !== 'hidden',
-        after_hidden: !after.found || after.display === 'none' || after.visibility === 'hidden',
-        modals_closed: closed
-    };
-    console.log(JSON.stringify(result));
-}
-
-main().catch(e => {
-    console.error('Error:', e.message);
-    process.exit(1);
-});
-'''
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        script_path = tmpdir / 'test_cookie_consent.js'
-        script_path.write_text(test_script)
-
-        env = get_test_env()
-
-        result = subprocess.run(
-            ['node', str(script_path)],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=60
-        )
-
-        print(f"stderr: {result.stderr}")
-        print(f"stdout: {result.stdout}")
-
-        assert result.returncode == 0, f"Test script failed: {result.stderr}"
-
-        # Parse the JSON output
-        output_lines = [l for l in result.stdout.strip().split('\n') if l.startswith('{')]
-        assert len(output_lines) > 0, f"No JSON output from test script. stdout: {result.stdout}"
-
-        test_result = json.loads(output_lines[-1])
-
-        # The cookie consent should have been found initially (or page changed)
-        # After running closeModals, it should be hidden
-        if test_result['before_found']:
-            assert test_result['after_hidden'], \
-                f"Cookie consent should be hidden after modalcloser. Result: {test_result}"
-            assert test_result['modals_closed'] > 0, \
-                f"Should have closed at least one modal. Result: {test_result}"
-        else:
-            # Page may have changed, just verify no errors
-            print("Cookie consent element not found (page may have changed)")
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/npm/on_Binary__10_npm_install.py b/archivebox/plugins/npm/on_Binary__10_npm_install.py
deleted file mode 100644
index f0b43893..00000000
--- a/archivebox/plugins/npm/on_Binary__10_npm_install.py
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/usr/bin/env python3
-"""
-Install a binary using npm package manager.
-
-Usage: on_Binary__install_using_npm_provider.py --binary-id=<uuid> --machine-id=<uuid> --name=<name> [--custom-cmd=<cmd>]
-Output: Binary JSONL record to stdout after installation
-
-Environment variables:
-    MACHINE_ID: Machine UUID (set by orchestrator)
-    LIB_DIR: Library directory including machine type (e.g., data/lib/arm64-darwin) (required)
-"""
-
-import json
-import os
-import sys
-from pathlib import Path
-
-import rich_click as click
-from abx_pkg import Binary, NpmProvider, BinProviderOverrides
-
-# Fix pydantic forward reference issue
-NpmProvider.model_rebuild()
-
-
-@click.command()
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--binary-id', required=True, help="Dependency UUID")
-@click.option('--name', required=True, help="Binary name to install")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--custom-cmd', default=None, help="Custom install command")
-@click.option('--overrides', default=None, help="JSON-encoded overrides dict")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_cmd: str | None, overrides: str | None):
-    """Install binary using npm."""
-
-    if binproviders != '*' and 'npm' not in binproviders.split(','):
-        click.echo(f"npm provider not allowed for {name}", err=True)
-        sys.exit(0)
-
-    # Get LIB_DIR from environment (required)
-    # Note: LIB_DIR already includes machine type (e.g., data/lib/arm64-darwin)
-    lib_dir = os.environ.get('LIB_DIR')
-
-    if not lib_dir:
-        click.echo("ERROR: LIB_DIR environment variable not set", err=True)
-        sys.exit(1)
-
-    # Structure: lib/arm64-darwin/npm (npm will create node_modules inside this)
-    npm_prefix = Path(lib_dir) / 'npm'
-    npm_prefix.mkdir(parents=True, exist_ok=True)
-
-    # Use abx-pkg NpmProvider to install binary with custom prefix
-    provider = NpmProvider(npm_prefix=npm_prefix)
-    if not provider.INSTALLER_BIN:
-        click.echo("npm not available on this system", err=True)
-        sys.exit(1)
-
-    click.echo(f"Installing {name} via npm to {npm_prefix}...", err=True)
-
-    try:
-        # Parse overrides if provided
-        overrides_dict = None
-        if overrides:
-            try:
-                overrides_dict = json.loads(overrides)
-                click.echo(f"Using custom install overrides: {overrides_dict}", err=True)
-            except json.JSONDecodeError:
-                click.echo(f"Warning: Failed to parse overrides JSON: {overrides}", err=True)
-
-        binary = Binary(name=name, binproviders=[provider], overrides=overrides_dict or {}).install()
-    except Exception as e:
-        click.echo(f"npm install failed: {e}", err=True)
-        sys.exit(1)
-
-    if not binary.abspath:
-        click.echo(f"{name} not found after npm install", err=True)
-        sys.exit(1)
-
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    # Output Binary JSONL record to stdout
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'npm',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
-    }
-    print(json.dumps(record))
-
-    # Emit PATH update for npm bin dirs (node_modules/.bin preferred)
-    npm_bin_dirs = [
-        str(npm_prefix / 'node_modules' / '.bin'),
-        str(npm_prefix / 'bin'),
-    ]
-    current_path = os.environ.get('PATH', '')
-    path_dirs = current_path.split(':') if current_path else []
-    new_path = current_path
-
-    for npm_bin_dir in npm_bin_dirs:
-        if npm_bin_dir and npm_bin_dir not in path_dirs:
-            new_path = f"{npm_bin_dir}:{new_path}" if new_path else npm_bin_dir
-            path_dirs.insert(0, npm_bin_dir)
-
-    print(json.dumps({
-        'type': 'Machine',
-        'config': {
-            'PATH': new_path,
-        },
-    }))
-
-    # Also emit NODE_MODULES_DIR for JS module resolution
-    node_modules_dir = str(npm_prefix / 'node_modules')
-    print(json.dumps({
-        'type': 'Machine',
-        'config': {
-            'NODE_MODULES_DIR': node_modules_dir,
-        },
-    }))
-
-    # Log human-readable info to stderr
-    click.echo(f"Installed {name} at {binary.abspath}", err=True)
-    click.echo(f"  version: {binary.version}", err=True)
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/npm/on_Crawl__00_npm_install.py b/archivebox/plugins/npm/on_Crawl__00_npm_install.py
deleted file mode 100644
index 5660dd01..00000000
--- a/archivebox/plugins/npm/on_Crawl__00_npm_install.py
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit node/npm Binary dependencies for the crawl.
-
-This hook runs early in the Crawl lifecycle so node/npm are installed
-before any npm-based extractors (e.g., puppeteer) run.
-"""
-
-import json
-import os
-import sys
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def output_binary(name: str, binproviders: str, overrides: dict | None = None) -> None:
-    machine_id = os.environ.get('MACHINE_ID', '')
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
-    }
-    if overrides:
-        record['overrides'] = overrides
-    print(json.dumps(record))
-
-
-def main() -> None:
-    output_binary(
-        name='node',
-        binproviders='apt,brew,env',
-        overrides={'apt': {'packages': ['nodejs']}},
-    )
-
-    output_binary(
-        name='npm',
-        binproviders='apt,brew,env',
-        overrides={
-            'apt': {'packages': ['nodejs', 'npm']},
-            'brew': {'packages': ['node']},
-        },
-    )
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/npm/templates/icon.html b/archivebox/plugins/npm/templates/icon.html
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/npm/tests/test_npm_provider.py b/archivebox/plugins/npm/tests/test_npm_provider.py
deleted file mode 100644
index 9f00d9d7..00000000
--- a/archivebox/plugins/npm/tests/test_npm_provider.py
+++ /dev/null
@@ -1,144 +0,0 @@
-"""
-Tests for the npm binary provider plugin.
-
-Tests cover:
-1. Hook script execution
-2. npm package installation
-3. PATH and NODE_MODULES_DIR updates
-4. JSONL output format
-"""
-
-import json
-import os
-import shutil
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-from django.test import TestCase
-
-
-# Get the path to the npm provider hook
-PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_npm_install.py'), None)
-
-
-def npm_available() -> bool:
-    """Check if npm is installed."""
-    return shutil.which('npm') is not None
-
-
-class TestNpmProviderHook(TestCase):
-    """Test the npm binary provider installation hook."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = tempfile.mkdtemp()
-        self.lib_dir = Path(self.temp_dir) / 'lib' / 'x86_64-linux'
-        self.lib_dir.mkdir(parents=True)
-
-    def tearDown(self):
-        """Clean up."""
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_hook_script_exists(self):
-        """Hook script should exist."""
-        self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")
-
-    def test_hook_requires_lib_dir(self):
-        """Hook should fail when LIB_DIR is not set."""
-        env = os.environ.copy()
-        env.pop('LIB_DIR', None)  # Remove LIB_DIR
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=some-package',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-            ],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        self.assertIn('LIB_DIR environment variable not set', result.stderr)
-        self.assertEqual(result.returncode, 1)
-
-    def test_hook_skips_when_npm_not_allowed(self):
-        """Hook should skip when npm not in allowed binproviders."""
-        env = os.environ.copy()
-        env['LIB_DIR'] = str(self.lib_dir)
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=some-package',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                '--binproviders=pip,apt',  # npm not allowed
-            ],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        # Should exit cleanly (code 0) when npm not allowed
-        self.assertIn('npm provider not allowed', result.stderr)
-        self.assertEqual(result.returncode, 0)
-
-    def test_hook_creates_npm_prefix(self):
-        """Hook should create npm prefix directory."""
-        assert npm_available(), "npm not installed"
-        env = os.environ.copy()
-        env['LIB_DIR'] = str(self.lib_dir)
-
-        # Even if installation fails, the npm prefix should be created
-        subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=nonexistent-xyz123',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-            ],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=60
-        )
-
-        npm_prefix = self.lib_dir / 'npm'
-        self.assertTrue(npm_prefix.exists())
-
-    def test_hook_handles_overrides(self):
-        """Hook should accept overrides JSON."""
-        env = os.environ.copy()
-        env['LIB_DIR'] = str(self.lib_dir)
-
-        overrides = json.dumps({'npm': {'packages': ['custom-pkg']}})
-
-        # Just verify it doesn't crash with overrides
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=test-pkg',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-                f'--overrides={overrides}',
-            ],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=60
-        )
-
-        # May fail to install, but should not crash parsing overrides
-        self.assertNotIn('Failed to parse overrides JSON', result.stderr)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/papersdl/config.json b/archivebox/plugins/papersdl/config.json
deleted file mode 100644
index 2c6eb342..00000000
--- a/archivebox/plugins/papersdl/config.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "PAPERSDL_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_PAPERSDL", "USE_PAPERSDL"],
-      "description": "Enable paper downloading with papers-dl"
-    },
-    "PAPERSDL_BINARY": {
-      "type": "string",
-      "default": "papers-dl",
-      "description": "Path to papers-dl binary"
-    },
-    "PAPERSDL_TIMEOUT": {
-      "type": "integer",
-      "default": 300,
-      "minimum": 30,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for paper downloads in seconds"
-    },
-    "PAPERSDL_ARGS": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": ["fetch"],
-      "x-aliases": ["PAPERSDL_DEFAULT_ARGS"],
-      "description": "Default papers-dl arguments"
-    },
-    "PAPERSDL_ARGS_EXTRA": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["PAPERSDL_EXTRA_ARGS"],
-      "description": "Extra arguments to append to papers-dl command"
-    }
-  }
-}
diff --git a/archivebox/plugins/papersdl/on_Crawl__30_papersdl_install.py b/archivebox/plugins/papersdl/on_Crawl__30_papersdl_install.py
deleted file mode 100755
index 050aa23b..00000000
--- a/archivebox/plugins/papersdl/on_Crawl__30_papersdl_install.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit papers-dl Binary dependency for the crawl.
-"""
-
-import json
-import os
-import sys
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def output_binary(name: str, binproviders: str):
-    """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
-    }
-    print(json.dumps(record))
-
-
-def main():
-    papersdl_enabled = get_env_bool('PAPERSDL_ENABLED', True)
-
-    if not papersdl_enabled:
-        sys.exit(0)
-
-    output_binary(name='papers-dl', binproviders='pip,env')
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/papersdl/on_Snapshot__66_papersdl.bg.py b/archivebox/plugins/papersdl/on_Snapshot__66_papersdl.bg.py
deleted file mode 100755
index 60015050..00000000
--- a/archivebox/plugins/papersdl/on_Snapshot__66_papersdl.bg.py
+++ /dev/null
@@ -1,220 +0,0 @@
-#!/usr/bin/env python3
-"""
-Download scientific papers from a URL using papers-dl.
-
-Usage: on_Snapshot__papersdl.py --url=<url> --snapshot-id=<uuid>
-Output: Downloads paper PDFs to $PWD/
-
-Environment variables:
-    PAPERSDL_BINARY: Path to papers-dl binary
-    PAPERSDL_TIMEOUT: Timeout in seconds (default: 300 for paper downloads)
-    PAPERSDL_ARGS: Default papers-dl arguments (JSON array, default: ["fetch"])
-    PAPERSDL_ARGS_EXTRA: Extra arguments to append (JSON array)
-
-    # papers-dl feature toggles
-    SAVE_PAPERSDL: Enable papers-dl paper extraction (default: True)
-
-    # Fallback to ARCHIVING_CONFIG values if PAPERSDL_* not set:
-    TIMEOUT: Fallback timeout
-"""
-
-import json
-import os
-import re
-import subprocess
-import sys
-import threading
-from pathlib import Path
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'papersdl'
-BIN_NAME = 'papers-dl'
-BIN_PROVIDERS = 'pip,env'
-OUTPUT_DIR = '.'
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
-    """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
-    if not val:
-        return default if default is not None else []
-    try:
-        result = json.loads(val)
-        if isinstance(result, list):
-            return [str(item) for item in result]
-        return default if default is not None else []
-    except json.JSONDecodeError:
-        return default if default is not None else []
-
-
-def extract_doi_from_url(url: str) -> str | None:
-    """Extract DOI from common paper URLs."""
-    # Match DOI pattern in URL
-    doi_pattern = r'10\.\d{4,}/[^\s]+'
-    match = re.search(doi_pattern, url)
-    if match:
-        return match.group(0)
-    return None
-
-
-def save_paper(url: str, binary: str) -> tuple[bool, str | None, str]:
-    """
-    Download paper using papers-dl.
-
-    Returns: (success, output_path, error_message)
-    """
-    # Get config from env
-    timeout = get_env_int('TIMEOUT', 300)
-    papersdl_args = get_env_array('PAPERSDL_ARGS', [])
-    papersdl_args_extra = get_env_array('PAPERSDL_ARGS_EXTRA', [])
-
-    # Output directory is current directory (hook already runs in output dir)
-    output_dir = Path(OUTPUT_DIR)
-
-    # Try to extract DOI from URL
-    doi = extract_doi_from_url(url)
-    if not doi:
-        # If no DOI found, papers-dl might handle the URL directly
-        identifier = url
-    else:
-        identifier = doi
-
-    # Build command - papers-dl <args> <identifier> -o <output_dir>
-    cmd = [binary, *papersdl_args, identifier, '-o', str(output_dir)]
-
-    if papersdl_args_extra:
-        cmd.extend(papersdl_args_extra)
-
-    try:
-        print(f'[papersdl] Starting download (timeout={timeout}s)', file=sys.stderr)
-        output_lines: list[str] = []
-        process = subprocess.Popen(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            bufsize=1,
-        )
-
-        def _read_output() -> None:
-            if not process.stdout:
-                return
-            for line in process.stdout:
-                output_lines.append(line)
-                sys.stderr.write(line)
-
-        reader = threading.Thread(target=_read_output, daemon=True)
-        reader.start()
-
-        try:
-            process.wait(timeout=timeout)
-        except subprocess.TimeoutExpired:
-            process.kill()
-            reader.join(timeout=1)
-            return False, None, f'Timed out after {timeout} seconds'
-
-        reader.join(timeout=1)
-        combined_output = ''.join(output_lines)
-
-        # Check if any PDF files were downloaded
-        pdf_files = list(output_dir.glob('*.pdf'))
-
-        if pdf_files:
-            # Return first PDF file
-            return True, str(pdf_files[0]), ''
-        else:
-            stderr = combined_output
-            stdout = combined_output
-
-            # These are NOT errors - page simply has no downloadable paper
-            stderr_lower = stderr.lower()
-            stdout_lower = stdout.lower()
-            if 'not found' in stderr_lower or 'not found' in stdout_lower:
-                return True, None, ''  # Paper not available - success, no output
-            if 'no results' in stderr_lower or 'no results' in stdout_lower:
-                return True, None, ''  # No paper found - success, no output
-            if process.returncode == 0:
-                return True, None, ''  # papers-dl exited cleanly, just no paper - success
-
-            # These ARE errors - something went wrong
-            if '404' in stderr or '404' in stdout:
-                return False, None, '404 Not Found'
-            if '403' in stderr or '403' in stdout:
-                return False, None, '403 Forbidden'
-
-            return False, None, f'papers-dl error: {stderr[:200] or stdout[:200]}'
-
-    except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
-    except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
-
-
-@click.command()
-@click.option('--url', required=True, help='URL to download paper from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Download scientific paper from a URL using papers-dl."""
-
-    output = None
-    status = 'failed'
-    error = ''
-
-    try:
-        # Check if papers-dl is enabled
-        if not get_env_bool('PAPERSDL_ENABLED', True):
-            print('Skipping papers-dl (PAPERSDL_ENABLED=False)', file=sys.stderr)
-            # Temporary failure (config disabled) - NO JSONL emission
-            sys.exit(0)
-
-        # Get binary from environment
-        binary = get_env('PAPERSDL_BINARY', 'papers-dl')
-
-        # Run extraction
-        success, output, error = save_paper(url, binary)
-
-        if success:
-            # Success - emit ArchiveResult
-            result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
-            }
-            print(json.dumps(result))
-            sys.exit(0)
-        else:
-            # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
-            sys.exit(1)
-
-    except Exception as e:
-        # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/papersdl/templates/card.html b/archivebox/plugins/papersdl/templates/card.html
deleted file mode 100644
index abe6f09a..00000000
--- a/archivebox/plugins/papersdl/templates/card.html
+++ /dev/null
@@ -1,7 +0,0 @@
-<!-- Paper thumbnail - shows PDF icon placeholder -->
-<div class="extractor-thumbnail papersdl-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #1a1a1a; display: flex; align-items: center; justify-content: center;">
-    <div style="display: flex; flex-direction: column; align-items: center; color: #888; font-size: 12px;">
-        <span style="font-size: 32px;">📄</span>
-        <span>Paper</span>
-    </div>
-</div>
diff --git a/archivebox/plugins/papersdl/templates/full.html b/archivebox/plugins/papersdl/templates/full.html
deleted file mode 100644
index f2cee0c8..00000000
--- a/archivebox/plugins/papersdl/templates/full.html
+++ /dev/null
@@ -1,71 +0,0 @@
-<!-- Fullscreen paper view - shows PDF in full screen -->
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Scientific Paper</title>
-    <style>
-        body {
-            margin: 0;
-            padding: 0;
-            background: #1a1a1a;
-            color: #ddd;
-            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
-            display: flex;
-            flex-direction: column;
-            height: 100vh;
-        }
-        .header {
-            text-align: center;
-            padding: 15px;
-            background: #0d1117;
-            border-bottom: 1px solid #30363d;
-        }
-        .icon {
-            font-size: 32px;
-            margin-bottom: 5px;
-        }
-        h1 {
-            margin: 0;
-            font-size: 20px;
-            color: #f0f6fc;
-        }
-        .pdf-container {
-            flex: 1;
-            width: 100%;
-            overflow: hidden;
-        }
-        embed {
-            width: 100%;
-            height: 100%;
-        }
-        .download-link {
-            position: fixed;
-            bottom: 20px;
-            right: 20px;
-            background: #58a6ff;
-            color: #fff;
-            padding: 12px 24px;
-            border-radius: 6px;
-            text-decoration: none;
-            font-weight: 600;
-            box-shadow: 0 4px 6px rgba(0,0,0,0.3);
-            transition: background 0.2s;
-        }
-        .download-link:hover {
-            background: #1f6feb;
-        }
-    </style>
-</head>
-<body>
-    <div class="header">
-        <div class="icon">📄</div>
-        <h1>Scientific Paper</h1>
-    </div>
-    <div class="pdf-container">
-        <embed src="{{ output_path }}" type="application/pdf" />
-    </div>
-    <a href="{{ output_path }}" download class="download-link">Download PDF</a>
-</body>
-</html>
diff --git a/archivebox/plugins/papersdl/templates/icon.html b/archivebox/plugins/papersdl/templates/icon.html
deleted file mode 100644
index 94afb781..00000000
--- a/archivebox/plugins/papersdl/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--papersdl" title="Papers"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M14 3H6a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V9z"/><path d="M14 3v6h6"/><path d="M12 12v5"/><path d="M9.5 14.5L12 17l2.5-2.5"/></svg></span>
diff --git a/archivebox/plugins/papersdl/tests/test_papersdl.py b/archivebox/plugins/papersdl/tests/test_papersdl.py
deleted file mode 100644
index d26ef9cb..00000000
--- a/archivebox/plugins/papersdl/tests/test_papersdl.py
+++ /dev/null
@@ -1,190 +0,0 @@
-"""
-Integration tests for papersdl plugin
-
-Tests verify:
-1. Hook script exists
-2. Dependencies installed via validation hooks
-3. Verify deps with abx-pkg
-4. Paper extraction works on paper URLs
-5. JSONL output is correct
-6. Config options work
-7. Handles non-paper URLs gracefully
-"""
-
-import json
-import subprocess
-import sys
-import tempfile
-import uuid
-from pathlib import Path
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
-PAPERSDL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_papersdl.*'), None)
-TEST_URL = 'https://example.com'
-
-# Module-level cache for binary path
-_papersdl_binary_path = None
-
-def get_papersdl_binary_path():
-    """Get the installed papers-dl binary path from cache or by running installation."""
-    global _papersdl_binary_path
-    if _papersdl_binary_path:
-        return _papersdl_binary_path
-
-    # Try to find papers-dl binary using abx-pkg
-    from abx_pkg import Binary, PipProvider, EnvProvider, BinProviderOverrides
-
-    try:
-        binary = Binary(
-            name='papers-dl',
-            binproviders=[PipProvider(), EnvProvider()]
-        ).load()
-
-        if binary and binary.abspath:
-            _papersdl_binary_path = str(binary.abspath)
-            return _papersdl_binary_path
-    except Exception:
-        pass
-
-    # If not found, try to install via pip
-    pip_hook = PLUGINS_ROOT / 'pip' / 'on_Binary__install_using_pip_provider.py'
-    if pip_hook.exists():
-        binary_id = str(uuid.uuid4())
-        machine_id = str(uuid.uuid4())
-
-        cmd = [
-            sys.executable, str(pip_hook),
-            '--binary-id', binary_id,
-            '--machine-id', machine_id,
-            '--name', 'papers-dl'
-        ]
-
-        install_result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            timeout=300
-        )
-
-        # Parse Binary from pip installation
-        for install_line in install_result.stdout.strip().split('\n'):
-            if install_line.strip():
-                try:
-                    install_record = json.loads(install_line)
-                    if install_record.get('type') == 'Binary' and install_record.get('name') == 'papers-dl':
-                        _papersdl_binary_path = install_record.get('abspath')
-                        return _papersdl_binary_path
-                except json.JSONDecodeError:
-                    pass
-
-    return None
-
-def test_hook_script_exists():
-    """Verify on_Snapshot hook exists."""
-    assert PAPERSDL_HOOK.exists(), f"Hook not found: {PAPERSDL_HOOK}"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify papers-dl is installed by calling the REAL installation hooks."""
-    binary_path = get_papersdl_binary_path()
-    assert binary_path, "papers-dl must be installed successfully via install hook and pip provider"
-    assert Path(binary_path).is_file(), f"Binary path must be a valid file: {binary_path}"
-
-
-def test_handles_non_paper_url():
-    """Test that papers-dl extractor handles non-paper URLs gracefully via hook."""
-    import os
-
-    binary_path = get_papersdl_binary_path()
-    assert binary_path, "Binary must be installed for this test"
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        env = os.environ.copy()
-        env['PAPERSDL_BINARY'] = binary_path
-
-        # Run papers-dl extraction hook on non-paper URL
-        result = subprocess.run(
-            [sys.executable, str(PAPERSDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=60
-        )
-
-        # Should exit 0 even for non-paper URL
-        assert result.returncode == 0, f"Should handle non-paper URL gracefully: {result.stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-
-def test_config_save_papersdl_false_skips():
-    """Test that PAPERSDL_ENABLED=False exits without emitting JSONL."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        env['PAPERSDL_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            [sys.executable, str(PAPERSDL_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-
-        # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
-
-
-def test_config_timeout():
-    """Test that PAPERSDL_TIMEOUT config is respected."""
-    import os
-
-    binary_path = get_papersdl_binary_path()
-    assert binary_path, "Binary must be installed for this test"
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        env['PAPERSDL_BINARY'] = binary_path
-        env['PAPERSDL_TIMEOUT'] = '5'
-
-        result = subprocess.run(
-            [sys.executable, str(PAPERSDL_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode == 0, "Should complete without hanging"
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/parse_dom_outlinks/config.json b/archivebox/plugins/parse_dom_outlinks/config.json
deleted file mode 100644
index b391981b..00000000
--- a/archivebox/plugins/parse_dom_outlinks/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "PARSE_DOM_OUTLINKS_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_DOM_OUTLINKS", "USE_PARSE_DOM_OUTLINKS"],
-      "description": "Enable DOM outlinks parsing from archived pages"
-    },
-    "PARSE_DOM_OUTLINKS_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for DOM outlinks parsing in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js b/archivebox/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js
deleted file mode 100755
index 3076fe61..00000000
--- a/archivebox/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js
+++ /dev/null
@@ -1,292 +0,0 @@
-#!/usr/bin/env node
-/**
- * Extract and categorize outgoing links from a page's DOM.
- *
- * Categorizes links by type:
- * - hrefs: All <a> links
- * - images: <img src>
- * - css_stylesheets: <link rel=stylesheet>
- * - css_images: CSS background-image: url()
- * - js_scripts: <script src>
- * - iframes: <iframe src>
- * - links: <link> tags with rel/href
- *
- * Usage: on_Snapshot__75_parse_dom_outlinks.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes parse_dom_outlinks/outlinks.json and parse_dom_outlinks/urls.jsonl
- *
- * Environment variables:
- *     PARSE_DOM_OUTLINKS_ENABLED: Enable DOM outlinks extraction (default: true)
- */
-
-const fs = require('fs');
-const path = require('path');
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer-core');
-
-// Extractor metadata
-const PLUGIN_NAME = 'parse_dom_outlinks';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'outlinks.json';
-const URLS_FILE = 'urls.jsonl';  // For crawl system
-const CHROME_SESSION_DIR = '../chrome';
-
-// Parse command line arguments
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach(arg => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-// Get environment variable with default
-function getEnv(name, defaultValue = '') {
-    return (process.env[name] || defaultValue).trim();
-}
-
-function getEnvBool(name, defaultValue = false) {
-    const val = getEnv(name, '').toLowerCase();
-    if (['true', '1', 'yes', 'on'].includes(val)) return true;
-    if (['false', '0', 'no', 'off'].includes(val)) return false;
-    return defaultValue;
-}
-
-// Wait for chrome tab to be fully loaded
-async function waitForChromeTabLoaded(timeoutMs = 60000) {
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(navigationFile)) {
-            return true;
-        }
-        // Wait 100ms before checking again
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-
-    return false;
-}
-
-// Get CDP URL from chrome plugin
-function getCdpUrl() {
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    if (fs.existsSync(cdpFile)) {
-        return fs.readFileSync(cdpFile, 'utf8').trim();
-    }
-    return null;
-}
-
-// Extract outlinks
-async function extractOutlinks(url, snapshotId, crawlId, depth) {
-    // Output directory is current directory (hook already runs in output dir)
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-
-    let browser = null;
-
-    try {
-        // Connect to existing Chrome session
-        const cdpUrl = getCdpUrl();
-        if (!cdpUrl) {
-            return { success: false, error: 'No Chrome session found (chrome plugin must run first)' };
-        }
-
-        browser = await puppeteer.connect({
-            browserWSEndpoint: cdpUrl,
-        });
-
-        // Get the page
-        const pages = await browser.pages();
-        const page = pages.find(p => p.url().startsWith('http')) || pages[0];
-
-        if (!page) {
-            return { success: false, error: 'No page found in Chrome session' };
-        }
-
-        // Extract outlinks by category
-        const outlinksData = await page.evaluate(() => {
-            const LINK_REGEX = /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/gi;
-
-            const filterDataUrls = (urls) => urls.filter(url => url && !url.startsWith('data:'));
-            const filterW3Urls = (urls) => urls.filter(url => url && !url.startsWith('http://www.w3.org/'));
-
-            // Get raw links from HTML
-            const html = document.documentElement.outerHTML;
-            const raw = Array.from(html.matchAll(LINK_REGEX)).map(m => m[0]);
-
-            // Get all <a href> links
-            const hrefs = Array.from(document.querySelectorAll('a[href]'))
-                .map(elem => elem.href)
-                .filter(url => url);
-
-            // Get all <link> tags (not just stylesheets)
-            const linksMap = {};
-            document.querySelectorAll('link[href]').forEach(elem => {
-                const rel = elem.rel || '';
-                const href = elem.href;
-                if (href && rel !== 'stylesheet') {
-                    linksMap[href] = { rel, href };
-                }
-            });
-            const links = Object.values(linksMap);
-
-            // Get iframes
-            const iframes = Array.from(document.querySelectorAll('iframe[src]'))
-                .map(elem => elem.src)
-                .filter(url => url);
-
-            // Get images
-            const images = Array.from(document.querySelectorAll('img[src]'))
-                .map(elem => elem.src)
-                .filter(url => url && !url.startsWith('data:'));
-
-            // Get CSS background images
-            const css_images = Array.from(document.querySelectorAll('*'))
-                .map(elem => {
-                    const bgImg = window.getComputedStyle(elem).getPropertyValue('background-image');
-                    const match = /url\(\s*?['"]?\s*?(\S+?)\s*?["']?\s*?\)/i.exec(bgImg);
-                    return match ? match[1] : null;
-                })
-                .filter(url => url);
-
-            // Get stylesheets
-            const css_stylesheets = Array.from(document.querySelectorAll('link[rel=stylesheet]'))
-                .map(elem => elem.href)
-                .filter(url => url);
-
-            // Get JS scripts
-            const js_scripts = Array.from(document.querySelectorAll('script[src]'))
-                .map(elem => elem.src)
-                .filter(url => url);
-
-            return {
-                url: window.location.href,
-                raw: [...new Set(filterDataUrls(filterW3Urls(raw)))],
-                hrefs: [...new Set(filterDataUrls(hrefs))],
-                links,
-                iframes: [...new Set(iframes)],
-                images: [...new Set(filterDataUrls(images))],
-                css_images: [...new Set(filterDataUrls(css_images))],
-                css_stylesheets: [...new Set(filterDataUrls(css_stylesheets))],
-                js_scripts: [...new Set(filterDataUrls(js_scripts))],
-            };
-        });
-
-        // Write detailed output (for archival)
-        fs.writeFileSync(outputPath, JSON.stringify(outlinksData, null, 2));
-
-        // Write urls.jsonl for crawl system (only hrefs that are crawlable pages)
-        const urlsPath = path.join(OUTPUT_DIR, URLS_FILE);
-        const crawlableUrls = outlinksData.hrefs.filter(href => {
-            // Only include http/https URLs, exclude static assets
-            if (!href.startsWith('http://') && !href.startsWith('https://')) return false;
-            // Exclude common static file extensions
-            const staticExts = ['.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.woff', '.woff2', '.ttf', '.eot', '.mp4', '.webm', '.mp3', '.pdf'];
-            const urlPath = href.split('?')[0].split('#')[0].toLowerCase();
-            return !staticExts.some(ext => urlPath.endsWith(ext));
-        });
-
-        const urlsJsonl = crawlableUrls.map(href => JSON.stringify({
-            type: 'Snapshot',
-            url: href,
-            plugin: PLUGIN_NAME,
-            depth: depth + 1,
-            parent_snapshot_id: snapshotId || undefined,
-            crawl_id: crawlId || undefined,
-        })).join('\n');
-
-        if (urlsJsonl) {
-            fs.writeFileSync(urlsPath, urlsJsonl + '\n');
-        }
-
-        return { success: true, output: outputPath, outlinksData, crawlableCount: crawlableUrls.length };
-
-    } catch (e) {
-        return { success: false, error: `${e.name}: ${e.message}` };
-    } finally {
-        if (browser) {
-            browser.disconnect();
-        }
-    }
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-    const crawlId = args.crawl_id || process.env.CRAWL_ID;
-    const depth = parseInt(args.depth || process.env.SNAPSHOT_DEPTH || '0', 10) || 0;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__75_parse_dom_outlinks.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    const startTs = new Date();
-    let status = 'failed';
-    let output = null;
-    let error = '';
-
-    try {
-        // Check if enabled
-        if (!getEnvBool('PARSE_DOM_OUTLINKS_ENABLED', true)) {
-            console.log('Skipping DOM outlinks (PARSE_DOM_OUTLINKS_ENABLED=False)');
-            // Output clean JSONL (no RESULT_JSON= prefix)
-            console.log(JSON.stringify({
-                type: 'ArchiveResult',
-                status: 'skipped',
-                output_str: 'PARSE_DOM_OUTLINKS_ENABLED=False',
-            }));
-            process.exit(0);
-        }
-
-        // Check if Chrome session exists, then wait for page load
-        const cdpUrl = getCdpUrl();
-        if (cdpUrl) {
-            // Wait for page to be fully loaded
-            const pageLoaded = await waitForChromeTabLoaded(60000);
-            if (!pageLoaded) {
-                throw new Error('Page not loaded after 60s (chrome_navigate must complete first)');
-            }
-        }
-
-        const result = await extractOutlinks(url, snapshotId, crawlId, depth);
-
-        if (result.success) {
-            status = 'succeeded';
-            output = result.output;
-            const total = result.outlinksData.hrefs.length;
-            const crawlable = result.crawlableCount;
-            const images = result.outlinksData.images.length;
-            const scripts = result.outlinksData.js_scripts.length;
-            console.log(`DOM outlinks extracted: ${total} links (${crawlable} crawlable), ${images} images, ${scripts} scripts`);
-        } else {
-            status = 'failed';
-            error = result.error;
-        }
-    } catch (e) {
-        error = `${e.name}: ${e.message}`;
-        status = 'failed';
-    }
-
-    const endTs = new Date();
-
-    if (error) console.error(`ERROR: ${error}`);
-
-    // Output clean JSONL (no RESULT_JSON= prefix)
-    console.log(JSON.stringify({
-        type: 'ArchiveResult',
-        status,
-        output_str: output || error || '',
-    }));
-
-    process.exit(status === 'succeeded' ? 0 : 1);
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/parse_dom_outlinks/templates/icon.html b/archivebox/plugins/parse_dom_outlinks/templates/icon.html
deleted file mode 100644
index b333082c..00000000
--- a/archivebox/plugins/parse_dom_outlinks/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--parse_dom_outlinks" title="Outlinks"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M10 13a4 4 0 0 1 0-6l2-2a4 4 0 0 1 6 6l-1 1"/><path d="M14 11a4 4 0 0 1 0 6l-2 2a4 4 0 0 1-6-6l1-1"/></svg></span>
diff --git a/archivebox/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py b/archivebox/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py
deleted file mode 100644
index 6f45eb4b..00000000
--- a/archivebox/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py
+++ /dev/null
@@ -1,119 +0,0 @@
-"""
-Tests for the parse_dom_outlinks plugin.
-
-Tests the real DOM outlinks hook with an actual URL to verify
-link extraction and categorization.
-"""
-
-import json
-import shutil
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-from django.test import TestCase
-
-# Import chrome test helpers
-sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
-from chrome_test_helpers import (
-    chrome_session,
-    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
-)
-
-
-def chrome_available() -> bool:
-    """Check if Chrome/Chromium is available."""
-    for name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']:
-        if shutil.which(name):
-            return True
-    return False
-
-
-# Get the path to the parse_dom_outlinks hook
-PLUGIN_DIR = get_plugin_dir(__file__)
-OUTLINKS_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_parse_dom_outlinks.*')
-
-
-class TestParseDomOutlinksPlugin(TestCase):
-    """Test the parse_dom_outlinks plugin."""
-
-    def test_outlinks_hook_exists(self):
-        """DOM outlinks hook script should exist."""
-        self.assertIsNotNone(OUTLINKS_HOOK, "DOM outlinks hook not found in plugin directory")
-        self.assertTrue(OUTLINKS_HOOK.exists(), f"Hook not found: {OUTLINKS_HOOK}")
-
-
-class TestParseDomOutlinksWithChrome(TestCase):
-    """Integration tests for parse_dom_outlinks plugin with Chrome."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = Path(tempfile.mkdtemp())
-
-    def tearDown(self):
-        """Clean up."""
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_outlinks_extracts_links_from_page(self):
-        """DOM outlinks hook should extract and categorize links from page."""
-        test_url = 'https://example.com'
-        snapshot_id = 'test-outlinks-snapshot'
-
-        try:
-            with chrome_session(
-                self.temp_dir,
-                crawl_id='test-outlinks-crawl',
-                snapshot_id=snapshot_id,
-                test_url=test_url,
-                navigate=True,
-                timeout=30,
-            ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-                # Use the environment from chrome_session (already has CHROME_HEADLESS=true)
-
-
-                # Run outlinks hook with the active Chrome session
-                result = subprocess.run(
-                    ['node', str(OUTLINKS_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                    cwd=str(snapshot_chrome_dir),
-                    capture_output=True,
-                    text=True,
-                    timeout=60,
-                    env=env
-                )
-
-                # Check for output file
-                outlinks_output = snapshot_chrome_dir / 'outlinks.json'
-
-                outlinks_data = None
-                json_error = None
-
-                # Try parsing from file first
-                if outlinks_output.exists():
-                    with open(outlinks_output) as f:
-                        try:
-                            outlinks_data = json.load(f)
-                        except json.JSONDecodeError as e:
-                            json_error = str(e)
-
-                # Verify hook ran successfully
-                self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
-                self.assertNotIn('Traceback', result.stderr)
-
-                # Verify we got outlinks data with expected categories
-                self.assertIsNotNone(outlinks_data, f"No outlinks data found - file missing or invalid JSON: {json_error}")
-
-                self.assertIn('url', outlinks_data, f"Missing url: {outlinks_data}")
-                self.assertIn('hrefs', outlinks_data, f"Missing hrefs: {outlinks_data}")
-                # example.com has at least one link (to iana.org)
-                self.assertIsInstance(outlinks_data['hrefs'], list)
-
-        except RuntimeError:
-            raise
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/parse_html_urls/config.json b/archivebox/plugins/parse_html_urls/config.json
deleted file mode 100644
index 3cafe13f..00000000
--- a/archivebox/plugins/parse_html_urls/config.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "PARSE_HTML_URLS_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["USE_PARSE_HTML_URLS"],
-      "description": "Enable HTML URL parsing"
-    }
-  }
-}
diff --git a/archivebox/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py b/archivebox/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py
deleted file mode 100755
index 462c72f0..00000000
--- a/archivebox/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py
+++ /dev/null
@@ -1,291 +0,0 @@
-#!/usr/bin/env python3
-"""
-Parse HTML files and extract href URLs.
-
-This is a standalone extractor that can run without ArchiveBox.
-It reads HTML content and extracts all <a href="..."> URLs.
-
-NOTE: If parse_dom_outlinks already ran (parse_dom_outlinks/urls.jsonl exists),
-this extractor will skip since parse_dom_outlinks provides better coverage via Chrome.
-
-Usage: ./on_Snapshot__60_parse_html_urls.py --url=<url>
-Output: Appends discovered URLs to urls.jsonl in current directory
-
-Examples:
-    ./on_Snapshot__60_parse_html_urls.py --url=file:///path/to/page.html
-    ./on_Snapshot__60_parse_html_urls.py --url=https://example.com/page.html
-"""
-
-import json
-import os
-import re
-import sys
-from datetime import datetime, timezone
-from html import unescape
-from html.parser import HTMLParser
-from pathlib import Path
-from urllib.parse import urljoin, urlparse, urlunparse
-
-import rich_click as click
-
-PLUGIN_NAME = 'parse_html_urls'
-
-# Check if parse_dom_outlinks extractor already ran (sibling plugin output dir)
-DOM_OUTLINKS_URLS_FILE = Path('..') / 'parse_dom_outlinks' / 'urls.jsonl'
-URLS_FILE = Path('urls.jsonl')
-
-
-# URL regex from archivebox/misc/util.py
-URL_REGEX = re.compile(
-    r'(?=('
-    r'http[s]?://'
-    r'(?:[a-zA-Z]|[0-9]'
-    r'|[-_$@.&+!*\(\),]'
-    r'|[^\u0000-\u007F])+'
-    r'[^\]\[<>"\'\s]+'
-    r'))',
-    re.IGNORECASE | re.UNICODE,
-)
-
-
-class HrefParser(HTMLParser):
-    """Extract href attributes from anchor tags."""
-
-    def __init__(self):
-        super().__init__()
-        self.urls = []
-
-    def handle_starttag(self, tag, attrs):
-        if tag == 'a':
-            for attr, value in attrs:
-                if attr == 'href' and value:
-                    self.urls.append(value)
-
-
-def did_urljoin_misbehave(root_url: str, relative_path: str, final_url: str) -> bool:
-    """Check if urljoin incorrectly stripped // from sub-URLs."""
-    relative_path = relative_path.lower()
-    if relative_path.startswith('http://') or relative_path.startswith('https://'):
-        relative_path = relative_path.split('://', 1)[-1]
-
-    original_path_had_suburl = '://' in relative_path
-    original_root_had_suburl = '://' in root_url[8:]
-    final_joined_has_suburl = '://' in final_url[8:]
-
-    return (original_root_had_suburl or original_path_had_suburl) and not final_joined_has_suburl
-
-
-def fix_urljoin_bug(url: str, nesting_limit=5) -> str:
-    """Fix broken sub-URLs where :// was changed to :/."""
-    input_url = url
-    for _ in range(nesting_limit):
-        url = re.sub(
-            r'(?P<root>.+?)'
-            r'(?P<separator>[-=/_&+%$#@!*\(\\])'
-            r'(?P<subscheme>[a-zA-Z0-9+_-]{1,32}?):/'
-            r'(?P<suburl>[^/\\]+)',
-            r'\1\2\3://\4',
-            input_url,
-            re.IGNORECASE | re.UNICODE,
-        )
-        if url == input_url:
-            break
-        input_url = url
-    return url
-
-
-def normalize_url(url: str, root_url: str = None) -> str:
-    """Normalize a URL, resolving relative paths if root_url provided."""
-    url = clean_url_candidate(url)
-    if not root_url:
-        return _normalize_trailing_slash(url)
-
-    url_is_absolute = url.lower().startswith('http://') or url.lower().startswith('https://')
-
-    if url_is_absolute:
-        return url
-
-    # Resolve relative URL
-    resolved = urljoin(root_url, url)
-
-    # Fix urljoin bug with sub-URLs
-    if did_urljoin_misbehave(root_url, url, resolved):
-        resolved = fix_urljoin_bug(resolved)
-
-    return _normalize_trailing_slash(resolved)
-
-
-def _normalize_trailing_slash(url: str) -> str:
-    """Drop trailing slash for non-root paths when no query/fragment."""
-    try:
-        parsed = urlparse(url)
-        path = parsed.path or ''
-        if path != '/' and path.endswith('/') and not parsed.query and not parsed.fragment:
-            path = path.rstrip('/')
-            return urlunparse((parsed.scheme, parsed.netloc, path, parsed.params, parsed.query, parsed.fragment))
-    except Exception:
-        pass
-    return url
-
-
-def clean_url_candidate(url: str) -> str:
-    """Strip obvious surrounding/trailing punctuation from extracted URLs."""
-    cleaned = (url or '').strip()
-    if not cleaned:
-        return cleaned
-
-    # Strip common wrappers
-    cleaned = cleaned.strip(' \t\r\n')
-    cleaned = cleaned.strip('"\''"'"'<>[]()')
-
-    # Strip trailing punctuation and escape artifacts
-    cleaned = cleaned.rstrip('.,;:!?)\\\'"')
-    cleaned = cleaned.rstrip('"')
-
-    # Strip leading punctuation artifacts
-    cleaned = cleaned.lstrip('("'\''<')
-
-    return cleaned
-
-
-def fetch_content(url: str) -> str:
-    """Fetch content from a URL (supports file:// and https://)."""
-    parsed = urlparse(url)
-
-    if parsed.scheme == 'file':
-        file_path = parsed.path
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-            return f.read()
-    else:
-        timeout = int(os.environ.get('TIMEOUT', '60'))
-        user_agent = os.environ.get('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
-
-        import urllib.request
-        req = urllib.request.Request(url, headers={'User-Agent': user_agent})
-        with urllib.request.urlopen(req, timeout=timeout) as response:
-            return response.read().decode('utf-8', errors='replace')
-
-
-def find_html_sources() -> list[str]:
-    """Find HTML content from other extractors in the snapshot directory."""
-    search_patterns = [
-        'readability/content.html',
-        '*_readability/content.html',
-        'mercury/content.html',
-        '*_mercury/content.html',
-        'singlefile/singlefile.html',
-        '*_singlefile/singlefile.html',
-        'singlefile/*.html',
-        '*_singlefile/*.html',
-        'dom/output.html',
-        '*_dom/output.html',
-        'dom/*.html',
-        '*_dom/*.html',
-        'wget/**/*.html',
-        '*_wget/**/*.html',
-        'wget/**/*.htm',
-        '*_wget/**/*.htm',
-        'wget/**/*.htm*',
-        '*_wget/**/*.htm*',
-    ]
-
-    sources: list[str] = []
-    for base in (Path.cwd(), Path.cwd().parent):
-        for pattern in search_patterns:
-            for match in base.glob(pattern):
-                if not match.is_file() or match.stat().st_size == 0:
-                    continue
-                try:
-                    sources.append(match.read_text(errors='ignore'))
-                except Exception:
-                    continue
-
-    return sources
-
-
-@click.command()
-@click.option('--url', required=True, help='HTML URL to parse')
-@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
-@click.option('--crawl-id', required=False, help='Crawl UUID')
-@click.option('--depth', type=int, default=0, help='Current depth level')
-def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
-    """Parse HTML and extract href URLs."""
-    env_depth = os.environ.get('SNAPSHOT_DEPTH')
-    if env_depth is not None:
-        try:
-            depth = int(env_depth)
-        except Exception:
-            pass
-    crawl_id = crawl_id or os.environ.get('CRAWL_ID')
-
-    # Skip only if parse_dom_outlinks already ran AND found URLs (it uses Chrome for better coverage)
-    # If parse_dom_outlinks ran but found nothing, we still try static HTML parsing as fallback
-    if DOM_OUTLINKS_URLS_FILE.exists() and DOM_OUTLINKS_URLS_FILE.stat().st_size > 0:
-        click.echo(f'Skipping parse_html_urls - parse_dom_outlinks already extracted URLs')
-        sys.exit(0)
-
-    contents = find_html_sources()
-    if not contents:
-        try:
-            contents = [fetch_content(url)]
-        except Exception as e:
-            click.echo(f'Failed to fetch {url}: {e}', err=True)
-            sys.exit(1)
-
-    urls_found = set()
-    for content in contents:
-        # Parse HTML for hrefs
-        parser = HrefParser()
-        try:
-            parser.feed(content)
-        except Exception:
-            pass
-
-        for href in parser.urls:
-            normalized = normalize_url(href, root_url=url)
-            if normalized.lower().startswith('http://') or normalized.lower().startswith('https://'):
-                if normalized != url:
-                    urls_found.add(unescape(normalized))
-
-        # Also capture explicit URLs in the HTML text
-        for match in URL_REGEX.findall(content):
-            normalized = normalize_url(match, root_url=url)
-            if normalized.lower().startswith('http://') or normalized.lower().startswith('https://'):
-                if normalized != url:
-                    urls_found.add(unescape(normalized))
-
-    # Emit Snapshot records to stdout (JSONL) and urls.jsonl for crawl system
-    records = []
-    for found_url in sorted(urls_found):
-        record = {
-            'type': 'Snapshot',
-            'url': found_url,
-            'plugin': PLUGIN_NAME,
-            'depth': depth + 1,
-        }
-        if snapshot_id:
-            record['parent_snapshot_id'] = snapshot_id
-        if crawl_id:
-            record['crawl_id'] = crawl_id
-
-        records.append(record)
-        print(json.dumps(record))
-
-    URLS_FILE.write_text('\n'.join(json.dumps(r) for r in records) + ('\n' if records else ''))
-
-    # Emit ArchiveResult record to mark completion
-    status = 'succeeded' if urls_found else 'skipped'
-    output_str = URLS_FILE.name
-    ar_record = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output_str,
-    }
-    print(json.dumps(ar_record))
-
-    click.echo(output_str, err=True)
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/parse_html_urls/templates/icon.html b/archivebox/plugins/parse_html_urls/templates/icon.html
deleted file mode 100644
index ee9d8294..00000000
--- a/archivebox/plugins/parse_html_urls/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--parse_html_urls" title="HTML URLs"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M8 9l-3 3 3 3"/><path d="M16 9l3 3-3 3"/><path d="M10 20l4-16"/></svg></span>
diff --git a/archivebox/plugins/parse_html_urls/tests/test_parse_html_urls.py b/archivebox/plugins/parse_html_urls/tests/test_parse_html_urls.py
deleted file mode 100644
index 8dbef37e..00000000
--- a/archivebox/plugins/parse_html_urls/tests/test_parse_html_urls.py
+++ /dev/null
@@ -1,248 +0,0 @@
-#!/usr/bin/env python3
-"""Unit tests for parse_html_urls extractor."""
-
-import json
-import subprocess
-import sys
-from pathlib import Path
-
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_html_urls.*'), None)
-
-
-class TestParseHtmlUrls:
-    """Test the parse_html_urls extractor CLI."""
-
-    def test_parses_real_example_com(self, tmp_path):
-        """Test parsing real https://example.com and extracting its links."""
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'https://example.com'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Failed to parse example.com: {result.stderr}"
-
-        # Verify stdout contains JSONL records for discovered URLs
-        # example.com links to iana.org
-        assert 'iana.org' in result.stdout or 'example' in result.stdout, "Expected links from example.com not found"
-
-        # Verify ArchiveResult record is present
-        assert '"type": "ArchiveResult"' in result.stdout, "Missing ArchiveResult record"
-        assert '"status": "succeeded"' in result.stdout, "Missing success status"
-
-    def test_extracts_href_urls(self, tmp_path):
-        """Test extracting URLs from anchor tags."""
-        input_file = tmp_path / 'page.html'
-        input_file.write_text('''
-<!DOCTYPE html>
-<html>
-<body>
-    <a href="https://example.com">Example</a>
-    <a href="https://foo.bar/page">Foo</a>
-    <a href="http://test.org">Test</a>
-</body>
-</html>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
-
-        # Parse Snapshot records from stdout
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '"type": "Snapshot"' in line]
-        assert len(lines) == 3, f"Expected 3 Snapshot records, got {len(lines)}"
-
-        urls = set()
-        for line in lines:
-            entry = json.loads(line)
-            assert entry['type'] == 'Snapshot'
-            assert 'url' in entry
-            urls.add(entry['url'])
-
-        assert 'https://example.com' in urls
-        assert 'https://foo.bar/page' in urls
-        assert 'http://test.org' in urls
-
-        # Verify ArchiveResult record
-        assert '"type": "ArchiveResult"' in result.stdout
-        assert '"status": "succeeded"' in result.stdout
-
-        urls_file = tmp_path / 'urls.jsonl'
-        assert urls_file.exists(), "urls.jsonl not created"
-        file_lines = [line for line in urls_file.read_text().splitlines() if line.strip()]
-        assert len(file_lines) == 3, f"Expected 3 urls.jsonl entries, got {len(file_lines)}"
-
-    def test_ignores_non_http_schemes(self, tmp_path):
-        """Test that non-http schemes are ignored."""
-        input_file = tmp_path / 'page.html'
-        input_file.write_text('''
-<html>
-<body>
-    <a href="mailto:test@example.com">Email</a>
-    <a href="javascript:void(0)">JS</a>
-    <a href="tel:+1234567890">Phone</a>
-    <a href="https://valid.com">Valid</a>
-</body>
-</html>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-
-        # Parse Snapshot records from stdout
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '"type": "Snapshot"' in line]
-        assert len(lines) == 1, f"Expected 1 Snapshot record, got {len(lines)}"
-
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://valid.com'
-
-    def test_handles_html_entities(self, tmp_path):
-        """Test that HTML entities in URLs are decoded."""
-        input_file = tmp_path / 'page.html'
-        input_file.write_text('''
-<html>
-<body>
-    <a href="https://example.com/page?a=1&amp;b=2">Link</a>
-</body>
-</html>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com/page?a=1&b=2'
-
-    def test_deduplicates_urls(self, tmp_path):
-        """Test that duplicate URLs are deduplicated."""
-        input_file = tmp_path / 'page.html'
-        input_file.write_text('''
-<html>
-<body>
-    <a href="https://example.com">Link 1</a>
-    <a href="https://example.com">Link 2</a>
-    <a href="https://example.com">Link 3</a>
-</body>
-</html>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        assert len(lines) == 1
-
-    def test_excludes_source_url(self, tmp_path):
-        """Test that the source URL itself is excluded from results."""
-        input_file = tmp_path / 'page.html'
-        source_url = f'file://{input_file}'
-        input_file.write_text(f'''
-<html>
-<body>
-    <a href="{source_url}">Self</a>
-    <a href="https://other.com">Other</a>
-</body>
-</html>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', source_url],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        assert len(lines) == 1
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://other.com'
-
-    def test_skips_when_no_urls_found(self, tmp_path):
-        """Test that script returns skipped status when no URLs found."""
-        input_file = tmp_path / 'page.html'
-        input_file.write_text('<html><body>No links here</body></html>')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
-        assert '"status": "skipped"' in result.stdout
-
-    def test_handles_malformed_html(self, tmp_path):
-        """Test handling of malformed HTML."""
-        input_file = tmp_path / 'malformed.html'
-        input_file.write_text('''
-<html>
-<body>
-    <a href="https://example.com">Unclosed tag
-    <a href="https://other.com">Another link</a>
-</body>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        assert len(lines) == 2
-
-    def test_output_is_valid_json(self, tmp_path):
-        """Test that output contains required fields."""
-        input_file = tmp_path / 'page.html'
-        input_file.write_text('<a href="https://example.com">Link</a>')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
-        assert entry['type'] == 'Snapshot'
-        assert entry['plugin'] == 'parse_html_urls'
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/parse_jsonl_urls/config.json b/archivebox/plugins/parse_jsonl_urls/config.json
deleted file mode 100644
index 032eab1e..00000000
--- a/archivebox/plugins/parse_jsonl_urls/config.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "PARSE_JSONL_URLS_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["USE_PARSE_JSONL_URLS"],
-      "description": "Enable JSON Lines URL parsing"
-    }
-  }
-}
diff --git a/archivebox/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py b/archivebox/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py
deleted file mode 100755
index f9c060dc..00000000
--- a/archivebox/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py
+++ /dev/null
@@ -1,211 +0,0 @@
-#!/usr/bin/env python3
-"""
-Parse JSONL bookmark files and extract URLs.
-
-This is a standalone extractor that can run without ArchiveBox.
-It reads JSONL-format bookmark exports (one JSON object per line).
-
-Usage: ./on_Snapshot__54_parse_jsonl_urls.py --url=<url>
-Output: Appends discovered URLs to urls.jsonl in current directory
-
-Expected JSONL format (one object per line):
-    {"url": "https://example.com", "title": "Example", "tags": "tag1,tag2"}
-    {"href": "https://other.com", "description": "Other Site"}
-
-Supports various field names for URL, title, timestamp, and tags.
-"""
-
-import json
-import os
-import sys
-from pathlib import Path
-from datetime import datetime
-from html import unescape
-from urllib.parse import urlparse
-
-import rich_click as click
-
-PLUGIN_NAME = 'parse_jsonl_urls'
-URLS_FILE = Path('urls.jsonl')
-
-
-def parse_bookmarked_at(link: dict) -> str | None:
-    """Parse timestamp from various JSON formats, return ISO 8601."""
-    from datetime import timezone
-
-    def json_date(s: str) -> datetime:
-        # Try ISO 8601 format
-        return datetime.strptime(s.split(',', 1)[0], '%Y-%m-%dT%H:%M:%S%z')
-
-    def to_iso(dt: datetime) -> str:
-        if dt.tzinfo is None:
-            dt = dt.replace(tzinfo=timezone.utc)
-        return dt.isoformat()
-
-    try:
-        if link.get('bookmarked_at'):
-            # Already in our format, pass through
-            return link['bookmarked_at']
-        elif link.get('timestamp'):
-            # Chrome/Firefox histories use microseconds
-            return to_iso(datetime.fromtimestamp(link['timestamp'] / 1000000, tz=timezone.utc))
-        elif link.get('time'):
-            return to_iso(json_date(link['time']))
-        elif link.get('created_at'):
-            return to_iso(json_date(link['created_at']))
-        elif link.get('created'):
-            return to_iso(json_date(link['created']))
-        elif link.get('date'):
-            return to_iso(json_date(link['date']))
-        elif link.get('bookmarked'):
-            return to_iso(json_date(link['bookmarked']))
-        elif link.get('saved'):
-            return to_iso(json_date(link['saved']))
-    except (ValueError, TypeError, KeyError):
-        pass
-
-    return None
-
-
-def json_object_to_entry(link: dict) -> dict | None:
-    """Convert a JSON bookmark object to a URL entry."""
-    # Parse URL (try various field names)
-    url = link.get('href') or link.get('url') or link.get('URL')
-    if not url:
-        return None
-
-    entry = {
-        'type': 'Snapshot',
-        'url': unescape(url),
-        'plugin': PLUGIN_NAME,
-    }
-
-    # Parse title
-    title = None
-    if link.get('title'):
-        title = link['title'].strip()
-    elif link.get('description'):
-        title = link['description'].replace(' — Readability', '').strip()
-    elif link.get('name'):
-        title = link['name'].strip()
-    if title:
-        entry['title'] = unescape(title)
-
-    # Parse bookmarked_at (ISO 8601)
-    bookmarked_at = parse_bookmarked_at(link)
-    if bookmarked_at:
-        entry['bookmarked_at'] = bookmarked_at
-
-    # Parse tags
-    tags = link.get('tags', '')
-    if isinstance(tags, list):
-        tags = ','.join(tags)
-    elif isinstance(tags, str) and ',' not in tags and tags:
-        # If no comma, assume space-separated
-        tags = tags.replace(' ', ',')
-    if tags:
-        entry['tags'] = unescape(tags)
-
-    return entry
-
-
-def fetch_content(url: str) -> str:
-    """Fetch content from a URL (supports file:// and https://)."""
-    parsed = urlparse(url)
-
-    if parsed.scheme == 'file':
-        file_path = parsed.path
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-            return f.read()
-    else:
-        timeout = int(os.environ.get('TIMEOUT', '60'))
-        user_agent = os.environ.get('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
-
-        import urllib.request
-        req = urllib.request.Request(url, headers={'User-Agent': user_agent})
-        with urllib.request.urlopen(req, timeout=timeout) as response:
-            return response.read().decode('utf-8', errors='replace')
-
-
-@click.command()
-@click.option('--url', required=True, help='JSONL file URL to parse')
-@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
-@click.option('--crawl-id', required=False, help='Crawl UUID')
-@click.option('--depth', type=int, default=0, help='Current depth level')
-def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
-    """Parse JSONL bookmark file and extract URLs."""
-    env_depth = os.environ.get('SNAPSHOT_DEPTH')
-    if env_depth is not None:
-        try:
-            depth = int(env_depth)
-        except Exception:
-            pass
-    crawl_id = crawl_id or os.environ.get('CRAWL_ID')
-
-    try:
-        content = fetch_content(url)
-    except Exception as e:
-        click.echo(f'Failed to fetch {url}: {e}', err=True)
-        sys.exit(1)
-
-    urls_found = []
-    all_tags = set()
-
-    for line in content.splitlines():
-        line = line.strip()
-        if not line:
-            continue
-
-        try:
-            link = json.loads(line)
-            entry = json_object_to_entry(link)
-            if entry:
-                # Add crawl tracking metadata
-                entry['depth'] = depth + 1
-                if snapshot_id:
-                    entry['parent_snapshot_id'] = snapshot_id
-                if crawl_id:
-                    entry['crawl_id'] = crawl_id
-
-                # Collect tags
-                if entry.get('tags'):
-                    for tag in entry['tags'].split(','):
-                        tag = tag.strip()
-                        if tag:
-                            all_tags.add(tag)
-
-                urls_found.append(entry)
-        except json.JSONDecodeError:
-            # Skip malformed lines
-            continue
-
-    # Emit Tag records first (to stdout as JSONL)
-    for tag_name in sorted(all_tags):
-        print(json.dumps({
-            'type': 'Tag',
-            'name': tag_name,
-        }))
-
-    # Emit Snapshot records (to stdout as JSONL)
-    for entry in urls_found:
-        print(json.dumps(entry))
-
-    # Write urls.jsonl to disk for crawl system
-    URLS_FILE.write_text('\n'.join(json.dumps(r) for r in urls_found) + ('\n' if urls_found else ''))
-
-    # Emit ArchiveResult record to mark completion
-    status = 'succeeded' if urls_found else 'skipped'
-    output_str = URLS_FILE.name
-    ar_record = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output_str,
-    }
-    print(json.dumps(ar_record))
-
-    click.echo(output_str, err=True)
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/parse_jsonl_urls/templates/icon.html b/archivebox/plugins/parse_jsonl_urls/templates/icon.html
deleted file mode 100644
index 124a8cb4..00000000
--- a/archivebox/plugins/parse_jsonl_urls/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--parse_jsonl_urls" title="JSONL URLs"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M8 4H5v16h3"/><path d="M16 4h3v16h-3"/><circle cx="12" cy="8" r="1" fill="currentColor" stroke="none"/><circle cx="12" cy="12" r="1" fill="currentColor" stroke="none"/><circle cx="12" cy="16" r="1" fill="currentColor" stroke="none"/></svg></span>
diff --git a/archivebox/plugins/parse_jsonl_urls/tests/test_parse_jsonl_urls.py b/archivebox/plugins/parse_jsonl_urls/tests/test_parse_jsonl_urls.py
deleted file mode 100644
index b425d3f3..00000000
--- a/archivebox/plugins/parse_jsonl_urls/tests/test_parse_jsonl_urls.py
+++ /dev/null
@@ -1,276 +0,0 @@
-#!/usr/bin/env python3
-"""Unit tests for parse_jsonl_urls extractor."""
-
-import json
-import subprocess
-import sys
-from pathlib import Path
-
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_jsonl_urls.*'), None)
-
-
-class TestParseJsonlUrls:
-    """Test the parse_jsonl_urls extractor CLI."""
-
-    def test_extracts_urls_from_jsonl(self, tmp_path):
-        """Test extracting URLs from JSONL bookmark file."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text(
-            '{"url": "https://example.com", "title": "Example"}\n'
-            '{"url": "https://foo.bar/page", "title": "Foo Bar"}\n'
-            '{"url": "https://test.org", "title": "Test Org"}\n'
-        )
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr or 'urls.jsonl' in result.stdout
-
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        assert len(lines) == 3
-
-        entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
-        titles = {e.get('title') for e in entries}
-
-        assert 'https://example.com' in urls
-        assert 'https://foo.bar/page' in urls
-        assert 'https://test.org' in urls
-        assert 'Example' in titles
-        assert 'Foo Bar' in titles
-        assert 'Test Org' in titles
-
-    def test_supports_href_field(self, tmp_path):
-        """Test that 'href' field is recognized as URL."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"href": "https://example.com", "title": "Test"}\n')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
-
-    def test_supports_description_as_title(self, tmp_path):
-        """Test that 'description' field is used as title fallback."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"url": "https://example.com", "description": "A description"}\n')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert entry['title'] == 'A description'
-
-    def test_parses_various_timestamp_formats(self, tmp_path):
-        """Test parsing of different timestamp field names."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"url": "https://example.com", "timestamp": 1609459200000000}\n')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        # Parser converts timestamp to bookmarked_at
-        assert 'bookmarked_at' in entry
-
-    def test_parses_tags_as_string(self, tmp_path):
-        """Test parsing tags as comma-separated string."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"url": "https://example.com", "tags": "tech,news,reading"}\n')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        # Parser converts tags to separate Tag objects in the output
-        content = result.stdout
-        assert 'tech' in content or 'news' in content or 'Tag' in content
-
-    def test_parses_tags_as_list(self, tmp_path):
-        """Test parsing tags as JSON array."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"url": "https://example.com", "tags": ["tech", "news"]}\n')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        # Parser converts tags to separate Tag objects in the output
-        content = result.stdout
-        assert 'tech' in content or 'news' in content or 'Tag' in content
-
-    def test_skips_malformed_lines(self, tmp_path):
-        """Test that malformed JSON lines are skipped."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text(
-            '{"url": "https://valid.com"}\n'
-            'not valid json\n'
-            '{"url": "https://also-valid.com"}\n'
-        )
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        assert len(lines) == 2
-
-    def test_skips_entries_without_url(self, tmp_path):
-        """Test that entries without URL field are skipped."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text(
-            '{"url": "https://valid.com"}\n'
-            '{"title": "No URL here"}\n'
-            '{"url": "https://also-valid.com"}\n'
-        )
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        assert len(lines) == 2
-
-    def test_skips_when_no_urls_found(self, tmp_path):
-        """Test that script returns skipped status when no URLs found."""
-        input_file = tmp_path / 'empty.jsonl'
-        input_file.write_text('{"title": "No URL"}\n')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
-        assert '"status": "skipped"' in result.stdout
-
-    def test_exits_1_when_file_not_found(self, tmp_path):
-        """Test that script exits with code 1 when file doesn't exist."""
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'file:///nonexistent/bookmarks.jsonl'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 1
-        assert 'Failed to fetch' in result.stderr
-
-    def test_handles_html_entities(self, tmp_path):
-        """Test that HTML entities in URLs and titles are decoded."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"url": "https://example.com/page?a=1&amp;b=2", "title": "Test &amp; Title"}\n')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com/page?a=1&b=2'
-        assert entry['title'] == 'Test & Title'
-
-    def test_skips_empty_lines(self, tmp_path):
-        """Test that empty lines are skipped."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text(
-            '{"url": "https://example.com"}\n'
-            '\n'
-            '   \n'
-            '{"url": "https://other.com"}\n'
-        )
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        assert len(lines) == 2
-
-    def test_output_includes_required_fields(self, tmp_path):
-        """Test that output includes required fields."""
-        input_file = tmp_path / 'bookmarks.jsonl'
-        input_file.write_text('{"url": "https://example.com"}\n')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
-        assert 'type' in entry
-        assert 'plugin' in entry
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/parse_netscape_urls/config.json b/archivebox/plugins/parse_netscape_urls/config.json
deleted file mode 100644
index 04afe872..00000000
--- a/archivebox/plugins/parse_netscape_urls/config.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "PARSE_NETSCAPE_URLS_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["USE_PARSE_NETSCAPE_URLS"],
-      "description": "Enable Netscape bookmarks HTML URL parsing"
-    }
-  }
-}
diff --git a/archivebox/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py b/archivebox/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py
deleted file mode 100755
index 1627e919..00000000
--- a/archivebox/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py
+++ /dev/null
@@ -1,255 +0,0 @@
-#!/usr/bin/env python3
-"""
-Parse Netscape bookmark HTML files and extract URLs.
-
-This is a standalone extractor that can run without ArchiveBox.
-It reads Netscape-format bookmark exports (produced by all major browsers).
-
-Usage: ./on_Snapshot__53_parse_netscape_urls.py --url=<url>
-Output: Appends discovered URLs to urls.jsonl in current directory
-
-Examples:
-    ./on_Snapshot__53_parse_netscape_urls.py --url=file:///path/to/bookmarks.html
-"""
-
-import json
-import os
-import re
-import sys
-from pathlib import Path
-from datetime import datetime, timezone
-from html import unescape
-from urllib.parse import urlparse
-
-import rich_click as click
-
-PLUGIN_NAME = 'parse_netscape_urls'
-URLS_FILE = Path('urls.jsonl')
-
-# Constants for timestamp epoch detection
-UNIX_EPOCH = 0  # 1970-01-01 00:00:00 UTC
-MAC_COCOA_EPOCH = 978307200  # 2001-01-01 00:00:00 UTC (Mac/Cocoa/NSDate epoch)
-
-# Reasonable date range for bookmarks (to detect correct epoch/unit)
-MIN_REASONABLE_YEAR = 1995  # Netscape Navigator era
-MAX_REASONABLE_YEAR = 2035  # Far enough in future
-
-# Regex pattern for Netscape bookmark format
-# Example: <DT><A HREF="https://example.com/?q=1+2" ADD_DATE="1497562974" TAGS="tag1,tag2">example title</A>
-# Make ADD_DATE optional and allow negative numbers
-NETSCAPE_PATTERN = re.compile(
-    r'<a\s+href="([^"]+)"(?:\s+add_date="([^"]*)")?(?:\s+[^>]*?tags="([^"]*)")?[^>]*>([^<]+)</a>',
-    re.UNICODE | re.IGNORECASE
-)
-
-
-def parse_timestamp(timestamp_str: str) -> datetime | None:
-    """
-    Intelligently parse bookmark timestamp with auto-detection of format and epoch.
-
-    Browsers use different timestamp formats:
-    - Firefox: Unix epoch (1970) in seconds (10 digits): 1609459200
-    - Safari: Mac/Cocoa epoch (2001) in seconds (9-10 digits): 631152000
-    - Chrome: Unix epoch in microseconds (16 digits): 1609459200000000
-    - Others: Unix epoch in milliseconds (13 digits): 1609459200000
-
-    Strategy:
-    1. Try parsing with different epoch + unit combinations
-    2. Pick the one that yields a reasonable date (1995-2035)
-    3. Prioritize more common formats (Unix seconds, then Mac seconds, etc.)
-    """
-    if not timestamp_str or timestamp_str == '':
-        return None
-
-    try:
-        timestamp_num = float(timestamp_str)
-    except (ValueError, TypeError):
-        return None
-
-    # Detect sign and work with absolute value
-    is_negative = timestamp_num < 0
-    abs_timestamp = abs(timestamp_num)
-
-    # Determine number of digits to guess the unit
-    if abs_timestamp == 0:
-        num_digits = 1
-    else:
-        num_digits = len(str(int(abs_timestamp)))
-
-    # Try different interpretations in order of likelihood
-    candidates = []
-
-    # Unix epoch seconds (10-11 digits) - Most common: Firefox, Chrome HTML export
-    if 9 <= num_digits <= 11:
-        try:
-            dt = datetime.fromtimestamp(timestamp_num, tz=timezone.utc)
-            if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'unix_seconds', 100))  # Highest priority
-        except (ValueError, OSError, OverflowError):
-            pass
-
-    # Mac/Cocoa epoch seconds (9-10 digits) - Safari
-    # Only consider if Unix seconds didn't work or gave unreasonable date
-    if 8 <= num_digits <= 11:
-        try:
-            dt = datetime.fromtimestamp(timestamp_num + MAC_COCOA_EPOCH, tz=timezone.utc)
-            if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'mac_seconds', 90))
-        except (ValueError, OSError, OverflowError):
-            pass
-
-    # Unix epoch milliseconds (13 digits) - JavaScript exports
-    if 12 <= num_digits <= 14:
-        try:
-            dt = datetime.fromtimestamp(timestamp_num / 1000, tz=timezone.utc)
-            if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'unix_milliseconds', 95))
-        except (ValueError, OSError, OverflowError):
-            pass
-
-    # Mac/Cocoa epoch milliseconds (12-13 digits) - Rare
-    if 11 <= num_digits <= 14:
-        try:
-            dt = datetime.fromtimestamp((timestamp_num / 1000) + MAC_COCOA_EPOCH, tz=timezone.utc)
-            if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'mac_milliseconds', 85))
-        except (ValueError, OSError, OverflowError):
-            pass
-
-    # Unix epoch microseconds (16-17 digits) - Chrome WebKit timestamps
-    if 15 <= num_digits <= 18:
-        try:
-            dt = datetime.fromtimestamp(timestamp_num / 1_000_000, tz=timezone.utc)
-            if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'unix_microseconds', 98))
-        except (ValueError, OSError, OverflowError):
-            pass
-
-    # Mac/Cocoa epoch microseconds (15-16 digits) - Very rare
-    if 14 <= num_digits <= 18:
-        try:
-            dt = datetime.fromtimestamp((timestamp_num / 1_000_000) + MAC_COCOA_EPOCH, tz=timezone.utc)
-            if MIN_REASONABLE_YEAR <= dt.year <= MAX_REASONABLE_YEAR:
-                candidates.append((dt, 'mac_microseconds', 80))
-        except (ValueError, OSError, OverflowError):
-            pass
-
-    # If no candidates found, return None
-    if not candidates:
-        return None
-
-    # Sort by priority (highest first) and return best match
-    candidates.sort(key=lambda x: x[2], reverse=True)
-    best_dt, best_format, _ = candidates[0]
-
-    return best_dt
-
-
-def fetch_content(url: str) -> str:
-    """Fetch content from a URL (supports file:// and https://)."""
-    parsed = urlparse(url)
-
-    if parsed.scheme == 'file':
-        file_path = parsed.path
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-            return f.read()
-    else:
-        timeout = int(os.environ.get('TIMEOUT', '60'))
-        user_agent = os.environ.get('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
-
-        import urllib.request
-        req = urllib.request.Request(url, headers={'User-Agent': user_agent})
-        with urllib.request.urlopen(req, timeout=timeout) as response:
-            return response.read().decode('utf-8', errors='replace')
-
-
-@click.command()
-@click.option('--url', required=True, help='Netscape bookmark file URL to parse')
-@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
-@click.option('--crawl-id', required=False, help='Crawl UUID')
-@click.option('--depth', type=int, default=0, help='Current depth level')
-def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
-    """Parse Netscape bookmark HTML and extract URLs."""
-    env_depth = os.environ.get('SNAPSHOT_DEPTH')
-    if env_depth is not None:
-        try:
-            depth = int(env_depth)
-        except Exception:
-            pass
-    crawl_id = crawl_id or os.environ.get('CRAWL_ID')
-
-    try:
-        content = fetch_content(url)
-    except Exception as e:
-        click.echo(f'Failed to fetch {url}: {e}', err=True)
-        sys.exit(1)
-
-    urls_found = []
-    all_tags = set()
-
-    for line in content.splitlines():
-        match = NETSCAPE_PATTERN.search(line)
-        if match:
-            bookmark_url = match.group(1)
-            timestamp_str = match.group(2)
-            tags_str = match.group(3) or ''
-            title = match.group(4).strip()
-
-            entry = {
-                'type': 'Snapshot',
-                'url': unescape(bookmark_url),
-                'plugin': PLUGIN_NAME,
-                'depth': depth + 1,
-            }
-            if snapshot_id:
-                entry['parent_snapshot_id'] = snapshot_id
-            if crawl_id:
-                entry['crawl_id'] = crawl_id
-            if title:
-                entry['title'] = unescape(title)
-            if tags_str:
-                entry['tags'] = tags_str
-                # Collect unique tags
-                for tag in tags_str.split(','):
-                    tag = tag.strip()
-                    if tag:
-                        all_tags.add(tag)
-
-            # Parse timestamp with intelligent format detection
-            if timestamp_str:
-                dt = parse_timestamp(timestamp_str)
-                if dt:
-                    entry['bookmarked_at'] = dt.isoformat()
-
-            urls_found.append(entry)
-
-    # Emit Tag records first (to stdout as JSONL)
-    for tag_name in sorted(all_tags):
-        print(json.dumps({
-            'type': 'Tag',
-            'name': tag_name,
-        }))
-
-    # Emit Snapshot records (to stdout as JSONL)
-    for entry in urls_found:
-        print(json.dumps(entry))
-
-    # Write urls.jsonl to disk for crawl system
-    URLS_FILE.write_text('\n'.join(json.dumps(r) for r in urls_found) + ('\n' if urls_found else ''))
-
-    # Emit ArchiveResult record to mark completion
-    status = 'succeeded' if urls_found else 'skipped'
-    output_str = URLS_FILE.name
-    ar_record = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output_str,
-    }
-    print(json.dumps(ar_record))
-
-    click.echo(output_str, err=True)
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/parse_netscape_urls/templates/icon.html b/archivebox/plugins/parse_netscape_urls/templates/icon.html
deleted file mode 100644
index 4c60899c..00000000
--- a/archivebox/plugins/parse_netscape_urls/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--parse_netscape_urls" title="Netscape Bookmarks"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M6 4h12v16l-6-4-6 4z"/></svg></span>
diff --git a/archivebox/plugins/parse_netscape_urls/tests/test_parse_netscape_urls.py b/archivebox/plugins/parse_netscape_urls/tests/test_parse_netscape_urls.py
deleted file mode 100644
index 43754b59..00000000
--- a/archivebox/plugins/parse_netscape_urls/tests/test_parse_netscape_urls.py
+++ /dev/null
@@ -1,208 +0,0 @@
-#!/usr/bin/env python3
-"""Unit tests for parse_netscape_urls extractor."""
-
-import json
-import subprocess
-import sys
-from pathlib import Path
-
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_netscape_urls.*'), None)
-
-
-class TestParseNetscapeUrls:
-    """Test the parse_netscape_urls extractor CLI."""
-
-    def test_extracts_urls_from_netscape_bookmarks(self, tmp_path):
-        """Test extracting URLs from Netscape bookmark HTML format."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
-<TITLE>Bookmarks</TITLE>
-<H1>Bookmarks</H1>
-<DL><p>
-    <DT><A HREF="https://example.com" ADD_DATE="1609459200">Example Site</A>
-    <DT><A HREF="https://foo.bar/page" ADD_DATE="1609545600">Foo Bar</A>
-    <DT><A HREF="https://test.org" ADD_DATE="1609632000">Test Org</A>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr or 'urls.jsonl' in result.stdout
-
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        assert len(lines) == 3
-
-        entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
-        titles = {e.get('title') for e in entries}
-
-        assert 'https://example.com' in urls
-        assert 'https://foo.bar/page' in urls
-        assert 'https://test.org' in urls
-        assert 'Example Site' in titles
-        assert 'Foo Bar' in titles
-        assert 'Test Org' in titles
-
-    def test_parses_add_date_timestamps(self, tmp_path):
-        """Test that ADD_DATE timestamps are parsed correctly."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://example.com" ADD_DATE="1609459200">Test</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        # Parser converts timestamp to bookmarked_at
-        assert 'bookmarked_at' in entry
-
-    def test_handles_query_params_in_urls(self, tmp_path):
-        """Test that URLs with query parameters are preserved."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://example.com/search?q=test+query&page=1" ADD_DATE="1609459200">Search</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert 'q=test+query' in entry['url']
-        assert 'page=1' in entry['url']
-
-    def test_handles_html_entities(self, tmp_path):
-        """Test that HTML entities in URLs and titles are decoded."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://example.com/page?a=1&amp;b=2" ADD_DATE="1609459200">Test &amp; Title</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com/page?a=1&b=2'
-        assert entry['title'] == 'Test & Title'
-
-    def test_skips_when_no_bookmarks_found(self, tmp_path):
-        """Test that script returns skipped status when no bookmarks found."""
-        input_file = tmp_path / 'empty.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<TITLE>Bookmarks</TITLE>
-<H1>Bookmarks</H1>
-<DL><p>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
-        assert '"status": "skipped"' in result.stdout
-
-    def test_exits_1_when_file_not_found(self, tmp_path):
-        """Test that script exits with code 1 when file doesn't exist."""
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'file:///nonexistent/bookmarks.html'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 1
-        assert 'Failed to fetch' in result.stderr
-
-    def test_handles_nested_folders(self, tmp_path):
-        """Test parsing bookmarks in nested folder structure."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<DL><p>
-    <DT><H3>Folder 1</H3>
-    <DL><p>
-        <DT><A HREF="https://example.com/nested1" ADD_DATE="1609459200">Nested 1</A>
-        <DT><H3>Subfolder</H3>
-        <DL><p>
-            <DT><A HREF="https://example.com/nested2" ADD_DATE="1609459200">Nested 2</A>
-        </DL><p>
-    </DL><p>
-    <DT><A HREF="https://example.com/top" ADD_DATE="1609459200">Top Level</A>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        urls = {json.loads(line)['url'] for line in lines}
-
-        assert 'https://example.com/nested1' in urls
-        assert 'https://example.com/nested2' in urls
-        assert 'https://example.com/top' in urls
-
-    def test_case_insensitive_parsing(self, tmp_path):
-        """Test that parsing is case-insensitive for HTML tags."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<dt><a HREF="https://example.com" ADD_DATE="1609459200">Test</a>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/parse_netscape_urls/tests/test_parse_netscape_urls_comprehensive.py b/archivebox/plugins/parse_netscape_urls/tests/test_parse_netscape_urls_comprehensive.py
deleted file mode 100644
index 402b823f..00000000
--- a/archivebox/plugins/parse_netscape_urls/tests/test_parse_netscape_urls_comprehensive.py
+++ /dev/null
@@ -1,953 +0,0 @@
-#!/usr/bin/env python3
-"""Comprehensive tests for parse_netscape_urls extractor covering various browser formats."""
-
-import json
-import subprocess
-import sys
-from datetime import datetime
-from pathlib import Path
-
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_netscape_urls.*'), None)
-
-
-class TestFirefoxFormat:
-    """Test Firefox Netscape bookmark export format."""
-
-    def test_firefox_basic_format(self, tmp_path):
-        """Test standard Firefox export format with Unix timestamps in seconds."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<!-- This is an automatically generated file.
-     It will be read and overwritten.
-     DO NOT EDIT! -->
-<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
-<TITLE>Bookmarks</TITLE>
-<H1>Bookmarks Menu</H1>
-<DL><p>
-    <DT><A HREF="https://example.com" ADD_DATE="1609459200" LAST_MODIFIED="1609545600">Example Site</A>
-    <DT><A HREF="https://mozilla.org" ADD_DATE="1640995200">Mozilla</A>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-
-        assert len(entries) == 2
-        assert entries[0]['url'] == 'https://example.com'
-        assert entries[0]['title'] == 'Example Site'
-        # Timestamp should be parsed as seconds (Jan 1, 2021)
-        assert '2021-01-01' in entries[0]['bookmarked_at']
-        # Second bookmark (Jan 1, 2022)
-        assert '2022-01-01' in entries[1]['bookmarked_at']
-
-    def test_firefox_with_tags(self, tmp_path):
-        """Test Firefox bookmarks with tags."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<DL><p>
-    <DT><A HREF="https://example.com" ADD_DATE="1609459200" TAGS="coding,tutorial,python">Python Tutorial</A>
-    <DT><A HREF="https://rust-lang.org" ADD_DATE="1609459200" TAGS="coding,rust">Rust Lang</A>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL) - get all JSONL records
-        all_lines = [line for line in result.stdout.strip().split('\n') if line.strip() and line.startswith('{')]
-        records = [json.loads(line) for line in all_lines]
-
-        # Should have Tag records + Snapshot records
-        tags = [r for r in records if r.get('type') == 'Tag']
-        snapshots = [r for r in records if r.get('type') == 'Snapshot']
-
-        tag_names = {t['name'] for t in tags}
-        assert 'coding' in tag_names
-        assert 'tutorial' in tag_names
-        assert 'python' in tag_names
-        assert 'rust' in tag_names
-
-        assert snapshots[0]['tags'] == 'coding,tutorial,python'
-        assert snapshots[1]['tags'] == 'coding,rust'
-
-    def test_firefox_nested_folders(self, tmp_path):
-        """Test Firefox bookmark folders and nested structure."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<DL><p>
-    <DT><H3 ADD_DATE="1609459200" LAST_MODIFIED="1609545600">Toolbar</H3>
-    <DL><p>
-        <DT><A HREF="https://github.com" ADD_DATE="1609459200">GitHub</A>
-        <DT><H3 ADD_DATE="1609459200" LAST_MODIFIED="1609545600">Development</H3>
-        <DL><p>
-            <DT><A HREF="https://stackoverflow.com" ADD_DATE="1609459200">Stack Overflow</A>
-            <DT><A HREF="https://developer.mozilla.org" ADD_DATE="1609459200">MDN</A>
-        </DL><p>
-    </DL><p>
-    <DT><A HREF="https://news.ycombinator.com" ADD_DATE="1609459200">Hacker News</A>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
-
-        assert 'https://github.com' in urls
-        assert 'https://stackoverflow.com' in urls
-        assert 'https://developer.mozilla.org' in urls
-        assert 'https://news.ycombinator.com' in urls
-        assert len(entries) == 4
-
-    def test_firefox_icon_and_icon_uri(self, tmp_path):
-        """Test Firefox bookmarks with ICON and ICON_URI attributes."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<DL><p>
-    <DT><A HREF="https://example.com" ADD_DATE="1609459200" ICON="data:image/png;base64,iVBORw0K">Example</A>
-    <DT><A HREF="https://github.com" ADD_DATE="1609459200" ICON_URI="https://github.com/favicon.ico">GitHub</A>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-
-        assert entries[0]['url'] == 'https://example.com'
-        assert entries[1]['url'] == 'https://github.com'
-
-
-class TestChromeFormat:
-    """Test Chrome/Chromium Netscape bookmark export format."""
-
-    def test_chrome_microsecond_timestamps(self, tmp_path):
-        """Test Chrome format with microsecond timestamps (16-17 digits)."""
-        input_file = tmp_path / 'bookmarks.html'
-        # Chrome uses WebKit/Chrome timestamps which are microseconds
-        # 1609459200000000 = Jan 1, 2021 00:00:00 in microseconds
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
-<TITLE>Bookmarks</TITLE>
-<H1>Bookmarks</H1>
-<DL><p>
-    <DT><A HREF="https://google.com" ADD_DATE="1609459200000000">Google</A>
-    <DT><A HREF="https://chrome.google.com" ADD_DATE="1640995200000000">Chrome</A>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-
-        # Should correctly parse microsecond timestamps
-        # Currently will fail - we'll fix the parser after writing tests
-        assert entries[0]['url'] == 'https://google.com'
-        # Timestamp should be around Jan 1, 2021, not year 52970!
-        if 'bookmarked_at' in entries[0]:
-            year = datetime.fromisoformat(entries[0]['bookmarked_at']).year
-            # Should be 2021, not some far future date
-            assert 2020 <= year <= 2025, f"Year should be ~2021, got {year}"
-
-    def test_chrome_with_folders(self, tmp_path):
-        """Test Chrome bookmark folder structure."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<DL><p>
-    <DT><H3 ADD_DATE="1609459200" LAST_MODIFIED="1609459200" PERSONAL_TOOLBAR_FOLDER="true">Bookmarks bar</H3>
-    <DL><p>
-        <DT><A HREF="https://google.com" ADD_DATE="1609459200">Google</A>
-    </DL><p>
-    <DT><H3 ADD_DATE="1609459200" LAST_MODIFIED="1609459200">Other bookmarks</H3>
-    <DL><p>
-        <DT><A HREF="https://example.com" ADD_DATE="1609459200">Example</A>
-    </DL><p>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
-
-        assert 'https://google.com' in urls
-        assert 'https://example.com' in urls
-
-
-class TestSafariFormat:
-    """Test Safari Netscape bookmark export format."""
-
-    def test_safari_basic_format(self, tmp_path):
-        """Test Safari export format."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
-<Title>Bookmarks</Title>
-<H1>Bookmarks</H1>
-<DL><p>
-    <DT><H3 FOLDED ADD_DATE="1609459200">BookmarksBar</H3>
-    <DL><p>
-        <DT><A HREF="https://apple.com" ADD_DATE="1609459200">Apple</A>
-        <DT><A HREF="https://webkit.org" ADD_DATE="1609459200">WebKit</A>
-    </DL><p>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
-
-        assert 'https://apple.com' in urls
-        assert 'https://webkit.org' in urls
-
-    def test_safari_reading_list(self, tmp_path):
-        """Test Safari Reading List entries."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<DL><p>
-    <DT><H3 FOLDED ADD_DATE="1609459200">com.apple.ReadingList</H3>
-    <DL><p>
-        <DT><A HREF="https://article1.com" ADD_DATE="1609459200">Article 1</A>
-        <DD>Long article to read later
-        <DT><A HREF="https://article2.com" ADD_DATE="1609545600">Article 2</A>
-        <DD>Another saved article
-    </DL><p>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
-
-        assert 'https://article1.com' in urls
-        assert 'https://article2.com' in urls
-
-
-class TestEdgeFormat:
-    """Test Edge/IE bookmark export formats."""
-
-    def test_edge_chromium_format(self, tmp_path):
-        """Test Edge (Chromium-based) format."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''<!DOCTYPE NETSCAPE-Bookmark-file-1>
-<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
-<TITLE>Bookmarks</TITLE>
-<H1>Bookmarks</H1>
-<DL><p>
-    <DT><A HREF="https://microsoft.com" ADD_DATE="1609459200">Microsoft</A>
-    <DT><A HREF="https://bing.com" ADD_DATE="1609459200">Bing</A>
-</DL><p>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
-
-        assert 'https://microsoft.com' in urls
-        assert 'https://bing.com' in urls
-
-
-class TestTimestampFormats:
-    """Test various timestamp format handling and edge cases."""
-
-    def test_unix_seconds_timestamp(self, tmp_path):
-        """Test Unix epoch timestamp in seconds (10-11 digits) - Firefox, Chrome HTML export."""
-        input_file = tmp_path / 'bookmarks.html'
-        # 1609459200 = Jan 1, 2021 00:00:00 UTC (Unix epoch)
-        input_file.write_text('''
-<DT><A HREF="https://example.com" ADD_DATE="1609459200">Test</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
-        assert dt.year == 2021
-        assert dt.month == 1
-        assert dt.day == 1
-
-    def test_mac_cocoa_seconds_timestamp(self, tmp_path):
-        """Test Mac/Cocoa epoch timestamp in seconds - Safari uses epoch of 2001-01-01."""
-        input_file = tmp_path / 'bookmarks.html'
-        # Safari uses Mac absolute time: seconds since 2001-01-01 00:00:00 UTC
-        # 631152000 seconds after 2001-01-01 = Jan 1, 2021
-        # 631152000 as Unix would be Feb 1990 (too old for a recent bookmark)
-        input_file.write_text('''
-<DT><A HREF="https://apple.com" ADD_DATE="631152000">Safari Bookmark</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
-        # Should detect Mac epoch and convert correctly to 2021
-        assert 2020 <= dt.year <= 2022, f"Expected ~2021, got {dt.year}"
-
-    def test_safari_recent_timestamp(self, tmp_path):
-        """Test recent Safari timestamp (Mac epoch)."""
-        input_file = tmp_path / 'bookmarks.html'
-        # 725846400 seconds after 2001-01-01 = Jan 1, 2024
-        input_file.write_text('''
-<DT><A HREF="https://webkit.org" ADD_DATE="725846400">Recent Safari</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
-        # Should detect Mac epoch and convert to 2024
-        assert 2023 <= dt.year <= 2025, f"Expected ~2024, got {dt.year}"
-
-    def test_unix_milliseconds_timestamp(self, tmp_path):
-        """Test Unix epoch timestamp in milliseconds (13 digits) - Some JavaScript exports."""
-        input_file = tmp_path / 'bookmarks.html'
-        # 1609459200000 = Jan 1, 2021 00:00:00 UTC in milliseconds
-        input_file.write_text('''
-<DT><A HREF="https://example.com" ADD_DATE="1609459200000">Test</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
-        assert dt.year == 2021
-        assert dt.month == 1
-        assert dt.day == 1
-
-    def test_chrome_webkit_microseconds_timestamp(self, tmp_path):
-        """Test Chrome WebKit timestamp in microseconds (16-17 digits) - Chrome internal format."""
-        input_file = tmp_path / 'bookmarks.html'
-        # 1609459200000000 = Jan 1, 2021 00:00:00 UTC in microseconds (Unix epoch)
-        # Chrome sometimes exports with microsecond precision
-        input_file.write_text('''
-<DT><A HREF="https://example.com" ADD_DATE="1609459200000000">Test</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
-        assert dt.year == 2021
-        assert dt.month == 1
-        assert dt.day == 1
-
-    def test_mac_cocoa_milliseconds_timestamp(self, tmp_path):
-        """Test Mac/Cocoa epoch in milliseconds (rare but possible)."""
-        input_file = tmp_path / 'bookmarks.html'
-        # 631152000000 milliseconds after 2001-01-01 = Jan 1, 2021
-        input_file.write_text('''
-<DT><A HREF="https://apple.com" ADD_DATE="631152000000">Safari Milliseconds</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
-        # Should detect Mac epoch with milliseconds and convert to 2021
-        assert 2020 <= dt.year <= 2022, f"Expected ~2021, got {dt.year}"
-
-    def test_ambiguous_timestamp_detection(self, tmp_path):
-        """Test that ambiguous timestamps are resolved to reasonable dates."""
-        input_file = tmp_path / 'bookmarks.html'
-        # Test multiple bookmarks with different timestamp formats mixed together
-        # Parser should handle each correctly
-        input_file.write_text('''
-<DT><A HREF="https://unix-seconds.com" ADD_DATE="1609459200">Unix Seconds 2021</A>
-<DT><A HREF="https://mac-seconds.com" ADD_DATE="631152000">Mac Seconds 2021</A>
-<DT><A HREF="https://unix-ms.com" ADD_DATE="1704067200000">Unix MS 2024</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-
-        # All should be parsed to reasonable dates (2020-2025)
-        for entry in entries:
-            dt = datetime.fromisoformat(entry['bookmarked_at'])
-            assert 2020 <= dt.year <= 2025, f"Date {dt.year} out of reasonable range for {entry['url']}"
-
-    def test_very_old_timestamp(self, tmp_path):
-        """Test very old timestamp (1990s)."""
-        input_file = tmp_path / 'bookmarks.html'
-        # 820454400 = Jan 1, 1996
-        input_file.write_text('''
-<DT><A HREF="https://example.com" ADD_DATE="820454400">Old Bookmark</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
-        assert dt.year == 1996
-
-    def test_recent_timestamp(self, tmp_path):
-        """Test recent timestamp (2024)."""
-        input_file = tmp_path / 'bookmarks.html'
-        # 1704067200 = Jan 1, 2024
-        input_file.write_text('''
-<DT><A HREF="https://example.com" ADD_DATE="1704067200">Recent</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        dt = datetime.fromisoformat(entry['bookmarked_at'])
-        assert dt.year == 2024
-
-    def test_invalid_timestamp(self, tmp_path):
-        """Test invalid/malformed timestamp - should extract URL but skip timestamp."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://example.com" ADD_DATE="invalid">Test</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        # Should still extract URL but skip timestamp
-        assert entry['url'] == 'https://example.com'
-        assert 'bookmarked_at' not in entry
-
-    def test_zero_timestamp(self, tmp_path):
-        """Test timestamp of 0 (Unix epoch) - too old, should be skipped."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://example.com" ADD_DATE="0">Test</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        # Timestamp 0 = 1970, which is before MIN_REASONABLE_YEAR (1995)
-        # Parser should skip it as unreasonable
-        assert entry['url'] == 'https://example.com'
-        # Timestamp should be omitted (outside reasonable range)
-        assert 'bookmarked_at' not in entry
-
-    def test_negative_timestamp(self, tmp_path):
-        """Test negative timestamp (before Unix epoch) - should handle gracefully."""
-        input_file = tmp_path / 'bookmarks.html'
-        # -86400 = 1 day before Unix epoch = Dec 31, 1969
-        input_file.write_text('''
-<DT><A HREF="https://example.com" ADD_DATE="-86400">Before Unix Epoch</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        # Should handle gracefully (extracts URL, may or may not include timestamp)
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
-        # If timestamp is included, should be reasonable (1969)
-        if 'bookmarked_at' in entry:
-            dt = datetime.fromisoformat(entry['bookmarked_at'])
-            # Should be near Unix epoch (late 1969)
-            assert 1969 <= dt.year <= 1970
-
-
-class TestBookmarkAttributes:
-    """Test various bookmark attributes and metadata."""
-
-    def test_private_attribute(self, tmp_path):
-        """Test bookmarks with PRIVATE attribute."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://private.example.com" ADD_DATE="1609459200" PRIVATE="1">Private</A>
-<DT><A HREF="https://public.example.com" ADD_DATE="1609459200">Public</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-
-        # Both should be extracted
-        assert len(entries) == 2
-
-    def test_shortcuturl_attribute(self, tmp_path):
-        """Test bookmarks with SHORTCUTURL keyword attribute."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://google.com/search?q=%s" ADD_DATE="1609459200" SHORTCUTURL="g">Google Search</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        assert 'google.com' in entry['url']
-
-    def test_post_data_attribute(self, tmp_path):
-        """Test bookmarks with POST_DATA attribute."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://example.com/login" ADD_DATE="1609459200" POST_DATA="user=test">Login</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        assert entry['url'] == 'https://example.com/login'
-
-
-class TestEdgeCases:
-    """Test edge cases and malformed data."""
-
-    def test_multiline_bookmark(self, tmp_path):
-        """Test bookmark spanning multiple lines."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://example.com"
-       ADD_DATE="1609459200"
-       TAGS="tag1,tag2">
-    Multi-line Bookmark
-</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        # Current regex works line-by-line, so this might not match
-        # Document current behavior
-        if result.returncode == 0:
-            # Output goes to stdout (JSONL)
-            content = result.stdout.strip()
-            if content:
-                lines = [line for line in content.split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-                if lines:
-                    entry = json.loads(lines[0])
-                    assert 'example.com' in entry['url']
-
-    def test_missing_add_date(self, tmp_path):
-        """Test bookmark without ADD_DATE attribute - should still extract URL."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://example.com">No Date</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        # Should succeed and extract URL without timestamp
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
-        assert entry['title'] == 'No Date'
-        assert 'bookmarked_at' not in entry
-
-    def test_empty_title(self, tmp_path):
-        """Test bookmark with empty title."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://example.com" ADD_DATE="1609459200"></A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        # Current regex requires non-empty title [^<]+
-        # Parser emits skipped ArchiveResult when no valid bookmarks found
-        assert result.returncode == 0
-        result_json = json.loads(result.stdout.strip())
-        assert result_json['type'] == 'ArchiveResult'
-        assert result_json['status'] == 'skipped'
-
-    def test_special_chars_in_url(self, tmp_path):
-        """Test URLs with special characters."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://example.com/path?q=test&foo=bar&baz=qux#section" ADD_DATE="1609459200">Special URL</A>
-<DT><A HREF="https://example.com/path%20with%20spaces" ADD_DATE="1609459200">Encoded Spaces</A>
-<DT><A HREF="https://example.com/unicode/日本語" ADD_DATE="1609459200">Unicode Path</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-
-        assert len(entries) == 3
-        assert 'q=test&foo=bar' in entries[0]['url']
-        assert '%20' in entries[1]['url']
-
-    def test_javascript_url(self, tmp_path):
-        """Test javascript: URLs (should still be extracted)."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="javascript:alert('test')" ADD_DATE="1609459200">JS Bookmarklet</A>
-<DT><A HREF="https://example.com" ADD_DATE="1609459200">Normal</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-
-        # Both should be extracted
-        assert len(entries) == 2
-        assert entries[0]['url'].startswith('javascript:')
-
-    def test_data_url(self, tmp_path):
-        """Test data: URLs."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="data:text/html,<h1>Test</h1>" ADD_DATE="1609459200">Data URL</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        assert entry['url'].startswith('data:')
-
-    def test_file_url(self, tmp_path):
-        """Test file:// URLs."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="file:///home/user/document.pdf" ADD_DATE="1609459200">Local File</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        assert entry['url'].startswith('file://')
-
-    def test_very_long_url(self, tmp_path):
-        """Test very long URLs (2000+ characters)."""
-        long_url = 'https://example.com/path?' + '&'.join([f'param{i}=value{i}' for i in range(100)])
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text(f'''
-<DT><A HREF="{long_url}" ADD_DATE="1609459200">Long URL</A>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        assert len(entry['url']) > 1000
-        assert entry['url'].startswith('https://example.com')
-
-    def test_unicode_in_title(self, tmp_path):
-        """Test Unicode characters in titles."""
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text('''
-<DT><A HREF="https://example.com" ADD_DATE="1609459200">日本語のタイトル</A>
-<DT><A HREF="https://example.org" ADD_DATE="1609459200">Título en Español</A>
-<DT><A HREF="https://example.net" ADD_DATE="1609459200">Заголовок на русском</A>
-<DT><A HREF="https://example.biz" ADD_DATE="1609459200">عنوان بالعربية</A>
-<DT><A HREF="https://example.info" ADD_DATE="1609459200">Emoji 🚀 📚 🎉</A>
-        ''', encoding='utf-8')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines]
-
-        assert len(entries) == 5
-        assert any('日本語' in e.get('title', '') for e in entries)
-        assert any('Español' in e.get('title', '') for e in entries)
-
-    def test_large_file_many_bookmarks(self, tmp_path):
-        """Test parsing large file with many bookmarks (1000+)."""
-        bookmarks = []
-        for i in range(1000):
-            bookmarks.append(
-                f'<DT><A HREF="https://example.com/page{i}" ADD_DATE="1609459200" TAGS="tag{i % 10}">Bookmark {i}</A>'
-            )
-
-        input_file = tmp_path / 'bookmarks.html'
-        input_file.write_text(
-            '<!DOCTYPE NETSCAPE-Bookmark-file-1>\n<DL><p>\n' +
-            '\n'.join(bookmarks) +
-            '\n</DL><p>'
-        )
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-            timeout=30,
-        )
-
-        assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr or 'urls.jsonl' in result.stdout
-
-        # Output goes to stdout (JSONL) - get all JSONL records
-        all_lines = [line for line in result.stdout.strip().split('\n') if line.strip() and line.startswith('{')]
-        records = [json.loads(line) for line in all_lines]
-
-        # Should have 10 unique tags + 1000 snapshots
-        tags = [r for r in records if r.get('type') == 'Tag']
-        snapshots = [r for r in records if r.get('type') == 'Snapshot']
-
-        assert len(tags) == 10
-        assert len(snapshots) == 1000
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/parse_rss_urls/config.json b/archivebox/plugins/parse_rss_urls/config.json
deleted file mode 100644
index 95a1223f..00000000
--- a/archivebox/plugins/parse_rss_urls/config.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "PARSE_RSS_URLS_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["USE_PARSE_RSS_URLS"],
-      "description": "Enable RSS/Atom feed URL parsing"
-    }
-  }
-}
diff --git a/archivebox/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py b/archivebox/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py
deleted file mode 100755
index dbbaccd4..00000000
--- a/archivebox/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/env python3
-"""
-Parse RSS/Atom feeds and extract URLs.
-
-This is a standalone extractor that can run without ArchiveBox.
-It reads feed content from a URL and extracts article URLs.
-
-Usage: ./on_Snapshot__51_parse_rss_urls.py --url=<url>
-Output: Appends discovered URLs to urls.jsonl in current directory
-
-Examples:
-    ./on_Snapshot__51_parse_rss_urls.py --url=https://example.com/feed.rss
-    ./on_Snapshot__51_parse_rss_urls.py --url=file:///path/to/feed.xml
-"""
-
-import json
-import os
-import sys
-from pathlib import Path
-from datetime import datetime, timezone
-from html import unescape
-from time import mktime
-from urllib.parse import urlparse
-
-import rich_click as click
-
-PLUGIN_NAME = 'parse_rss_urls'
-URLS_FILE = Path('urls.jsonl')
-
-try:
-    import feedparser
-except ImportError:
-    feedparser = None
-
-
-def fetch_content(url: str) -> str:
-    """Fetch content from a URL (supports file:// and https://)."""
-    parsed = urlparse(url)
-
-    if parsed.scheme == 'file':
-        file_path = parsed.path
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-            return f.read()
-    else:
-        timeout = int(os.environ.get('TIMEOUT', '60'))
-        user_agent = os.environ.get('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
-
-        import urllib.request
-        req = urllib.request.Request(url, headers={'User-Agent': user_agent})
-        with urllib.request.urlopen(req, timeout=timeout) as response:
-            return response.read().decode('utf-8', errors='replace')
-
-
-@click.command()
-@click.option('--url', required=True, help='RSS/Atom feed URL to parse')
-@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
-@click.option('--crawl-id', required=False, help='Crawl UUID')
-@click.option('--depth', type=int, default=0, help='Current depth level')
-def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
-    """Parse RSS/Atom feed and extract article URLs."""
-    env_depth = os.environ.get('SNAPSHOT_DEPTH')
-    if env_depth is not None:
-        try:
-            depth = int(env_depth)
-        except Exception:
-            pass
-    crawl_id = crawl_id or os.environ.get('CRAWL_ID')
-
-    if feedparser is None:
-        click.echo('feedparser library not installed', err=True)
-        sys.exit(1)
-
-    try:
-        content = fetch_content(url)
-    except Exception as e:
-        click.echo(f'Failed to fetch {url}: {e}', err=True)
-        sys.exit(1)
-
-    # Parse the feed
-    feed = feedparser.parse(content)
-
-    urls_found = []
-    all_tags = set()
-
-    if not feed.entries:
-        # No entries - will emit skipped status at end
-        pass
-    else:
-        for item in feed.entries:
-            item_url = getattr(item, 'link', None)
-            if not item_url:
-                continue
-
-            title = getattr(item, 'title', None)
-
-            # Get bookmarked_at (published/updated date as ISO 8601)
-            bookmarked_at = None
-            if hasattr(item, 'published_parsed') and item.published_parsed:
-                bookmarked_at = datetime.fromtimestamp(mktime(item.published_parsed), tz=timezone.utc).isoformat()
-            elif hasattr(item, 'updated_parsed') and item.updated_parsed:
-                bookmarked_at = datetime.fromtimestamp(mktime(item.updated_parsed), tz=timezone.utc).isoformat()
-
-            # Get tags
-            tags = ''
-            if hasattr(item, 'tags') and item.tags:
-                try:
-                    tags = ','.join(tag.term for tag in item.tags if hasattr(tag, 'term'))
-                    # Collect unique tags
-                    for tag in tags.split(','):
-                        tag = tag.strip()
-                        if tag:
-                            all_tags.add(tag)
-                except (AttributeError, TypeError):
-                    pass
-
-            entry = {
-                'type': 'Snapshot',
-                'url': unescape(item_url),
-                'plugin': PLUGIN_NAME,
-                'depth': depth + 1,
-            }
-            if snapshot_id:
-                entry['parent_snapshot_id'] = snapshot_id
-            if crawl_id:
-                entry['crawl_id'] = crawl_id
-            if title:
-                entry['title'] = unescape(title)
-            if bookmarked_at:
-                entry['bookmarked_at'] = bookmarked_at
-            if tags:
-                entry['tags'] = tags
-            urls_found.append(entry)
-
-    # Emit Tag records first (to stdout as JSONL)
-    for tag_name in sorted(all_tags):
-        print(json.dumps({
-            'type': 'Tag',
-            'name': tag_name,
-        }))
-
-    # Emit Snapshot records (to stdout as JSONL)
-    for entry in urls_found:
-        print(json.dumps(entry))
-
-    # Write urls.jsonl to disk for crawl system
-    URLS_FILE.write_text('\n'.join(json.dumps(r) for r in urls_found) + ('\n' if urls_found else ''))
-
-    # Emit ArchiveResult record to mark completion
-    status = 'succeeded' if urls_found else 'skipped'
-    output_str = URLS_FILE.name
-    ar_record = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output_str,
-    }
-    print(json.dumps(ar_record))
-
-    click.echo(output_str, err=True)
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/parse_rss_urls/templates/icon.html b/archivebox/plugins/parse_rss_urls/templates/icon.html
deleted file mode 100644
index 09b3b8e7..00000000
--- a/archivebox/plugins/parse_rss_urls/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--parse_rss_urls" title="RSS"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="5" cy="19" r="1.5" fill="currentColor" stroke="none"/><path d="M5 11a8 8 0 0 1 8 8"/><path d="M5 5a14 14 0 0 1 14 14"/></svg></span>
diff --git a/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls.py b/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls.py
deleted file mode 100644
index 3cd54f60..00000000
--- a/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls.py
+++ /dev/null
@@ -1,212 +0,0 @@
-#!/usr/bin/env python3
-"""Unit tests for parse_rss_urls extractor."""
-
-import json
-import subprocess
-import sys
-from pathlib import Path
-
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_rss_urls.*'), None)
-
-
-class TestParseRssUrls:
-    """Test the parse_rss_urls extractor CLI."""
-
-    def test_parses_real_rss_feed(self, tmp_path):
-        """Test parsing a real RSS feed from the web."""
-        # Use httpbin.org which provides a sample RSS feed
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'https://news.ycombinator.com/rss'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-            timeout=30
-        )
-
-        # HN RSS feed should parse successfully
-        if result.returncode == 0:
-            # Output goes to stdout (JSONL)
-            content = result.stdout
-            assert len(content) > 0, "No URLs extracted from real RSS feed"
-
-            # Verify at least one URL was extracted
-            lines = content.strip().split('\n')
-            assert len(lines) > 0, "No entries found in RSS feed"
-
-    def test_extracts_urls_from_rss_feed(self, tmp_path):
-        """Test extracting URLs from an RSS 2.0 feed."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
-<rss version="2.0">
-  <channel>
-    <title>Test Feed</title>
-    <link>https://example.com</link>
-    <item>
-      <title>First Post</title>
-      <link>https://example.com/post/1</link>
-      <pubDate>Mon, 01 Jan 2024 12:00:00 GMT</pubDate>
-    </item>
-    <item>
-      <title>Second Post</title>
-      <link>https://example.com/post/2</link>
-      <pubDate>Tue, 02 Jan 2024 12:00:00 GMT</pubDate>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr or 'urls.jsonl' in result.stdout
-
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        assert len(lines) == 2
-
-        entries = [json.loads(line) for line in lines]
-        urls = {e['url'] for e in entries}
-        titles = {e.get('title') for e in entries}
-
-        assert 'https://example.com/post/1' in urls
-        assert 'https://example.com/post/2' in urls
-        assert 'First Post' in titles
-        assert 'Second Post' in titles
-
-    def test_extracts_urls_from_atom_feed(self, tmp_path):
-        """Test extracting URLs from an Atom feed."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
-<feed xmlns="http://www.w3.org/2005/Atom">
-  <title>Test Atom Feed</title>
-  <entry>
-    <title>Atom Post 1</title>
-    <link href="https://atom.example.com/entry/1"/>
-    <updated>2024-01-01T12:00:00Z</updated>
-  </entry>
-  <entry>
-    <title>Atom Post 2</title>
-    <link href="https://atom.example.com/entry/2"/>
-    <updated>2024-01-02T12:00:00Z</updated>
-  </entry>
-</feed>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        urls = {json.loads(line)['url'] for line in lines}
-
-        assert 'https://atom.example.com/entry/1' in urls
-        assert 'https://atom.example.com/entry/2' in urls
-
-    def test_skips_when_no_entries(self, tmp_path):
-        """Test that script returns skipped status when feed has no entries."""
-        input_file = tmp_path / 'empty.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <title>Empty Feed</title>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
-        assert '"status": "skipped"' in result.stdout
-
-    def test_exits_1_when_file_not_found(self, tmp_path):
-        """Test that script exits with code 1 when file doesn't exist."""
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'file:///nonexistent/feed.rss'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 1
-        assert 'Failed to fetch' in result.stderr
-
-    def test_handles_html_entities_in_urls(self, tmp_path):
-        """Test that HTML entities in URLs are decoded."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>Entity Test</title>
-      <link>https://example.com/page?a=1&amp;b=2</link>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com/page?a=1&b=2'
-
-    def test_includes_optional_metadata(self, tmp_path):
-        """Test that title and timestamp are included when present."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>Test Title</title>
-      <link>https://example.com/test</link>
-      <pubDate>Wed, 15 Jan 2020 10:30:00 GMT</pubDate>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com/test'
-        assert entry['title'] == 'Test Title'
-        # Parser converts timestamp to bookmarked_at
-        assert 'bookmarked_at' in entry
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py b/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py
deleted file mode 100644
index fbc415f9..00000000
--- a/archivebox/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py
+++ /dev/null
@@ -1,1002 +0,0 @@
-#!/usr/bin/env python3
-"""Comprehensive tests for parse_rss_urls extractor covering various RSS/Atom variants."""
-
-import json
-import subprocess
-import sys
-from pathlib import Path
-
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_rss_urls.*'), None)
-
-
-class TestRssVariants:
-    """Test various RSS format variants."""
-
-    def test_rss_091(self, tmp_path):
-        """Test RSS 0.91 format (oldest RSS version)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
-<rss version="0.91">
-  <channel>
-    <title>RSS 0.91 Feed</title>
-    <link>https://example.com</link>
-    <description>Test RSS 0.91</description>
-    <item>
-      <title>RSS 0.91 Article</title>
-      <link>https://example.com/article1</link>
-      <description>An article in RSS 0.91 format</description>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0, f"Failed: {result.stderr}"
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        assert entry['url'] == 'https://example.com/article1'
-        assert entry['title'] == 'RSS 0.91 Article'
-        assert entry['plugin'] == 'parse_rss_urls'
-
-    def test_rss_10_rdf(self, tmp_path):
-        """Test RSS 1.0 (RDF) format."""
-        input_file = tmp_path / 'feed.rdf'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
-<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-         xmlns="http://purl.org/rss/1.0/"
-         xmlns:dc="http://purl.org/dc/elements/1.1/">
-  <channel rdf:about="https://example.com">
-    <title>RSS 1.0 Feed</title>
-    <link>https://example.com</link>
-  </channel>
-  <item rdf:about="https://example.com/rdf1">
-    <title>RDF Item 1</title>
-    <link>https://example.com/rdf1</link>
-    <dc:date>2024-01-15T10:30:00Z</dc:date>
-    <dc:subject>Technology</dc:subject>
-  </item>
-  <item rdf:about="https://example.com/rdf2">
-    <title>RDF Item 2</title>
-    <link>https://example.com/rdf2</link>
-    <dc:date>2024-01-16T14:20:00Z</dc:date>
-  </item>
-</rdf:RDF>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0, f"Failed: {result.stderr}"
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        entries = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
-
-        urls = {e['url'] for e in entries}
-        assert 'https://example.com/rdf1' in urls
-        assert 'https://example.com/rdf2' in urls
-        assert any(e.get('bookmarked_at') for e in entries)
-
-    def test_rss_20_with_full_metadata(self, tmp_path):
-        """Test RSS 2.0 with all standard metadata fields."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
-<rss version="2.0">
-  <channel>
-    <title>Full RSS 2.0</title>
-    <link>https://example.com</link>
-    <description>Complete RSS 2.0 feed</description>
-    <item>
-      <title>Complete Article</title>
-      <link>https://example.com/complete</link>
-      <description>Full description here</description>
-      <author>author@example.com</author>
-      <category>Technology</category>
-      <category>Programming</category>
-      <guid>https://example.com/complete</guid>
-      <pubDate>Mon, 15 Jan 2024 10:30:00 GMT</pubDate>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        content = result.stdout.strip()
-        lines = content.split('\n')
-
-        # Check for Tag records
-        tags = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Tag']
-        tag_names = {t['name'] for t in tags}
-        assert 'Technology' in tag_names
-        assert 'Programming' in tag_names
-
-        # Check Snapshot record
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
-        entry = snapshots[0]
-        assert entry['url'] == 'https://example.com/complete'
-        assert entry['title'] == 'Complete Article'
-        assert 'bookmarked_at' in entry
-        assert entry['tags'] == 'Technology,Programming' or entry['tags'] == 'Programming,Technology'
-
-
-class TestAtomVariants:
-    """Test various Atom format variants."""
-
-    def test_atom_10_full(self, tmp_path):
-        """Test Atom 1.0 with full metadata."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
-<feed xmlns="http://www.w3.org/2005/Atom">
-  <title>Atom 1.0 Feed</title>
-  <updated>2024-01-15T00:00:00Z</updated>
-  <entry>
-    <title>Atom Entry 1</title>
-    <link href="https://atom.example.com/1"/>
-    <id>urn:uuid:1234-5678</id>
-    <updated>2024-01-15T10:30:00Z</updated>
-    <published>2024-01-14T08:00:00Z</published>
-    <category term="science"/>
-    <category term="research"/>
-  </entry>
-</feed>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
-
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
-        tag_names = {t['name'] for t in tags}
-        assert 'science' in tag_names
-        assert 'research' in tag_names
-
-        snapshots = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Snapshot']
-        entry = snapshots[0]
-        assert entry['url'] == 'https://atom.example.com/1'
-        assert 'bookmarked_at' in entry
-
-    def test_atom_with_alternate_link(self, tmp_path):
-        """Test Atom feed with alternate link types."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
-<feed xmlns="http://www.w3.org/2005/Atom">
-  <title>Atom Alternate Links</title>
-  <entry>
-    <title>Entry with alternate</title>
-    <link rel="alternate" type="text/html" href="https://atom.example.com/article"/>
-    <link rel="self" href="https://atom.example.com/feed"/>
-    <updated>2024-01-15T10:30:00Z</updated>
-  </entry>
-</feed>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        # feedparser should pick the alternate link
-        assert 'atom.example.com/article' in entry['url']
-
-
-class TestDateFormats:
-    """Test various date format handling."""
-
-    def test_rfc822_date(self, tmp_path):
-        """Test RFC 822 date format (RSS 2.0 standard)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>RFC 822 Date</title>
-      <link>https://example.com/rfc822</link>
-      <pubDate>Wed, 15 Jan 2020 10:30:45 GMT</pubDate>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert 'bookmarked_at' in entry
-        assert '2020-01-15' in entry['bookmarked_at']
-
-    def test_iso8601_date(self, tmp_path):
-        """Test ISO 8601 date format (Atom standard)."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0"?>
-<feed xmlns="http://www.w3.org/2005/Atom">
-  <entry>
-    <title>ISO 8601 Date</title>
-    <link href="https://example.com/iso"/>
-    <published>2024-01-15T10:30:45.123Z</published>
-  </entry>
-</feed>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert 'bookmarked_at' in entry
-        assert '2024-01-15' in entry['bookmarked_at']
-
-    def test_updated_vs_published_date(self, tmp_path):
-        """Test that published date is preferred over updated date."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0"?>
-<feed xmlns="http://www.w3.org/2005/Atom">
-  <entry>
-    <title>Date Priority Test</title>
-    <link href="https://example.com/dates"/>
-    <published>2024-01-10T10:00:00Z</published>
-    <updated>2024-01-15T10:00:00Z</updated>
-  </entry>
-</feed>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        # Should use published date (Jan 10) not updated date (Jan 15)
-        assert '2024-01-10' in entry['bookmarked_at']
-
-    def test_only_updated_date(self, tmp_path):
-        """Test fallback to updated date when published is missing."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0"?>
-<feed xmlns="http://www.w3.org/2005/Atom">
-  <entry>
-    <title>Only Updated</title>
-    <link href="https://example.com/updated"/>
-    <updated>2024-01-20T10:00:00Z</updated>
-  </entry>
-</feed>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert '2024-01-20' in entry['bookmarked_at']
-
-    def test_no_date(self, tmp_path):
-        """Test entries without any date."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>No Date</title>
-      <link>https://example.com/nodate</link>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert 'bookmarked_at' not in entry
-
-
-class TestTagsAndCategories:
-    """Test various tag and category formats."""
-
-    def test_rss_categories(self, tmp_path):
-        """Test RSS 2.0 category elements."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>Multi Category</title>
-      <link>https://example.com/cats</link>
-      <category>Tech</category>
-      <category>Web</category>
-      <category>Programming</category>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
-
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
-        tag_names = {t['name'] for t in tags}
-        assert 'Tech' in tag_names
-        assert 'Web' in tag_names
-        assert 'Programming' in tag_names
-
-        snapshots = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Snapshot']
-        entry = snapshots[0]
-        tags_list = entry['tags'].split(',')
-        assert len(tags_list) == 3
-
-    def test_atom_categories(self, tmp_path):
-        """Test Atom category elements with various attributes."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0"?>
-<feed xmlns="http://www.w3.org/2005/Atom">
-  <entry>
-    <title>Atom Categories</title>
-    <link href="https://example.com/atomcats"/>
-    <category term="python" scheme="http://example.com/categories" label="Python Programming"/>
-    <category term="django" label="Django Framework"/>
-    <updated>2024-01-15T10:00:00Z</updated>
-  </entry>
-</feed>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
-
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
-        tag_names = {t['name'] for t in tags}
-        # feedparser extracts the 'term' attribute
-        assert 'python' in tag_names
-        assert 'django' in tag_names
-
-    def test_no_tags(self, tmp_path):
-        """Test entries without tags."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>No Tags</title>
-      <link>https://example.com/notags</link>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-        assert 'tags' not in entry or entry['tags'] == ''
-
-    def test_duplicate_tags(self, tmp_path):
-        """Test that duplicate tags are handled properly."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>Duplicate Tags</title>
-      <link>https://example.com/dups</link>
-      <category>Python</category>
-      <category>Python</category>
-      <category>Web</category>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
-        # Tag records should be unique
-        tag_names = [t['name'] for t in tags]
-        assert tag_names.count('Python') == 1
-
-
-class TestCustomNamespaces:
-    """Test custom namespace handling (Dublin Core, Media RSS, etc.)."""
-
-    def test_dublin_core_metadata(self, tmp_path):
-        """Test Dublin Core namespace fields."""
-        input_file = tmp_path / 'feed.rdf'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
-<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-         xmlns="http://purl.org/rss/1.0/"
-         xmlns:dc="http://purl.org/dc/elements/1.1/">
-  <channel rdf:about="https://example.com">
-    <title>Dublin Core Feed</title>
-  </channel>
-  <item rdf:about="https://example.com/dc1">
-    <title>Dublin Core Article</title>
-    <link>https://example.com/dc1</link>
-    <dc:creator>John Doe</dc:creator>
-    <dc:subject>Technology</dc:subject>
-    <dc:date>2024-01-15T10:30:00Z</dc:date>
-    <dc:rights>Copyright 2024</dc:rights>
-  </item>
-</rdf:RDF>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
-        entry = snapshots[0]
-
-        assert entry['url'] == 'https://example.com/dc1'
-        assert entry['title'] == 'Dublin Core Article'
-        # feedparser should parse dc:date as bookmarked_at
-        assert 'bookmarked_at' in entry
-
-    def test_media_rss_namespace(self, tmp_path):
-        """Test Media RSS namespace (common in podcast feeds)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
-  <channel>
-    <title>Media RSS Feed</title>
-    <item>
-      <title>Podcast Episode 1</title>
-      <link>https://example.com/podcast/1</link>
-      <media:content url="https://example.com/audio.mp3" type="audio/mpeg"/>
-      <media:thumbnail url="https://example.com/thumb.jpg"/>
-      <pubDate>Mon, 15 Jan 2024 10:00:00 GMT</pubDate>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        assert entry['url'] == 'https://example.com/podcast/1'
-        assert entry['title'] == 'Podcast Episode 1'
-
-    def test_itunes_namespace(self, tmp_path):
-        """Test iTunes namespace (common in podcast feeds)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
-  <channel>
-    <title>iTunes Podcast</title>
-    <item>
-      <title>Episode 1: Getting Started</title>
-      <link>https://example.com/ep1</link>
-      <itunes:author>Jane Smith</itunes:author>
-      <itunes:duration>45:30</itunes:duration>
-      <itunes:keywords>programming, tutorial, beginner</itunes:keywords>
-      <pubDate>Tue, 16 Jan 2024 08:00:00 GMT</pubDate>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
-        entry = snapshots[0]
-
-        assert entry['url'] == 'https://example.com/ep1'
-        assert entry['title'] == 'Episode 1: Getting Started'
-
-
-class TestEdgeCases:
-    """Test edge cases and malformed data."""
-
-    def test_missing_title(self, tmp_path):
-        """Test entries without title."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <link>https://example.com/notitle</link>
-      <pubDate>Mon, 15 Jan 2024 10:00:00 GMT</pubDate>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        assert entry['url'] == 'https://example.com/notitle'
-        assert 'title' not in entry
-
-    def test_missing_link(self, tmp_path):
-        """Test entries without link (should be skipped)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>No Link</title>
-      <description>This entry has no link</description>
-    </item>
-    <item>
-      <title>Has Link</title>
-      <link>https://example.com/haslink</link>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        # Should only have the entry with a link
-        assert entry['url'] == 'https://example.com/haslink'
-        assert '1 URL' in result.stdout
-
-    def test_html_entities_in_title(self, tmp_path):
-        """Test HTML entities in titles are properly decoded."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>Using &lt;div&gt; &amp; &lt;span&gt; tags</title>
-      <link>https://example.com/html</link>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        assert entry['title'] == 'Using <div> & <span> tags'
-
-    def test_special_characters_in_tags(self, tmp_path):
-        """Test special characters in tags."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>Special Tags</title>
-      <link>https://example.com/special</link>
-      <category>C++</category>
-      <category>Node.js</category>
-      <category>Web/Mobile</category>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
-
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
-        tag_names = {t['name'] for t in tags}
-        assert 'C++' in tag_names
-        assert 'Node.js' in tag_names
-        assert 'Web/Mobile' in tag_names
-
-    def test_cdata_sections(self, tmp_path):
-        """Test CDATA sections in titles and descriptions."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title><![CDATA[Using <strong>HTML</strong> in titles]]></title>
-      <link>https://example.com/cdata</link>
-      <description><![CDATA[Content with <em>markup</em>]]></description>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        # feedparser should strip HTML tags
-        assert 'HTML' in entry['title']
-        assert entry['url'] == 'https://example.com/cdata'
-
-    def test_relative_urls(self, tmp_path):
-        """Test that relative URLs are preserved (feedparser handles them)."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <link>https://example.com</link>
-    <item>
-      <title>Relative URL</title>
-      <link>/article/relative</link>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        # feedparser may convert relative to absolute, or leave as-is
-        assert 'article/relative' in entry['url']
-
-    def test_unicode_characters(self, tmp_path):
-        """Test Unicode characters in feed content."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0" encoding="UTF-8"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>Unicode: 日本語 Français 中文 العربية</title>
-      <link>https://example.com/unicode</link>
-      <category>日本語</category>
-      <category>Français</category>
-    </item>
-  </channel>
-</rss>
-        ''', encoding='utf-8')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
-
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
-        entry = snapshots[0]
-        assert '日本語' in entry['title']
-        assert 'Français' in entry['title']
-
-    def test_very_long_title(self, tmp_path):
-        """Test handling of very long titles."""
-        long_title = 'A' * 1000
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text(f'''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <item>
-      <title>{long_title}</title>
-      <link>https://example.com/long</link>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        assert len(entry['title']) == 1000
-        assert entry['title'] == long_title
-
-    def test_multiple_entries_batch(self, tmp_path):
-        """Test processing a large batch of entries."""
-        items = []
-        for i in range(100):
-            items.append(f'''
-    <item>
-      <title>Article {i}</title>
-      <link>https://example.com/article/{i}</link>
-      <category>Tag{i % 10}</category>
-      <pubDate>Mon, {15 + (i % 15)} Jan 2024 10:00:00 GMT</pubDate>
-    </item>
-            ''')
-
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text(f'''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <title>Large Feed</title>
-    {''.join(items)}
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr or 'urls.jsonl' in result.stdout
-
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
-
-        # Should have 10 unique tags (Tag0-Tag9) + 100 snapshots
-        tags = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Tag']
-        snapshots = [json.loads(line) for line in lines if json.loads(line).get('type') == 'Snapshot']
-
-        assert len(tags) == 10
-        assert len(snapshots) == 100
-
-
-class TestRealWorldFeeds:
-    """Test patterns from real-world RSS feeds."""
-
-    def test_medium_style_feed(self, tmp_path):
-        """Test Medium-style feed structure."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<rss version="2.0">
-  <channel>
-    <title>Medium Feed</title>
-    <item>
-      <title>Article Title</title>
-      <link>https://medium.com/@user/article-slug-123abc</link>
-      <guid isPermaLink="false">https://medium.com/p/123abc</guid>
-      <pubDate>Wed, 15 Jan 2024 10:30:00 GMT</pubDate>
-      <category>Programming</category>
-      <category>JavaScript</category>
-      <dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Author Name</dc:creator>
-    </item>
-  </channel>
-</rss>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
-        entry = snapshots[0]
-        assert 'medium.com' in entry['url']
-        assert entry['title'] == 'Article Title'
-
-    def test_reddit_style_feed(self, tmp_path):
-        """Test Reddit-style feed structure."""
-        input_file = tmp_path / 'feed.rss'
-        input_file.write_text('''<?xml version="1.0"?>
-<feed xmlns="http://www.w3.org/2005/Atom">
-  <title>Reddit Feed</title>
-  <entry>
-    <title>Post Title</title>
-    <link href="https://www.reddit.com/r/programming/comments/abc123/post_title/"/>
-    <updated>2024-01-15T10:30:00+00:00</updated>
-    <category term="programming" label="r/programming"/>
-    <id>t3_abc123</id>
-  </entry>
-</feed>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '\"type\": \"Snapshot\"' in line]
-
-        snapshots = [json.loads(line) for line in lines if json.loads(line)['type'] == 'Snapshot']
-        entry = snapshots[0]
-        assert 'reddit.com' in entry['url']
-
-    def test_youtube_style_feed(self, tmp_path):
-        """Test YouTube-style feed structure."""
-        input_file = tmp_path / 'feed.atom'
-        input_file.write_text('''<?xml version="1.0"?>
-<feed xmlns:yt="http://www.youtube.com/xml/schemas/2015"
-      xmlns="http://www.w3.org/2005/Atom">
-  <title>YouTube Channel</title>
-  <entry>
-    <title>Video Title</title>
-    <link rel="alternate" href="https://www.youtube.com/watch?v=dQw4w9WgXcQ"/>
-    <published>2024-01-15T10:30:00+00:00</published>
-    <yt:videoId>dQw4w9WgXcQ</yt:videoId>
-    <yt:channelId>UCxxxxxxxx</yt:channelId>
-  </entry>
-</feed>
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        # Output goes to stdout (JSONL)
-        lines = [line for line in result.stdout.strip().split('\n') if '\"type\": \"Snapshot\"' in line]
-        entry = json.loads(lines[0])
-
-        assert 'youtube.com' in entry['url']
-        assert 'dQw4w9WgXcQ' in entry['url']
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/parse_txt_urls/config.json b/archivebox/plugins/parse_txt_urls/config.json
deleted file mode 100644
index ea183cc1..00000000
--- a/archivebox/plugins/parse_txt_urls/config.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "PARSE_TXT_URLS_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["USE_PARSE_TXT_URLS"],
-      "description": "Enable plain text URL parsing"
-    }
-  }
-}
diff --git a/archivebox/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py b/archivebox/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py
deleted file mode 100755
index 4ee3c4b2..00000000
--- a/archivebox/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py
+++ /dev/null
@@ -1,162 +0,0 @@
-#!/usr/bin/env python3
-"""
-Parse plain text files and extract URLs.
-
-This is a standalone extractor that can run without ArchiveBox.
-It reads text content from a URL (file:// or https://) and extracts all URLs found.
-
-Usage: ./on_Snapshot__52_parse_txt_urls.py --url=<url>
-Output: Appends discovered URLs to urls.jsonl in current directory
-
-Examples:
-    ./on_Snapshot__52_parse_txt_urls.py --url=file:///path/to/urls.txt
-    ./on_Snapshot__52_parse_txt_urls.py --url=https://example.com/urls.txt
-"""
-
-import json
-import os
-import re
-import sys
-from datetime import datetime, timezone
-from html import unescape
-from pathlib import Path
-from urllib.parse import urlparse
-from urllib.request import urlopen
-
-import rich_click as click
-
-PLUGIN_NAME = 'parse_txt_urls'
-URLS_FILE = Path('urls.jsonl')
-
-# URL regex from archivebox/misc/util.py
-# https://mathiasbynens.be/demo/url-regex
-URL_REGEX = re.compile(
-    r'(?=('
-    r'http[s]?://'                     # start matching from allowed schemes
-    r'(?:[a-zA-Z]|[0-9]'               # followed by allowed alphanum characters
-    r'|[-_$@.&+!*\(\),]'               #   or allowed symbols (keep hyphen first to match literal hyphen)
-    r'|[^\u0000-\u007F])+'             #   or allowed unicode bytes
-    r'[^\]\[<>"\'\s]+'                 # stop parsing at these symbols
-    r'))',
-    re.IGNORECASE | re.UNICODE,
-)
-
-
-def parens_are_matched(string: str, open_char='(', close_char=')') -> bool:
-    """Check that all parentheses in a string are balanced and nested properly."""
-    count = 0
-    for c in string:
-        if c == open_char:
-            count += 1
-        elif c == close_char:
-            count -= 1
-        if count < 0:
-            return False
-    return count == 0
-
-
-def fix_url_from_markdown(url_str: str) -> str:
-    """
-    Cleanup a regex-parsed URL that may contain trailing parens from markdown syntax.
-    Example: https://wiki.org/article_(Disambiguation).html?q=1).text -> https://wiki.org/article_(Disambiguation).html?q=1
-    """
-    trimmed_url = url_str
-
-    # Cut off trailing characters until parens are balanced
-    while not parens_are_matched(trimmed_url):
-        trimmed_url = trimmed_url[:-1]
-
-    # Verify trimmed URL is still valid
-    if re.findall(URL_REGEX, trimmed_url):
-        return trimmed_url
-
-    return url_str
-
-
-def find_all_urls(text: str):
-    """Find all URLs in a text string."""
-    for url in re.findall(URL_REGEX, text):
-        yield fix_url_from_markdown(url)
-
-
-def fetch_content(url: str) -> str:
-    """Fetch content from a URL (supports file:// and https://)."""
-    parsed = urlparse(url)
-
-    if parsed.scheme == 'file':
-        # Local file
-        file_path = parsed.path
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-            return f.read()
-    else:
-        # Remote URL
-        timeout = int(os.environ.get('TIMEOUT', '60'))
-        user_agent = os.environ.get('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
-
-        import urllib.request
-        req = urllib.request.Request(url, headers={'User-Agent': user_agent})
-        with urllib.request.urlopen(req, timeout=timeout) as response:
-            return response.read().decode('utf-8', errors='replace')
-
-
-@click.command()
-@click.option('--url', required=True, help='URL to parse (file:// or https://)')
-@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID')
-@click.option('--crawl-id', required=False, help='Crawl UUID')
-@click.option('--depth', type=int, default=0, help='Current depth level')
-def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0):
-    """Parse plain text and extract URLs."""
-    env_depth = os.environ.get('SNAPSHOT_DEPTH')
-    if env_depth is not None:
-        try:
-            depth = int(env_depth)
-        except Exception:
-            pass
-    crawl_id = crawl_id or os.environ.get('CRAWL_ID')
-
-    try:
-        content = fetch_content(url)
-    except Exception as e:
-        click.echo(f'Failed to fetch {url}: {e}', err=True)
-        sys.exit(1)
-
-    urls_found = set()
-    for found_url in find_all_urls(content):
-        cleaned_url = unescape(found_url)
-        # Skip the source URL itself
-        if cleaned_url != url:
-            urls_found.add(cleaned_url)
-
-    # Emit Snapshot records to stdout (JSONL)
-    records = []
-    for found_url in sorted(urls_found):
-        record = {
-            'type': 'Snapshot',
-            'url': found_url,
-            'plugin': PLUGIN_NAME,
-            'depth': depth + 1,
-        }
-        if snapshot_id:
-            record['parent_snapshot_id'] = snapshot_id
-        if crawl_id:
-            record['crawl_id'] = crawl_id
-        records.append(record)
-        print(json.dumps(record))
-
-    # Emit ArchiveResult record to mark completion
-    URLS_FILE.write_text('\n'.join(json.dumps(r) for r in records) + ('\n' if records else ''))
-    status = 'succeeded' if urls_found else 'skipped'
-    output_str = URLS_FILE.name
-    ar_record = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output_str,
-    }
-    print(json.dumps(ar_record))
-
-    click.echo(output_str, err=True)
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/parse_txt_urls/templates/icon.html b/archivebox/plugins/parse_txt_urls/templates/icon.html
deleted file mode 100644
index af23375c..00000000
--- a/archivebox/plugins/parse_txt_urls/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--parse_txt_urls" title="Text URLs"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M6 3h8l4 4v14H6z"/><path d="M14 3v5h5"/><path d="M8 12h8"/><path d="M8 16h6"/></svg></span>
diff --git a/archivebox/plugins/parse_txt_urls/tests/test_parse_txt_urls.py b/archivebox/plugins/parse_txt_urls/tests/test_parse_txt_urls.py
deleted file mode 100644
index a3b53289..00000000
--- a/archivebox/plugins/parse_txt_urls/tests/test_parse_txt_urls.py
+++ /dev/null
@@ -1,193 +0,0 @@
-#!/usr/bin/env python3
-"""Unit tests for parse_txt_urls extractor."""
-
-import json
-import subprocess
-import sys
-from pathlib import Path
-
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-SCRIPT_PATH = next(PLUGIN_DIR.glob('on_Snapshot__*_parse_txt_urls.*'), None)
-
-
-class TestParseTxtUrls:
-    """Test the parse_txt_urls extractor CLI."""
-
-    def test_extracts_urls_including_real_example_com(self, tmp_path):
-        """Test extracting URLs from plain text including real example.com."""
-        input_file = tmp_path / 'urls.txt'
-        input_file.write_text('''
-https://example.com
-https://example.com/page
-https://www.iana.org/domains/reserved
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0, f"Failed: {result.stderr}"
-        assert 'urls.jsonl' in result.stderr
-
-        # Parse Snapshot records from stdout
-        lines = [line for line in result.stdout.strip().split('\n') if line.strip() and '"type": "Snapshot"' in line]
-        assert len(lines) == 3
-
-        urls = set()
-        for line in lines:
-            entry = json.loads(line)
-            assert entry['type'] == 'Snapshot'
-            assert 'url' in entry
-            urls.add(entry['url'])
-
-        # Verify real URLs are extracted correctly
-        assert 'https://example.com' in urls
-        assert 'https://example.com/page' in urls
-        assert 'https://www.iana.org/domains/reserved' in urls
-
-        # Verify ArchiveResult record
-        assert '"type": "ArchiveResult"' in result.stdout
-        assert '"status": "succeeded"' in result.stdout
-
-    def test_extracts_urls_from_mixed_content(self, tmp_path):
-        """Test extracting URLs embedded in prose text."""
-        input_file = tmp_path / 'mixed.txt'
-        input_file.write_text('''
-Check out this great article at https://blog.example.com/post
-You can also visit http://docs.test.org for more info.
-Also see https://github.com/user/repo for the code.
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        urls = {json.loads(line)['url'] for line in lines}
-
-        assert 'https://blog.example.com/post' in urls
-        assert 'http://docs.test.org' in urls
-        assert 'https://github.com/user/repo' in urls
-
-    def test_handles_markdown_urls(self, tmp_path):
-        """Test handling URLs in markdown format with parentheses."""
-        input_file = tmp_path / 'markdown.txt'
-        input_file.write_text('''
-[Example](https://example.com/page)
-[Wiki](https://en.wikipedia.org/wiki/Article_(Disambiguation))
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        urls = {json.loads(line)['url'] for line in lines}
-
-        assert 'https://example.com/page' in urls
-        assert any('wikipedia.org' in u for u in urls)
-
-    def test_skips_when_no_urls_found(self, tmp_path):
-        """Test that script returns skipped status when no URLs found."""
-        input_file = tmp_path / 'empty.txt'
-        input_file.write_text('no urls here, just plain text')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        assert 'urls.jsonl' in result.stderr
-        assert '"status": "skipped"' in result.stdout
-
-    def test_exits_1_when_file_not_found(self, tmp_path):
-        """Test that script exits with code 1 when file doesn't exist."""
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', 'file:///nonexistent/path.txt'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 1
-        assert 'Failed to fetch' in result.stderr
-
-    def test_deduplicates_urls(self, tmp_path):
-        """Test that duplicate URLs are deduplicated."""
-        input_file = tmp_path / 'dupes.txt'
-        input_file.write_text('''
-https://example.com
-https://example.com
-https://example.com
-https://other.com
-        ''')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        assert len(lines) == 2
-
-    def test_outputs_to_stdout(self, tmp_path):
-        """Test that output goes to stdout in JSONL format."""
-        input_file = tmp_path / 'urls.txt'
-        input_file.write_text('https://new.com\nhttps://other.com')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        assert len(lines) == 2
-
-        urls = {json.loads(line)['url'] for line in lines}
-        assert 'https://new.com' in urls
-        assert 'https://other.com' in urls
-
-    def test_output_is_valid_json(self, tmp_path):
-        """Test that output contains required fields."""
-        input_file = tmp_path / 'urls.txt'
-        input_file.write_text('https://example.com')
-
-        result = subprocess.run(
-            [sys.executable, str(SCRIPT_PATH), '--url', f'file://{input_file}'],
-            cwd=tmp_path,
-            capture_output=True,
-            text=True,
-        )
-
-        assert result.returncode == 0
-        lines = [line for line in result.stdout.strip().split('\n') if '"type": "Snapshot"' in line]
-        entry = json.loads(lines[0])
-        assert entry['url'] == 'https://example.com'
-        assert entry['type'] == 'Snapshot'
-        assert entry['plugin'] == 'parse_txt_urls'
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/pdf/config.json b/archivebox/plugins/pdf/config.json
deleted file mode 100644
index 1ab6d922..00000000
--- a/archivebox/plugins/pdf/config.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "PDF_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_PDF", "USE_PDF"],
-      "description": "Enable PDF generation"
-    },
-    "PDF_TIMEOUT": {
-      "type": "integer",
-      "default": 60,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for PDF generation in seconds"
-    },
-    "PDF_RESOLUTION": {
-      "type": "string",
-      "default": "1440,2000",
-      "pattern": "^\\d+,\\d+$",
-      "x-fallback": "RESOLUTION",
-      "description": "PDF page resolution (width,height)"
-    }
-  }
-}
diff --git a/archivebox/plugins/pdf/on_Snapshot__52_pdf.js b/archivebox/plugins/pdf/on_Snapshot__52_pdf.js
deleted file mode 100644
index d46a3779..00000000
--- a/archivebox/plugins/pdf/on_Snapshot__52_pdf.js
+++ /dev/null
@@ -1,193 +0,0 @@
-#!/usr/bin/env node
-/**
- * Print a URL to PDF using Chrome/Puppeteer.
- *
- * Requires a Chrome session (from chrome plugin) and connects to it via CDP.
- *
- * Usage: on_Snapshot__52_pdf.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes pdf/output.pdf
- *
- * Environment variables:
- *     PDF_ENABLED: Enable PDF generation (default: true)
- */
-
-const fs = require('fs');
-const path = require('path');
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const {
-    getEnvBool,
-    parseArgs,
-    readCdpUrl,
-} = require('../chrome/chrome_utils.js');
-
-// Check if PDF is enabled BEFORE requiring puppeteer
-if (!getEnvBool('PDF_ENABLED', true)) {
-    console.error('Skipping PDF (PDF_ENABLED=False)');
-    // Temporary failure (config disabled) - NO JSONL emission
-    process.exit(0);
-}
-
-// Now safe to require puppeteer
-const puppeteer = require('puppeteer-core');
-
-// Extractor metadata
-const PLUGIN_NAME = 'pdf';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'output.pdf';
-const CHROME_SESSION_DIR = '../chrome';
-
-// Check if staticfile extractor already downloaded this URL
-const STATICFILE_DIR = '../staticfile';
-function hasStaticFileOutput() {
-    if (!fs.existsSync(STATICFILE_DIR)) return false;
-    const stdoutPath = path.join(STATICFILE_DIR, 'stdout.log');
-    if (!fs.existsSync(stdoutPath)) return false;
-    const stdout = fs.readFileSync(stdoutPath, 'utf8');
-    for (const line of stdout.split('\n')) {
-        const trimmed = line.trim();
-        if (!trimmed.startsWith('{')) continue;
-        try {
-            const record = JSON.parse(trimmed);
-            if (record.type === 'ArchiveResult' && record.status === 'succeeded') {
-                return true;
-            }
-        } catch (e) {}
-    }
-    return false;
-}
-
-// Wait for chrome tab to be fully loaded
-async function waitForChromeTabLoaded(timeoutMs = 60000) {
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    const startTime = Date.now();
-
-    while (Date.now() - startTime < timeoutMs) {
-        if (fs.existsSync(navigationFile)) {
-            return true;
-        }
-        // Wait 100ms before checking again
-        await new Promise(resolve => setTimeout(resolve, 100));
-    }
-
-    return false;
-}
-
-async function printToPdf(url) {
-    // Output directory is current directory (hook already runs in output dir)
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-
-    let browser = null;
-    let page = null;
-
-    try {
-        // Connect to existing Chrome session (required)
-        const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
-        if (!cdpUrl) {
-            return { success: false, error: 'No Chrome session found (chrome plugin must run first)' };
-        }
-
-        browser = await puppeteer.connect({
-            browserWSEndpoint: cdpUrl,
-            defaultViewport: null,
-        });
-
-        // Get existing pages or create new one
-        const pages = await browser.pages();
-        page = pages.find(p => p.url().startsWith('http')) || pages[0];
-
-        if (!page) {
-            page = await browser.newPage();
-        }
-
-        // Print to PDF
-        await page.pdf({
-            path: outputPath,
-            format: 'A4',
-            printBackground: true,
-            margin: {
-                top: '0.5in',
-                right: '0.5in',
-                bottom: '0.5in',
-                left: '0.5in',
-            },
-        });
-
-        if (fs.existsSync(outputPath) && fs.statSync(outputPath).size > 0) {
-            return { success: true, output: outputPath };
-        } else {
-            return { success: false, error: 'PDF file not created' };
-        }
-
-    } catch (e) {
-        return { success: false, error: `${e.name}: ${e.message}` };
-    } finally {
-        if (browser) {
-            browser.disconnect();
-        }
-    }
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__52_pdf.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    try {
-        // Check if staticfile extractor already handled this (permanent skip)
-        if (hasStaticFileOutput()) {
-            console.error(`Skipping PDF - staticfile extractor already downloaded this`);
-            // Permanent skip - emit ArchiveResult
-            console.log(JSON.stringify({
-                type: 'ArchiveResult',
-                status: 'skipped',
-                output_str: 'staticfile already handled',
-            }));
-            process.exit(0);
-        }
-
-        const cdpUrl = readCdpUrl(CHROME_SESSION_DIR);
-        if (!cdpUrl) {
-            throw new Error('No Chrome session found (chrome plugin must run first)');
-        }
-
-        // Wait for page to be fully loaded
-        const pageLoaded = await waitForChromeTabLoaded(60000);
-        if (!pageLoaded) {
-            throw new Error('Page not loaded after 60s (chrome_navigate must complete first)');
-        }
-
-        const result = await printToPdf(url);
-
-        if (result.success) {
-            // Success - emit ArchiveResult
-            const size = fs.statSync(result.output).size;
-            console.error(`PDF saved (${size} bytes)`);
-            console.log(JSON.stringify({
-                type: 'ArchiveResult',
-                status: 'succeeded',
-                output_str: result.output,
-            }));
-            process.exit(0);
-        } else {
-            // Transient error - emit NO JSONL
-            console.error(`ERROR: ${result.error}`);
-            process.exit(1);
-        }
-    } catch (e) {
-        // Transient error - emit NO JSONL
-        console.error(`ERROR: ${e.name}: ${e.message}`);
-        process.exit(1);
-    }
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/pdf/templates/card.html b/archivebox/plugins/pdf/templates/card.html
deleted file mode 100644
index 32895d04..00000000
--- a/archivebox/plugins/pdf/templates/card.html
+++ /dev/null
@@ -1,6 +0,0 @@
-<!-- PDF thumbnail - shows first page preview -->
-<div class="extractor-thumbnail pdf-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #f5f5f5;">
-    <embed src="{{ output_path }}#toolbar=0&navpanes=0&scrollbar=0&page=1&view=FitH"
-           type="application/pdf"
-           style="width: 100%; height: 200px; margin-top: -20px; pointer-events: none;">
-</div>
diff --git a/archivebox/plugins/pdf/templates/full.html b/archivebox/plugins/pdf/templates/full.html
deleted file mode 100644
index 240b7cea..00000000
--- a/archivebox/plugins/pdf/templates/full.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<!-- PDF fullscreen - full PDF viewer -->
-<embed src="{{ output_path }}#toolbar=1&navpanes=1&view=FitH"
-       type="application/pdf"
-       class="extractor-fullscreen pdf-fullscreen"
-       style="width: 100%; height: 100vh;">
diff --git a/archivebox/plugins/pdf/templates/icon.html b/archivebox/plugins/pdf/templates/icon.html
deleted file mode 100644
index 35a0ed89..00000000
--- a/archivebox/plugins/pdf/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--pdf" title="PDF"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M14 3H6a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V9z"/><path d="M14 3v6h6"/><rect x="8" y="12" width="8" height="4" rx="1"/></svg></span>
diff --git a/archivebox/plugins/pdf/tests/test_pdf.py b/archivebox/plugins/pdf/tests/test_pdf.py
deleted file mode 100644
index f9388129..00000000
--- a/archivebox/plugins/pdf/tests/test_pdf.py
+++ /dev/null
@@ -1,194 +0,0 @@
-"""
-Integration tests for pdf plugin
-
-Tests verify:
-    pass
-1. Hook script exists
-2. Dependencies installed via chrome validation hooks
-3. Verify deps with abx-pkg
-4. PDF extraction works on https://example.com
-5. JSONL output is correct
-6. Filesystem output is valid PDF file
-7. Config options work
-"""
-
-import json
-import os
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
-    run_hook_and_parse,
-    LIB_DIR,
-    NODE_MODULES_DIR,
-    PLUGINS_ROOT,
-    chrome_session,
-)
-
-
-PLUGIN_DIR = get_plugin_dir(__file__)
-PDF_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_pdf.*')
-NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Binary__install_using_npm_provider.py'
-TEST_URL = 'https://example.com'
-
-
-def test_hook_script_exists():
-    """Verify on_Snapshot hook exists."""
-    assert PDF_HOOK.exists(), f"Hook not found: {PDF_HOOK}"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify dependencies are available via abx-pkg after hook installation."""
-    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
-
-    EnvProvider.model_rebuild()
-
-    # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
-    node_loaded = node_binary.load()
-    assert node_loaded and node_loaded.abspath, "Node.js required for pdf plugin"
-
-
-def test_extracts_pdf_from_example_com():
-    """Test full workflow: extract PDF from real example.com via hook."""
-    # Prerequisites checked by earlier test
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        with chrome_session(tmpdir, test_url=TEST_URL) as (_process, _pid, snapshot_chrome_dir, env):
-            pdf_dir = snapshot_chrome_dir.parent / 'pdf'
-            pdf_dir.mkdir(exist_ok=True)
-
-            # Run PDF extraction hook
-            result = subprocess.run(
-                ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
-                cwd=pdf_dir,
-                capture_output=True,
-                text=True,
-                timeout=120,
-                env=env
-            )
-
-        # Parse clean JSONL output (hook might fail due to network issues)
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-
-        # Skip verification if network failed
-        if result_json['status'] != 'succeeded':
-            pass
-            if 'TIMED_OUT' in result_json.get('output_str', '') or 'timeout' in result_json.get('output_str', '').lower():
-                pass
-            pytest.fail(f"Extraction failed: {result_json}")
-
-        assert result.returncode == 0, f"Should exit 0 on success: {result.stderr}"
-
-        # Verify filesystem output (hook writes to current directory)
-        pdf_file = pdf_dir / 'output.pdf'
-        assert pdf_file.exists(), "output.pdf not created"
-
-        # Verify file is valid PDF
-        file_size = pdf_file.stat().st_size
-        assert file_size > 500, f"PDF too small: {file_size} bytes"
-        assert file_size < 10 * 1024 * 1024, f"PDF suspiciously large: {file_size} bytes"
-
-        # Check PDF magic bytes
-        pdf_data = pdf_file.read_bytes()
-        assert pdf_data[:4] == b'%PDF', "Should be valid PDF file"
-
-
-def test_config_save_pdf_false_skips():
-    """Test that PDF_ENABLED=False exits without emitting JSONL."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        env = get_test_env()
-        env['PDF_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-
-        # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
-
-
-def test_reports_missing_chrome():
-    """Test that script reports error when Chrome session is missing."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        env = get_test_env()
-        pdf_dir = tmpdir / 'snapshot' / 'pdf'
-        pdf_dir.mkdir(parents=True, exist_ok=True)
-
-        result = subprocess.run(
-            ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test123'],
-            cwd=pdf_dir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode != 0, "Should fail without shared Chrome session"
-        combined = result.stdout + result.stderr
-        assert 'chrome session' in combined.lower() or 'chrome plugin' in combined.lower()
-
-
-def test_runs_with_shared_chrome_session():
-    """Test that PDF hook completes when shared Chrome session is available."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        with chrome_session(tmpdir, test_url=TEST_URL) as (_process, _pid, snapshot_chrome_dir, env):
-            pdf_dir = snapshot_chrome_dir.parent / 'pdf'
-            pdf_dir.mkdir(exist_ok=True)
-
-            result = subprocess.run(
-                ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=testtimeout'],
-                cwd=pdf_dir,
-                capture_output=True,
-                text=True,
-                env=env,
-                timeout=30
-            )
-
-        # Should complete (success or fail, but not hang)
-        assert result.returncode in (0, 1), "Should complete without hanging"
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/pip/on_Binary__11_pip_install.py b/archivebox/plugins/pip/on_Binary__11_pip_install.py
deleted file mode 100644
index 468a2916..00000000
--- a/archivebox/plugins/pip/on_Binary__11_pip_install.py
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/usr/bin/env python3
-"""
-Install a binary using pip package manager.
-
-Usage: on_Binary__install_using_pip_provider.py --binary-id=<uuid> --machine-id=<uuid> --name=<name>
-Output: Binary JSONL record to stdout after installation
-
-Environment variables:
-    LIB_DIR: Library directory including machine type (e.g., data/lib/arm64-darwin) (required)
-"""
-
-import json
-import os
-import shutil
-import subprocess
-import sys
-from pathlib import Path
-
-import rich_click as click
-from abx_pkg import Binary, PipProvider, BinProviderOverrides
-
-# Fix pydantic forward reference issue
-PipProvider.model_rebuild()
-
-
-@click.command()
-@click.option('--binary-id', required=True, help="Binary UUID")
-@click.option('--machine-id', required=True, help="Machine UUID")
-@click.option('--name', required=True, help="Binary name to install")
-@click.option('--binproviders', default='*', help="Allowed providers (comma-separated)")
-@click.option('--overrides', default=None, help="JSON-encoded overrides dict")
-def main(binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None):
-    """Install binary using pip."""
-
-    # Check if pip provider is allowed
-    if binproviders != '*' and 'pip' not in binproviders.split(','):
-        click.echo(f"pip provider not allowed for {name}", err=True)
-        sys.exit(0)
-
-    # Get LIB_DIR from environment (required)
-    # Note: LIB_DIR already includes machine type (e.g., data/lib/arm64-darwin)
-    lib_dir = os.environ.get('LIB_DIR')
-
-    if not lib_dir:
-        click.echo("ERROR: LIB_DIR environment variable not set", err=True)
-        sys.exit(1)
-
-    # Structure: lib/arm64-darwin/pip/venv (PipProvider will create venv automatically)
-    pip_venv_path = Path(lib_dir) / 'pip' / 'venv'
-    pip_venv_path.parent.mkdir(parents=True, exist_ok=True)
-    venv_python = pip_venv_path / 'bin' / 'python'
-
-    # Prefer a stable system python for venv creation if provided/available
-    preferred_python = os.environ.get('PIP_VENV_PYTHON', '').strip()
-    if not preferred_python:
-        for candidate in ('python3.12', 'python3.11', 'python3.10'):
-            if shutil.which(candidate):
-                preferred_python = candidate
-                break
-    if preferred_python and not venv_python.exists():
-        try:
-            subprocess.run(
-                [preferred_python, '-m', 'venv', str(pip_venv_path), '--upgrade-deps'],
-                check=True,
-            )
-        except Exception:
-            # Fall back to PipProvider-managed venv creation
-            pass
-
-    # Use abx-pkg PipProvider to install binary with custom venv
-    provider = PipProvider(pip_venv=pip_venv_path)
-    if not provider.INSTALLER_BIN:
-        click.echo("pip not available on this system", err=True)
-        sys.exit(1)
-
-    click.echo(f"Installing {name} via pip to venv at {pip_venv_path}...", err=True)
-
-    try:
-        # Parse overrides if provided
-        overrides_dict = None
-        if overrides:
-            try:
-                overrides_dict = json.loads(overrides)
-                # Extract pip-specific overrides
-                overrides_dict = overrides_dict.get('pip', {})
-                click.echo(f"Using pip install overrides: {overrides_dict}", err=True)
-            except json.JSONDecodeError:
-                click.echo(f"Warning: Failed to parse overrides JSON: {overrides}", err=True)
-
-        binary = Binary(name=name, binproviders=[provider], overrides={'pip': overrides_dict} if overrides_dict else {}).install()
-    except Exception as e:
-        click.echo(f"pip install failed: {e}", err=True)
-        sys.exit(1)
-
-    if not binary.abspath:
-        click.echo(f"{name} not found after pip install", err=True)
-        sys.exit(1)
-
-    # Output Binary JSONL record to stdout
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'pip',
-    }
-    print(json.dumps(record))
-
-    # Emit PATH update for pip bin dir
-    pip_bin_dir = str(pip_venv_path / 'bin')
-    current_path = os.environ.get('PATH', '')
-
-    # Check if pip_bin_dir is already in PATH
-    path_dirs = current_path.split(':')
-    new_path = f"{pip_bin_dir}:{current_path}" if current_path else pip_bin_dir
-    if pip_bin_dir in path_dirs:
-        new_path = current_path
-    print(json.dumps({
-        'type': 'Machine',
-        'config': {
-            'PATH': new_path,
-        },
-    }))
-
-    # Log human-readable info to stderr
-    click.echo(f"Installed {name} at {binary.abspath}", err=True)
-    click.echo(f"  version: {binary.version}", err=True)
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/pip/templates/icon.html b/archivebox/plugins/pip/templates/icon.html
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/pip/tests/test_pip_provider.py b/archivebox/plugins/pip/tests/test_pip_provider.py
deleted file mode 100644
index d24c7e64..00000000
--- a/archivebox/plugins/pip/tests/test_pip_provider.py
+++ /dev/null
@@ -1,191 +0,0 @@
-"""
-Tests for the pip binary provider plugin.
-
-Tests cover:
-1. Hook script execution
-2. pip package detection
-3. Virtual environment handling
-4. JSONL output format
-"""
-
-import json
-import os
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-from unittest.mock import patch, MagicMock
-
-import pytest
-from django.test import TestCase
-
-
-# Get the path to the pip provider hook
-PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_pip_install.py'), None)
-
-
-class TestPipProviderHook(TestCase):
-    """Test the pip binary provider installation hook."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = tempfile.mkdtemp()
-        self.output_dir = Path(self.temp_dir) / 'output'
-        self.output_dir.mkdir()
-        self.lib_dir = Path(self.temp_dir) / 'lib' / 'x86_64-linux'
-        self.lib_dir.mkdir(parents=True, exist_ok=True)
-        self.lib_dir = Path(self.temp_dir) / 'lib' / 'x86_64-linux'
-        self.lib_dir.mkdir(parents=True, exist_ok=True)
-
-    def tearDown(self):
-        """Clean up."""
-        import shutil
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_hook_script_exists(self):
-        """Hook script should exist."""
-        self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}")
-
-    def test_hook_help(self):
-        """Hook should accept --help without error."""
-        result = subprocess.run(
-            [sys.executable, str(INSTALL_HOOK), '--help'],
-            capture_output=True,
-            text=True,
-            timeout=30
-        )
-        # May succeed or fail depending on implementation
-        # At minimum should not crash with Python error
-        self.assertNotIn('Traceback', result.stderr)
-
-    def test_hook_finds_pip(self):
-        """Hook should find pip binary."""
-        env = os.environ.copy()
-        env['DATA_DIR'] = self.temp_dir
-        env['LIB_DIR'] = str(self.lib_dir)
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=pip',
-                '--binproviders=pip',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-            ],
-            capture_output=True,
-            text=True,
-            cwd=str(self.output_dir),
-            env=env,
-            timeout=60
-        )
-
-        # Check for JSONL output
-        jsonl_found = False
-        for line in result.stdout.split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'Binary' and record.get('name') == 'pip':
-                        jsonl_found = True
-                        # Verify structure
-                        self.assertIn('abspath', record)
-                        self.assertIn('version', record)
-                        break
-                except json.JSONDecodeError:
-                    continue
-
-        # Should not crash
-        self.assertNotIn('Traceback', result.stderr)
-
-        # Should find pip via pip provider
-        self.assertTrue(jsonl_found, "Expected to find pip binary in JSONL output")
-
-    def test_hook_unknown_package(self):
-        """Hook should handle unknown packages gracefully."""
-        env = os.environ.copy()
-        env['DATA_DIR'] = self.temp_dir
-        env['LIB_DIR'] = str(self.lib_dir)
-
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=nonexistent_package_xyz123',
-                '--binproviders=pip',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-            ],
-            capture_output=True,
-            text=True,
-            cwd=str(self.output_dir),
-            env=env,
-            timeout=60
-        )
-
-        # Should not crash
-        self.assertNotIn('Traceback', result.stderr)
-        # May have non-zero exit code for missing package
-
-
-class TestPipProviderIntegration(TestCase):
-    """Integration tests for pip provider with real packages."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = tempfile.mkdtemp()
-        self.output_dir = Path(self.temp_dir) / 'output'
-        self.output_dir.mkdir()
-
-    def tearDown(self):
-        """Clean up."""
-        import shutil
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_hook_finds_pip_installed_binary(self):
-        """Hook should find binaries installed via pip."""
-        pip_check = subprocess.run(
-            [sys.executable, '-m', 'pip', '--version'],
-            capture_output=True,
-            text=True,
-        )
-        assert pip_check.returncode == 0, "pip not available"
-        env = os.environ.copy()
-        env['DATA_DIR'] = self.temp_dir
-
-        # Try to find 'pip' itself which should be available
-        result = subprocess.run(
-            [
-                sys.executable, str(INSTALL_HOOK),
-                '--name=pip',
-                '--binproviders=pip,env',
-                '--binary-id=test-uuid',
-                '--machine-id=test-machine',
-            ],
-            capture_output=True,
-            text=True,
-            cwd=str(self.output_dir),
-            env=env,
-            timeout=60
-        )
-
-        # Look for success in output
-        for line in result.stdout.split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'Binary' and 'pip' in record.get('name', ''):
-                        # Found pip binary
-                        self.assertTrue(record.get('abspath'))
-                        return
-                except json.JSONDecodeError:
-                    continue
-
-        # If we get here without finding pip, that's acceptable
-        # as long as the hook didn't crash
-        self.assertNotIn('Traceback', result.stderr)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/puppeteer/__init__.py b/archivebox/plugins/puppeteer/__init__.py
deleted file mode 100644
index e32e0f82..00000000
--- a/archivebox/plugins/puppeteer/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Plugin namespace for puppeteer utilities.
diff --git a/archivebox/plugins/puppeteer/on_Binary__12_puppeteer_install.py b/archivebox/plugins/puppeteer/on_Binary__12_puppeteer_install.py
deleted file mode 100644
index a30e9cc0..00000000
--- a/archivebox/plugins/puppeteer/on_Binary__12_puppeteer_install.py
+++ /dev/null
@@ -1,170 +0,0 @@
-#!/usr/bin/env python3
-"""
-Install Chromium via the Puppeteer CLI.
-
-Usage: on_Binary__12_puppeteer_install.py --binary-id=<uuid> --machine-id=<uuid> --name=<name>
-Output: Binary JSONL record to stdout after installation
-"""
-
-import json
-import os
-import re
-import sys
-from pathlib import Path
-
-import rich_click as click
-from abx_pkg import Binary, EnvProvider, NpmProvider, BinProviderOverrides
-
-# Fix pydantic forward reference issue
-NpmProvider.model_rebuild()
-
-
-@click.command()
-@click.option('--machine-id', required=True, help='Machine UUID')
-@click.option('--binary-id', required=True, help='Binary UUID')
-@click.option('--name', required=True, help='Binary name to install')
-@click.option('--binproviders', default='*', help='Allowed providers (comma-separated)')
-@click.option('--overrides', default=None, help='JSON-encoded overrides dict')
-def main(machine_id: str, binary_id: str, name: str, binproviders: str, overrides: str | None) -> None:
-    if binproviders != '*' and 'puppeteer' not in binproviders.split(','):
-        sys.exit(0)
-
-    if name not in ('chromium', 'chrome'):
-        sys.exit(0)
-
-    lib_dir = os.environ.get('LIB_DIR', '').strip()
-    if not lib_dir:
-        click.echo('ERROR: LIB_DIR environment variable not set', err=True)
-        sys.exit(1)
-
-    npm_prefix = Path(lib_dir) / 'npm'
-    npm_prefix.mkdir(parents=True, exist_ok=True)
-    npm_provider = NpmProvider(npm_prefix=npm_prefix)
-    cache_dir = Path(lib_dir) / 'puppeteer'
-    cache_dir.mkdir(parents=True, exist_ok=True)
-    os.environ.setdefault('PUPPETEER_CACHE_DIR', str(cache_dir))
-
-    puppeteer_binary = Binary(
-        name='puppeteer',
-        binproviders=[npm_provider, EnvProvider()],
-        overrides={'npm': {'packages': ['puppeteer']}},
-    ).load()
-
-    if not puppeteer_binary.abspath:
-        click.echo('ERROR: puppeteer binary not found (install puppeteer first)', err=True)
-        sys.exit(1)
-
-    install_args = _parse_override_packages(overrides, default=['chromium@latest', '--install-deps'])
-    cmd = ['browsers', 'install', *install_args]
-    proc = puppeteer_binary.exec(cmd=cmd, timeout=300)
-    if proc.returncode != 0:
-        click.echo(proc.stdout.strip(), err=True)
-        click.echo(proc.stderr.strip(), err=True)
-        click.echo(f'ERROR: puppeteer install failed ({proc.returncode})', err=True)
-        sys.exit(1)
-
-    chromium_binary = _load_chromium_binary(proc.stdout + '\n' + proc.stderr)
-    if not chromium_binary or not chromium_binary.abspath:
-        click.echo('ERROR: failed to locate Chromium after install', err=True)
-        sys.exit(1)
-
-    _emit_chromium_binary_record(
-        binary=chromium_binary,
-        machine_id=machine_id,
-        binary_id=binary_id,
-    )
-
-    config_patch = {
-        'CHROME_BINARY': str(chromium_binary.abspath),
-        'CHROMIUM_VERSION': str(chromium_binary.version) if chromium_binary.version else '',
-    }
-
-    print(json.dumps({
-        'type': 'Machine',
-        'config': config_patch,
-    }))
-
-    sys.exit(0)
-
-
-def _parse_override_packages(overrides: str | None, default: list[str]) -> list[str]:
-    if not overrides:
-        return default
-    try:
-        overrides_dict = json.loads(overrides)
-    except json.JSONDecodeError:
-        return default
-
-    if isinstance(overrides_dict, dict):
-        provider_overrides = overrides_dict.get('puppeteer')
-        if isinstance(provider_overrides, dict):
-            packages = provider_overrides.get('packages')
-            if isinstance(packages, list) and packages:
-                return [str(arg) for arg in packages]
-        if isinstance(provider_overrides, list) and provider_overrides:
-            return [str(arg) for arg in provider_overrides]
-    if isinstance(overrides_dict, list) and overrides_dict:
-        return [str(arg) for arg in overrides_dict]
-
-    return default
-
-
-def _emit_chromium_binary_record(binary: Binary, machine_id: str, binary_id: str) -> None:
-    record = {
-        'type': 'Binary',
-        'name': 'chromium',
-        'abspath': str(binary.abspath),
-        'version': str(binary.version) if binary.version else '',
-        'sha256': binary.sha256 or '',
-        'binprovider': 'puppeteer',
-        'machine_id': machine_id,
-        'binary_id': binary_id,
-    }
-    print(json.dumps(record))
-
-
-def _load_chromium_binary(output: str) -> Binary | None:
-    candidates: list[Path] = []
-    match = re.search(r'(?:chromium|chrome)@[^\s]+\s+(\S+)', output)
-    if match:
-        candidates.append(Path(match.group(1)))
-
-    cache_dirs: list[Path] = []
-    cache_env = os.environ.get('PUPPETEER_CACHE_DIR')
-    if cache_env:
-        cache_dirs.append(Path(cache_env))
-
-    home = Path.home()
-    cache_dirs.extend([
-        home / '.cache' / 'puppeteer',
-        home / 'Library' / 'Caches' / 'puppeteer',
-    ])
-
-    for base in cache_dirs:
-        for root in (base, base / 'chromium', base / 'chrome'):
-            try:
-                candidates.extend(root.rglob('Chromium.app/Contents/MacOS/Chromium'))
-            except Exception:
-                pass
-            try:
-                candidates.extend(root.rglob('chrome'))
-            except Exception:
-                pass
-
-    for candidate in candidates:
-        try:
-            binary = Binary(
-                name='chromium',
-                binproviders=[EnvProvider()],
-                overrides={'env': {'abspath': str(candidate)}},
-            ).load()
-        except Exception:
-            continue
-        if binary.abspath:
-            return binary
-
-    return None
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/puppeteer/on_Crawl__60_puppeteer_install.py b/archivebox/plugins/puppeteer/on_Crawl__60_puppeteer_install.py
deleted file mode 100644
index 9125dc2f..00000000
--- a/archivebox/plugins/puppeteer/on_Crawl__60_puppeteer_install.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit Puppeteer Binary dependency for the crawl.
-"""
-
-import json
-import os
-import sys
-
-
-def main() -> None:
-    enabled = os.environ.get('PUPPETEER_ENABLED', 'true').lower() not in ('false', '0', 'no', 'off')
-    if not enabled:
-        sys.exit(0)
-
-    record = {
-        'type': 'Binary',
-        'name': 'puppeteer',
-        'binproviders': 'npm,env',
-        'overrides': {
-            'npm': {
-                'packages': ['puppeteer'],
-            }
-        },
-    }
-    print(json.dumps(record))
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/puppeteer/tests/test_puppeteer.py b/archivebox/plugins/puppeteer/tests/test_puppeteer.py
deleted file mode 100644
index a35db7a1..00000000
--- a/archivebox/plugins/puppeteer/tests/test_puppeteer.py
+++ /dev/null
@@ -1,124 +0,0 @@
-"""Integration tests for puppeteer plugin."""
-
-import json
-import os
-import shutil
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_plugin_dir,
-    get_hook_script,
-)
-
-
-PLUGIN_DIR = get_plugin_dir(__file__)
-CRAWL_HOOK = get_hook_script(PLUGIN_DIR, 'on_Crawl__*_puppeteer_install.py')
-BINARY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Binary__*_puppeteer_install.py')
-NPM_BINARY_HOOK = PLUGIN_DIR.parent / 'npm' / 'on_Binary__10_npm_install.py'
-
-
-def test_hook_scripts_exist():
-    assert CRAWL_HOOK and CRAWL_HOOK.exists(), f"Hook not found: {CRAWL_HOOK}"
-    assert BINARY_HOOK and BINARY_HOOK.exists(), f"Hook not found: {BINARY_HOOK}"
-
-
-def test_crawl_hook_emits_puppeteer_binary():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        result = subprocess.run(
-            [sys.executable, str(CRAWL_HOOK)],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30,
-        )
-
-        assert result.returncode == 0, f"crawl hook failed: {result.stderr}"
-        records = [json.loads(line) for line in result.stdout.splitlines() if line.strip().startswith('{')]
-        binaries = [r for r in records if r.get('type') == 'Binary' and r.get('name') == 'puppeteer']
-        assert binaries, f"Expected Binary record for puppeteer, got: {records}"
-        assert 'npm' in binaries[0].get('binproviders', ''), "puppeteer should be installable via npm provider"
-
-
-def test_puppeteer_installs_chromium():
-    assert shutil.which('npm'), "npm is required for puppeteer installation"
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        lib_dir = tmpdir / 'lib' / 'arm64-darwin'
-        lib_dir.mkdir(parents=True, exist_ok=True)
-
-        env = os.environ.copy()
-        env['LIB_DIR'] = str(lib_dir)
-
-        crawl_result = subprocess.run(
-            [sys.executable, str(CRAWL_HOOK)],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30,
-        )
-        assert crawl_result.returncode == 0, f"crawl hook failed: {crawl_result.stderr}"
-        crawl_records = [json.loads(line) for line in crawl_result.stdout.splitlines() if line.strip().startswith('{')]
-        puppeteer_record = next(
-            (r for r in crawl_records if r.get('type') == 'Binary' and r.get('name') == 'puppeteer'),
-            None,
-        )
-        assert puppeteer_record, f"Expected puppeteer Binary record, got: {crawl_records}"
-
-        npm_result = subprocess.run(
-            [
-                sys.executable,
-                str(NPM_BINARY_HOOK),
-                '--machine-id=test-machine',
-                '--binary-id=test-puppeteer',
-                '--name=puppeteer',
-                f"--binproviders={puppeteer_record.get('binproviders', '*')}",
-                '--overrides=' + json.dumps(puppeteer_record.get('overrides') or {}),
-            ],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=120,
-        )
-        assert npm_result.returncode == 0, (
-            "puppeteer npm install failed\n"
-            f"stdout:\n{npm_result.stdout}\n"
-            f"stderr:\n{npm_result.stderr}"
-        )
-
-        result = subprocess.run(
-            [
-                sys.executable,
-                str(BINARY_HOOK),
-                '--machine-id=test-machine',
-                '--binary-id=test-binary',
-                '--name=chromium',
-                '--binproviders=puppeteer',
-                '--overrides=' + json.dumps({'puppeteer': ['chromium@latest', '--install-deps']}),
-            ],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=120,
-        )
-
-        assert result.returncode == 0, (
-            "puppeteer binary hook failed\n"
-            f"stdout:\n{result.stdout}\n"
-            f"stderr:\n{result.stderr}"
-        )
-
-        records = [json.loads(line) for line in result.stdout.splitlines() if line.strip().startswith('{')]
-        binaries = [r for r in records if r.get('type') == 'Binary' and r.get('name') == 'chromium']
-        assert binaries, f"Expected Binary record for chromium, got: {records}"
-        abspath = binaries[0].get('abspath')
-        assert abspath and Path(abspath).exists(), f"Chromium binary path invalid: {abspath}"
diff --git a/archivebox/plugins/readability/config.json b/archivebox/plugins/readability/config.json
deleted file mode 100644
index 90173047..00000000
--- a/archivebox/plugins/readability/config.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "READABILITY_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_READABILITY", "USE_READABILITY"],
-      "description": "Enable Readability text extraction"
-    },
-    "READABILITY_BINARY": {
-      "type": "string",
-      "default": "readability-extractor",
-      "description": "Path to readability-extractor binary"
-    },
-    "READABILITY_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for Readability in seconds"
-    },
-    "READABILITY_ARGS": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["READABILITY_DEFAULT_ARGS"],
-      "description": "Default Readability arguments"
-    },
-    "READABILITY_ARGS_EXTRA": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["READABILITY_EXTRA_ARGS"],
-      "description": "Extra arguments to append to Readability command"
-    }
-  }
-}
diff --git a/archivebox/plugins/readability/on_Crawl__35_readability_install.py b/archivebox/plugins/readability/on_Crawl__35_readability_install.py
deleted file mode 100755
index 6705c6bb..00000000
--- a/archivebox/plugins/readability/on_Crawl__35_readability_install.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit readability-extractor Binary dependency for the crawl.
-"""
-
-import json
-import os
-import sys
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def output_binary(name: str, binproviders: str):
-    """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'overrides': {
-            'npm': {
-                'packages': ['https://github.com/ArchiveBox/readability-extractor'],
-            },
-        },
-        'machine_id': machine_id,
-    }
-    print(json.dumps(record))
-
-
-def main():
-    readability_enabled = get_env_bool('READABILITY_ENABLED', True)
-
-    if not readability_enabled:
-        sys.exit(0)
-
-    output_binary(name='readability-extractor', binproviders='npm,env')
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/readability/on_Snapshot__56_readability.py b/archivebox/plugins/readability/on_Snapshot__56_readability.py
deleted file mode 100644
index 2c083fb6..00000000
--- a/archivebox/plugins/readability/on_Snapshot__56_readability.py
+++ /dev/null
@@ -1,199 +0,0 @@
-#!/usr/bin/env python3
-"""
-Extract article content using Mozilla's Readability.
-
-Usage: on_Snapshot__readability.py --url=<url> --snapshot-id=<uuid>
-Output: Creates readability/ directory with content.html, content.txt, article.json
-
-Environment variables:
-    READABILITY_BINARY: Path to readability-extractor binary
-    READABILITY_TIMEOUT: Timeout in seconds (default: 60)
-    READABILITY_ARGS: Default Readability arguments (JSON array)
-    READABILITY_ARGS_EXTRA: Extra arguments to append (JSON array)
-    TIMEOUT: Fallback timeout
-
-Note: Requires readability-extractor from https://github.com/ArchiveBox/readability-extractor
-      This extractor looks for HTML source from other extractors (wget, singlefile, dom)
-"""
-
-import json
-import os
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-from urllib.parse import urlparse
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'readability'
-BIN_NAME = 'readability-extractor'
-BIN_PROVIDERS = 'npm,env'
-OUTPUT_DIR = '.'
-OUTPUT_FILE = 'content.html'
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
-    """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
-    if not val:
-        return default if default is not None else []
-    try:
-        result = json.loads(val)
-        if isinstance(result, list):
-            return [str(item) for item in result]
-        return default if default is not None else []
-    except json.JSONDecodeError:
-        return default if default is not None else []
-
-
-def find_html_source() -> str | None:
-    """Find HTML content from other extractors in the snapshot directory."""
-    # Hooks run in snapshot_dir, sibling extractor outputs are in subdirectories
-    search_patterns = [
-        'singlefile/singlefile.html',
-        '*_singlefile/singlefile.html',
-        'singlefile/*.html',
-        '*_singlefile/*.html',
-        'dom/output.html',
-        '*_dom/output.html',
-        'dom/*.html',
-        '*_dom/*.html',
-        'wget/**/*.html',
-        '*_wget/**/*.html',
-        'wget/**/*.htm',
-        '*_wget/**/*.htm',
-    ]
-
-    for base in (Path.cwd(), Path.cwd().parent):
-        for pattern in search_patterns:
-            matches = list(base.glob(pattern))
-            for match in matches:
-                if match.is_file() and match.stat().st_size > 0:
-                    return str(match)
-
-    return None
-
-
-def extract_readability(url: str, binary: str) -> tuple[bool, str | None, str]:
-    """
-    Extract article using Readability.
-
-    Returns: (success, output_path, error_message)
-    """
-    timeout = get_env_int('READABILITY_TIMEOUT') or get_env_int('TIMEOUT', 60)
-    readability_args = get_env_array('READABILITY_ARGS', [])
-    readability_args_extra = get_env_array('READABILITY_ARGS_EXTRA', [])
-
-    # Find HTML source
-    html_source = find_html_source()
-    if not html_source:
-        return False, None, 'No HTML source found (run singlefile, dom, or wget first)'
-
-    # Output directory is current directory (hook already runs in output dir)
-    output_dir = Path(OUTPUT_DIR)
-
-    try:
-        # Run readability-extractor (outputs JSON by default)
-        cmd = [binary, *readability_args, *readability_args_extra, html_source]
-        result = subprocess.run(cmd, stdout=subprocess.PIPE, timeout=timeout, text=True)
-
-        if result.stdout:
-            sys.stderr.write(result.stdout)
-            sys.stderr.flush()
-
-        if result.returncode != 0:
-            return False, None, f'readability-extractor failed (exit={result.returncode})'
-
-        # Parse JSON output
-        try:
-            result_json = json.loads(result.stdout)
-        except json.JSONDecodeError:
-            return False, None, 'readability-extractor returned invalid JSON'
-
-        # Extract and save content
-        # readability-extractor uses camelCase field names (textContent, content)
-        text_content = result_json.pop('textContent', result_json.pop('text-content', ''))
-        html_content = result_json.pop('content', result_json.pop('html-content', ''))
-
-        if not text_content and not html_content:
-            return False, None, 'No content extracted'
-
-        (output_dir / OUTPUT_FILE).write_text(html_content, encoding='utf-8')
-        (output_dir / 'content.txt').write_text(text_content, encoding='utf-8')
-        (output_dir / 'article.json').write_text(json.dumps(result_json, indent=2), encoding='utf-8')
-
-        # Link images/ to responses capture (if available)
-        try:
-            hostname = urlparse(url).hostname or ''
-            if hostname:
-                responses_images = (output_dir / '..' / 'responses' / 'image' / hostname / 'images').resolve()
-                link_path = output_dir / 'images'
-                if responses_images.exists() and responses_images.is_dir():
-                    if link_path.exists() or link_path.is_symlink():
-                        if link_path.is_symlink() or link_path.is_file():
-                            link_path.unlink()
-                        else:
-                            responses_images = None
-                    if responses_images:
-                        rel_target = os.path.relpath(str(responses_images), str(output_dir))
-                        link_path.symlink_to(rel_target)
-        except Exception:
-            pass
-
-        return True, OUTPUT_FILE, ''
-
-    except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
-    except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
-
-
-@click.command()
-@click.option('--url', required=True, help='URL to extract article from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Extract article content using Mozilla's Readability."""
-
-    try:
-        # Get binary from environment
-        binary = get_env('READABILITY_BINARY', 'readability-extractor')
-
-        # Run extraction
-        success, output, error = extract_readability(url, binary)
-
-        if success:
-            # Success - emit ArchiveResult
-            result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
-            }
-            print(json.dumps(result))
-            sys.exit(0)
-        else:
-            # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
-            sys.exit(1)
-
-    except Exception as e:
-        # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/readability/templates/card.html b/archivebox/plugins/readability/templates/card.html
deleted file mode 100644
index 5e118e55..00000000
--- a/archivebox/plugins/readability/templates/card.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<!-- Readability thumbnail - shows reader-mode extracted article content -->
-<div class="extractor-thumbnail readability-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fefefe; padding: 8px; font-family: Georgia, serif; font-size: 11px; line-height: 1.4; color: #333;">
-    <iframe src="{{ output_path }}"
-            style="width: 100%; height: 300px; border: none; pointer-events: none;"
-            loading="lazy"
-            sandbox="allow-same-origin">
-    </iframe>
-</div>
diff --git a/archivebox/plugins/readability/templates/full.html b/archivebox/plugins/readability/templates/full.html
deleted file mode 100644
index d55d6c1c..00000000
--- a/archivebox/plugins/readability/templates/full.html
+++ /dev/null
@@ -1,6 +0,0 @@
-<!-- Readability fullscreen - show extracted article HTML -->
-<iframe class="full-page-iframe"
-        src="{{ output_path }}"
-        name="preview"
-        sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms">
-</iframe>
diff --git a/archivebox/plugins/readability/templates/icon.html b/archivebox/plugins/readability/templates/icon.html
deleted file mode 100644
index ae67c26f..00000000
--- a/archivebox/plugins/readability/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--readability" title="Readability"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M3 6h7a3 3 0 0 1 3 3v10H6a3 3 0 0 0-3 3z"/><path d="M21 6h-7a3 3 0 0 0-3 3v10h7a3 3 0 0 1 3 3z"/></svg></span>
diff --git a/archivebox/plugins/readability/tests/test_readability.py b/archivebox/plugins/readability/tests/test_readability.py
deleted file mode 100644
index b416169e..00000000
--- a/archivebox/plugins/readability/tests/test_readability.py
+++ /dev/null
@@ -1,223 +0,0 @@
-"""
-Integration tests for readability plugin
-
-Tests verify:
-1. Validate hook checks for readability-extractor binary
-2. Verify deps with abx-pkg
-3. Plugin reports missing dependency correctly
-4. Extraction works against real example.com content
-"""
-
-import json
-import shutil
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_plugin_dir,
-    get_hook_script,
-    PLUGINS_ROOT,
-)
-
-
-PLUGIN_DIR = get_plugin_dir(__file__)
-READABILITY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_readability.*')
-TEST_URL = 'https://example.com'
-
-
-def create_example_html(tmpdir: Path) -> Path:
-    """Create sample HTML that looks like example.com with enough content for Readability."""
-    singlefile_dir = tmpdir / 'singlefile'
-    singlefile_dir.mkdir()
-
-    html_file = singlefile_dir / 'singlefile.html'
-    html_file.write_text('''
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="utf-8">
-    <title>Example Domain</title>
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-</head>
-<body>
-    <article>
-        <header>
-            <h1>Example Domain</h1>
-        </header>
-        <div class="content">
-            <p>This domain is for use in illustrative examples in documents. You may use this
-            domain in literature without prior coordination or asking for permission.</p>
-
-            <p>Example domains are maintained by the Internet Assigned Numbers Authority (IANA)
-            to provide a well-known address for documentation purposes. This helps authors create
-            examples that readers can understand without confusion about actual domain ownership.</p>
-
-            <p>The practice of using example domains dates back to the early days of the internet.
-            These reserved domains ensure that example code and documentation doesn't accidentally
-            point to real, active websites that might change or disappear over time.</p>
-
-            <p>For more information about example domains and their history, you can visit the
-            IANA website. They maintain several example domains including example.com, example.net,
-            and example.org, all specifically reserved for this purpose.</p>
-
-            <p><a href="https://www.iana.org/domains/example">More information about example domains...</a></p>
-        </div>
-    </article>
-</body>
-</html>
-    ''')
-
-    return html_file
-
-
-def test_hook_script_exists():
-    """Verify hook script exists."""
-    assert READABILITY_HOOK.exists(), f"Hook script not found: {READABILITY_HOOK}"
-
-
-def test_reports_missing_dependency_when_not_installed():
-    """Test that script reports DEPENDENCY_NEEDED when readability-extractor is not found."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Create HTML source so it doesn't fail on missing HTML
-        create_example_html(tmpdir)
-
-        # Run with empty PATH so binary won't be found
-        env = {'PATH': '/nonexistent', 'HOME': str(tmpdir)}
-
-        result = subprocess.run(
-            [sys.executable, str(READABILITY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env
-        )
-
-        # Missing binary is a transient error - should exit 1 with no JSONL
-        assert result.returncode == 1, "Should exit 1 when dependency missing"
-
-        # Should NOT emit JSONL (transient error - will be retried)
-        jsonl_lines = [line for line in result.stdout.strip().split('\n')
-                      if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, "Should not emit JSONL for transient error (missing binary)"
-
-        # Should log error to stderr
-        assert 'readability-extractor' in result.stderr.lower() or 'error' in result.stderr.lower(), \
-            "Should report error in stderr"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify readability-extractor is available via abx-pkg."""
-    from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
-
-    readability_binary = Binary(
-        name='readability-extractor',
-        binproviders=[NpmProvider(), EnvProvider()],
-        overrides={'npm': {'packages': ['github:ArchiveBox/readability-extractor']}}
-    )
-    readability_loaded = readability_binary.load()
-
-    if readability_loaded and readability_loaded.abspath:
-        assert True, "readability-extractor is available"
-    else:
-        pass
-
-
-def test_extracts_article_after_installation():
-    """Test full workflow: extract article using readability-extractor from real HTML."""
-    # Prerequisites checked by earlier test (install hook should have run)
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Create example.com HTML for readability to process
-        create_example_html(tmpdir)
-
-        # Run readability extraction (should find the binary)
-        result = subprocess.run(
-            [sys.executable, str(READABILITY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Verify output files exist (hook writes to current directory)
-        html_file = tmpdir / 'content.html'
-        txt_file = tmpdir / 'content.txt'
-        json_file = tmpdir / 'article.json'
-
-        assert html_file.exists(), "content.html not created"
-        assert txt_file.exists(), "content.txt not created"
-        assert json_file.exists(), "article.json not created"
-
-        # Verify HTML content contains REAL example.com text
-        html_content = html_file.read_text()
-        assert len(html_content) > 100, f"HTML content too short: {len(html_content)} bytes"
-        assert 'example domain' in html_content.lower(), "Missing 'Example Domain' in HTML"
-        assert ('illustrative examples' in html_content.lower() or
-                'use in' in html_content.lower() or
-                'literature' in html_content.lower()), \
-            "Missing example.com description in HTML"
-
-        # Verify text content contains REAL example.com text
-        txt_content = txt_file.read_text()
-        assert len(txt_content) > 50, f"Text content too short: {len(txt_content)} bytes"
-        assert 'example' in txt_content.lower(), "Missing 'example' in text"
-
-        # Verify JSON metadata
-        json_data = json.loads(json_file.read_text())
-        assert isinstance(json_data, dict), "article.json should be a dict"
-
-
-def test_fails_gracefully_without_html_source():
-    """Test that extraction fails gracefully when no HTML source is available."""
-    # Prerequisites checked by earlier test (install hook should have run)
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Don't create any HTML source files
-
-        result = subprocess.run(
-            [sys.executable, str(READABILITY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=30
-        )
-
-        assert result.returncode != 0, "Should fail without HTML source"
-        combined_output = result.stdout + result.stderr
-        assert ('no html source' in combined_output.lower() or
-                'not found' in combined_output.lower() or
-                'ERROR=' in combined_output), \
-            "Should report missing HTML source"
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/redirects/config.json b/archivebox/plugins/redirects/config.json
deleted file mode 100644
index 64a8f38b..00000000
--- a/archivebox/plugins/redirects/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "REDIRECTS_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_REDIRECTS", "USE_REDIRECTS"],
-      "description": "Enable redirect chain capture"
-    },
-    "REDIRECTS_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for redirect capture in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/redirects/on_Snapshot__25_redirects.bg.js b/archivebox/plugins/redirects/on_Snapshot__25_redirects.bg.js
deleted file mode 100755
index 96defe1b..00000000
--- a/archivebox/plugins/redirects/on_Snapshot__25_redirects.bg.js
+++ /dev/null
@@ -1,238 +0,0 @@
-#!/usr/bin/env node
-/**
- * Capture redirect chain using CDP during page navigation.
- *
- * This hook sets up CDP listeners BEFORE chrome_navigate to capture the
- * redirect chain from the initial request. It stays alive through navigation
- * and emits JSONL on SIGTERM.
- *
- * Usage: on_Snapshot__25_redirects.bg.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes redirects.jsonl
- */
-
-const fs = require('fs');
-const path = require('path');
-
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const puppeteer = require('puppeteer-core');
-
-// Import shared utilities from chrome_utils.js
-const {
-    getEnvBool,
-    getEnvInt,
-    parseArgs,
-    connectToPage,
-    waitForPageLoaded,
-} = require('../chrome/chrome_utils.js');
-
-const PLUGIN_NAME = 'redirects';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'redirects.jsonl';
-const CHROME_SESSION_DIR = '../chrome';
-
-// Global state
-let redirectChain = [];
-let originalUrl = '';
-let finalUrl = '';
-let page = null;
-let browser = null;
-let initialRecorded = false;
-
-async function setupRedirectListener() {
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-    const timeout = getEnvInt('REDIRECTS_TIMEOUT', 30) * 1000;
-
-    fs.writeFileSync(outputPath, ''); // Clear existing
-
-    // Connect to Chrome page using shared utility
-    const connection = await connectToPage({
-        chromeSessionDir: CHROME_SESSION_DIR,
-        timeoutMs: timeout,
-        puppeteer,
-    });
-    browser = connection.browser;
-    page = connection.page;
-
-    // Enable CDP Network domain to capture redirects
-    const client = await page.target().createCDPSession();
-    await client.send('Network.enable');
-
-    // Track redirect chain using CDP
-    client.on('Network.requestWillBeSent', (params) => {
-        const { requestId, request, redirectResponse } = params;
-
-        if (!initialRecorded && request.url && request.url.startsWith('http')) {
-            const initialEntry = {
-                timestamp: new Date().toISOString(),
-                from_url: null,
-                to_url: request.url,
-                status: null,
-                type: 'initial',
-                request_id: requestId,
-            };
-            redirectChain.push(initialEntry);
-            fs.appendFileSync(outputPath, JSON.stringify(initialEntry) + '\n');
-            initialRecorded = true;
-        }
-
-        if (redirectResponse) {
-            // This is a redirect
-            const redirectEntry = {
-                timestamp: new Date().toISOString(),
-                from_url: redirectResponse.url,
-                to_url: request.url,
-                status: redirectResponse.status,
-                type: 'http',
-                request_id: requestId,
-            };
-            redirectChain.push(redirectEntry);
-            fs.appendFileSync(outputPath, JSON.stringify(redirectEntry) + '\n');
-        }
-
-        // Update final URL
-        if (request.url && request.url.startsWith('http')) {
-            finalUrl = request.url;
-        }
-    });
-
-    // After page loads, check for meta refresh and JS redirects
-    page.on('load', async () => {
-        try {
-            // Small delay to let page settle
-            await new Promise(resolve => setTimeout(resolve, 500));
-
-            // Check for meta refresh
-            const metaRefresh = await page.evaluate(() => {
-                const meta = document.querySelector('meta[http-equiv="refresh"]');
-                if (meta) {
-                    const content = meta.getAttribute('content') || '';
-                    const match = content.match(/url=['"]?([^'";\s]+)['"]?/i);
-                    return { content, url: match ? match[1] : null };
-                }
-                return null;
-            });
-
-            if (metaRefresh && metaRefresh.url) {
-                const entry = {
-                    timestamp: new Date().toISOString(),
-                    from_url: page.url(),
-                    to_url: metaRefresh.url,
-                    type: 'meta_refresh',
-                    content: metaRefresh.content,
-                };
-                redirectChain.push(entry);
-                fs.appendFileSync(outputPath, JSON.stringify(entry) + '\n');
-            }
-
-            // Check for JS redirects
-            const jsRedirect = await page.evaluate(() => {
-                const html = document.documentElement.outerHTML;
-                const patterns = [
-                    /window\.location\s*=\s*['"]([^'"]+)['"]/i,
-                    /window\.location\.href\s*=\s*['"]([^'"]+)['"]/i,
-                    /window\.location\.replace\s*\(\s*['"]([^'"]+)['"]\s*\)/i,
-                ];
-                for (const pattern of patterns) {
-                    const match = html.match(pattern);
-                    if (match) return { url: match[1], pattern: pattern.toString() };
-                }
-                return null;
-            });
-
-            if (jsRedirect && jsRedirect.url) {
-                const entry = {
-                    timestamp: new Date().toISOString(),
-                    from_url: page.url(),
-                    to_url: jsRedirect.url,
-                    type: 'javascript',
-                };
-                redirectChain.push(entry);
-                fs.appendFileSync(outputPath, JSON.stringify(entry) + '\n');
-            }
-        } catch (e) {
-            // Ignore errors during meta/js redirect detection
-        }
-    });
-
-    return { browser, page };
-}
-
-function handleShutdown(signal) {
-    console.error(`\nReceived ${signal}, emitting final results...`);
-
-    // Emit final JSONL result to stdout
-    const result = {
-        type: 'ArchiveResult',
-        status: 'succeeded',
-        output_str: OUTPUT_FILE,
-        plugin: PLUGIN_NAME,
-        original_url: originalUrl,
-        final_url: finalUrl || originalUrl,
-        redirect_count: redirectChain.length,
-        is_redirect: redirectChain.length > 0 || (finalUrl && finalUrl !== originalUrl),
-    };
-
-    console.log(JSON.stringify(result));
-    process.exit(0);
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__25_redirects.bg.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    originalUrl = url;
-
-    if (!getEnvBool('REDIRECTS_ENABLED', true)) {
-        console.error('Skipping (REDIRECTS_ENABLED=False)');
-        console.log(JSON.stringify({type: 'ArchiveResult', status: 'skipped', output_str: 'REDIRECTS_ENABLED=False'}));
-        process.exit(0);
-    }
-
-    const timeout = getEnvInt('REDIRECTS_TIMEOUT', 30) * 1000;
-
-    // Register signal handlers for graceful shutdown
-    process.on('SIGTERM', () => handleShutdown('SIGTERM'));
-    process.on('SIGINT', () => handleShutdown('SIGINT'));
-
-    try {
-        // Set up redirect listener BEFORE navigation
-        await setupRedirectListener();
-
-        // Wait for chrome_navigate to complete (non-fatal)
-        try {
-            await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 1000);
-        } catch (e) {
-            console.error(`WARN: ${e.message}`);
-        }
-
-        // Keep process alive until killed by cleanup
-        // console.error('Redirect tracking complete, waiting for cleanup signal...');
-
-        // Keep the process alive indefinitely
-        await new Promise(() => {}); // Never resolves
-
-    } catch (e) {
-        const error = `${e.name}: ${e.message}`;
-        console.error(`ERROR: ${error}`);
-
-        console.log(JSON.stringify({
-            type: 'ArchiveResult',
-            status: 'failed',
-            output_str: error,
-        }));
-        process.exit(1);
-    }
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/redirects/templates/icon.html b/archivebox/plugins/redirects/templates/icon.html
deleted file mode 100644
index 8f32e981..00000000
--- a/archivebox/plugins/redirects/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--redirects" title="Redirects"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M4 7h11"/><path d="M12 4l3 3-3 3"/><path d="M20 17H9"/><path d="M12 14l-3 3 3 3"/></svg></span>
diff --git a/archivebox/plugins/redirects/tests/test_redirects.py b/archivebox/plugins/redirects/tests/test_redirects.py
deleted file mode 100644
index c26ac273..00000000
--- a/archivebox/plugins/redirects/tests/test_redirects.py
+++ /dev/null
@@ -1,149 +0,0 @@
-"""
-Tests for the redirects plugin.
-
-Tests the real redirects hook with actual URLs to verify
-redirect chain capture.
-"""
-
-import json
-import shutil
-import subprocess
-import sys
-import tempfile
-import time
-from pathlib import Path
-
-import pytest
-from django.test import TestCase
-
-# Import chrome test helpers
-sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
-from chrome_test_helpers import (
-    chrome_session,
-    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
-)
-
-
-def chrome_available() -> bool:
-    """Check if Chrome/Chromium is available."""
-    for name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']:
-        if shutil.which(name):
-            return True
-    return False
-
-
-# Get the path to the redirects hook
-PLUGIN_DIR = get_plugin_dir(__file__)
-REDIRECTS_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_redirects.*')
-
-
-class TestRedirectsPlugin(TestCase):
-    """Test the redirects plugin."""
-
-    def test_redirects_hook_exists(self):
-        """Redirects hook script should exist."""
-        self.assertIsNotNone(REDIRECTS_HOOK, "Redirects hook not found in plugin directory")
-        self.assertTrue(REDIRECTS_HOOK.exists(), f"Hook not found: {REDIRECTS_HOOK}")
-
-
-class TestRedirectsWithChrome(TestCase):
-    """Integration tests for redirects plugin with Chrome."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = Path(tempfile.mkdtemp())
-
-    def tearDown(self):
-        """Clean up."""
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_redirects_captures_navigation(self):
-        """Redirects hook should capture URL navigation without errors."""
-        # Use a URL that doesn't redirect (simple case)
-        test_url = 'https://example.com'
-        snapshot_id = 'test-redirects-snapshot'
-
-        try:
-            with chrome_session(
-                self.temp_dir,
-                crawl_id='test-redirects-crawl',
-                snapshot_id=snapshot_id,
-                test_url=test_url,
-                navigate=True,
-                timeout=30,
-            ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-                # Use the environment from chrome_session (already has CHROME_HEADLESS=true)
-
-
-                # Run redirects hook with the active Chrome session (background hook)
-                result = subprocess.Popen(
-                    ['node', str(REDIRECTS_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                    cwd=str(snapshot_chrome_dir),
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    text=True,
-                    env=env
-                )
-
-                # Check for output file
-                redirects_output = snapshot_chrome_dir / 'redirects.jsonl'
-
-                redirects_data = None
-
-                # Wait briefly for background hook to write output
-                for _ in range(10):
-                    if redirects_output.exists() and redirects_output.stat().st_size > 0:
-                        break
-                    time.sleep(1)
-
-                # Try parsing from file first
-                if redirects_output.exists():
-                    with open(redirects_output) as f:
-                        for line in f:
-                            line = line.strip()
-                            if line.startswith('{'):
-                                try:
-                                    redirects_data = json.loads(line)
-                                    break
-                                except json.JSONDecodeError:
-                                    continue
-
-                # Try parsing from stdout if not in file
-                if not redirects_data:
-                    try:
-                        stdout, stderr = result.communicate(timeout=5)
-                    except subprocess.TimeoutExpired:
-                        stdout, stderr = "", ""
-                    for line in stdout.split('\n'):
-                        line = line.strip()
-                        if line.startswith('{'):
-                            try:
-                                record = json.loads(line)
-                                if 'chain' in record or 'redirects' in record or record.get('type') == 'Redirects':
-                                    redirects_data = record
-                                    break
-                            except json.JSONDecodeError:
-                                continue
-
-                # Verify hook ran successfully
-                # example.com typically doesn't redirect, so we just verify no errors
-                if result.poll() is None:
-                    result.terminate()
-                    try:
-                        stdout, stderr = result.communicate(timeout=5)
-                    except subprocess.TimeoutExpired:
-                        result.kill()
-                        stdout, stderr = result.communicate()
-                else:
-                    stdout, stderr = result.communicate()
-                self.assertNotIn('Traceback', stderr)
-                self.assertNotIn('Error:', stderr)
-
-        except RuntimeError:
-            raise
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/responses/config.json b/archivebox/plugins/responses/config.json
deleted file mode 100644
index 5849fbb9..00000000
--- a/archivebox/plugins/responses/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "RESPONSES_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_RESPONSES", "USE_RESPONSES"],
-      "description": "Enable HTTP response capture"
-    },
-    "RESPONSES_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for response capture in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/responses/on_Snapshot__24_responses.bg.js b/archivebox/plugins/responses/on_Snapshot__24_responses.bg.js
deleted file mode 100755
index 7f4587c1..00000000
--- a/archivebox/plugins/responses/on_Snapshot__24_responses.bg.js
+++ /dev/null
@@ -1,302 +0,0 @@
-#!/usr/bin/env node
-/**
- * Archive all network responses during page load.
- *
- * This hook sets up CDP listeners BEFORE chrome_navigate loads the page,
- * then waits for navigation to complete. The listeners capture all network
- * responses during the navigation.
- *
- * Usage: on_Snapshot__24_responses.js --url=<url> --snapshot-id=<uuid>
- * Output: Creates responses/ directory with index.jsonl
- */
-
-const fs = require('fs');
-const path = require('path');
-const crypto = require('crypto');
-
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const puppeteer = require('puppeteer-core');
-
-// Import shared utilities from chrome_utils.js
-const {
-    getEnv,
-    getEnvBool,
-    getEnvInt,
-    parseArgs,
-    connectToPage,
-    waitForPageLoaded,
-} = require('../chrome/chrome_utils.js');
-
-const PLUGIN_NAME = 'responses';
-const OUTPUT_DIR = '.';
-const CHROME_SESSION_DIR = '../chrome';
-
-let browser = null;
-let page = null;
-let responseCount = 0;
-let shuttingDown = false;
-
-// Resource types to capture (by default, capture everything)
-const DEFAULT_TYPES = ['document', 'script', 'stylesheet', 'font', 'image', 'media', 'xhr', 'websocket'];
-
-function getExtensionFromMimeType(mimeType) {
-    const mimeMap = {
-        'text/html': 'html',
-        'text/css': 'css',
-        'text/javascript': 'js',
-        'application/javascript': 'js',
-        'application/x-javascript': 'js',
-        'application/json': 'json',
-        'application/xml': 'xml',
-        'text/xml': 'xml',
-        'image/png': 'png',
-        'image/jpeg': 'jpg',
-        'image/gif': 'gif',
-        'image/svg+xml': 'svg',
-        'image/webp': 'webp',
-        'font/woff': 'woff',
-        'font/woff2': 'woff2',
-        'font/ttf': 'ttf',
-        'font/otf': 'otf',
-        'application/font-woff': 'woff',
-        'application/font-woff2': 'woff2',
-        'video/mp4': 'mp4',
-        'video/webm': 'webm',
-        'audio/mpeg': 'mp3',
-        'audio/ogg': 'ogg',
-    };
-
-    const mimeBase = (mimeType || '').split(';')[0].trim().toLowerCase();
-    return mimeMap[mimeBase] || '';
-}
-
-function getExtensionFromUrl(url) {
-    try {
-        const pathname = new URL(url).pathname;
-        const match = pathname.match(/\.([a-z0-9]+)$/i);
-        return match ? match[1].toLowerCase() : '';
-    } catch (e) {
-        return '';
-    }
-}
-
-function sanitizeFilename(str, maxLen = 200) {
-    return str
-        .replace(/[^a-zA-Z0-9._-]/g, '_')
-        .slice(0, maxLen);
-}
-
-async function createSymlink(target, linkPath) {
-    try {
-        const dir = path.dirname(linkPath);
-        if (!fs.existsSync(dir)) {
-            fs.mkdirSync(dir, { recursive: true });
-        }
-
-        if (fs.existsSync(linkPath)) {
-            fs.unlinkSync(linkPath);
-        }
-
-        const relativePath = path.relative(dir, target);
-        fs.symlinkSync(relativePath, linkPath);
-    } catch (e) {
-        // Ignore symlink errors
-    }
-}
-
-async function setupListener() {
-    const timeout = getEnvInt('RESPONSES_TIMEOUT', 30) * 1000;
-    const typesStr = getEnv('RESPONSES_TYPES', DEFAULT_TYPES.join(','));
-    const typesToSave = typesStr.split(',').map(t => t.trim().toLowerCase());
-
-    // Create subdirectories
-    const allDir = path.join(OUTPUT_DIR, 'all');
-    if (!fs.existsSync(allDir)) {
-        fs.mkdirSync(allDir, { recursive: true });
-    }
-
-    const indexPath = path.join(OUTPUT_DIR, 'index.jsonl');
-    fs.writeFileSync(indexPath, '');
-
-    // Connect to Chrome page using shared utility
-    const { browser, page } = await connectToPage({
-        chromeSessionDir: CHROME_SESSION_DIR,
-        timeoutMs: timeout,
-        puppeteer,
-    });
-
-    // Set up response listener
-    page.on('response', async (response) => {
-        try {
-            const request = response.request();
-            const url = response.url();
-            const resourceType = request.resourceType().toLowerCase();
-            const method = request.method();
-            const status = response.status();
-
-            // Skip redirects and errors
-            if (status >= 300 && status < 400) return;
-            if (status >= 400 && status < 600) return;
-
-            // Check if we should save this resource type
-            if (typesToSave.length && !typesToSave.includes(resourceType)) {
-                return;
-            }
-
-            // Get response body
-            let bodyBuffer = null;
-            try {
-                bodyBuffer = await response.buffer();
-            } catch (e) {
-                return;
-            }
-
-            if (!bodyBuffer || bodyBuffer.length === 0) {
-                return;
-            }
-
-            // Determine file extension
-            const mimeType = response.headers()['content-type'] || '';
-            let extension = getExtensionFromMimeType(mimeType) || getExtensionFromUrl(url);
-
-            // Create timestamp-based unique filename
-            const timestamp = new Date().toISOString().replace(/[-:]/g, '').replace(/\..+/, '');
-            const urlHash = sanitizeFilename(encodeURIComponent(url).slice(0, 64));
-            const uniqueFilename = `${timestamp}__${method}__${urlHash}${extension ? '.' + extension : ''}`;
-            const uniquePath = path.join(allDir, uniqueFilename);
-
-            // Save to unique file
-            fs.writeFileSync(uniquePath, bodyBuffer);
-
-            // Create URL-organized symlink
-            try {
-                const urlObj = new URL(url);
-                const hostname = urlObj.hostname;
-                const pathname = urlObj.pathname || '/';
-                const filename = path.basename(pathname) || 'index' + (extension ? '.' + extension : '');
-                const dirPathRaw = path.dirname(pathname);
-                const dirPath = dirPathRaw === '.' ? '' : dirPathRaw.replace(/^\/+/, '');
-
-                const symlinkDir = path.join(OUTPUT_DIR, resourceType, hostname, dirPath);
-                const symlinkPath = path.join(symlinkDir, filename);
-                await createSymlink(uniquePath, symlinkPath);
-
-                // Also create a site-style symlink without resource type for easy browsing
-                const siteDir = path.join(OUTPUT_DIR, hostname, dirPath);
-                const sitePath = path.join(siteDir, filename);
-                await createSymlink(uniquePath, sitePath);
-            } catch (e) {
-                // URL parsing or symlink creation failed, skip
-            }
-
-            // Calculate SHA256
-            const sha256 = crypto.createHash('sha256').update(bodyBuffer).digest('hex');
-            const urlSha256 = crypto.createHash('sha256').update(url).digest('hex');
-
-            // Write to index
-            const indexEntry = {
-                ts: timestamp,
-                method,
-                url: method === 'DATA' ? url.slice(0, 128) : url,
-                urlSha256,
-                status,
-                resourceType,
-                mimeType: mimeType.split(';')[0],
-                responseSha256: sha256,
-                path: './' + path.relative(OUTPUT_DIR, uniquePath),
-                extension,
-            };
-
-            fs.appendFileSync(indexPath, JSON.stringify(indexEntry) + '\n');
-            responseCount += 1;
-
-        } catch (e) {
-            // Ignore errors
-        }
-    });
-
-    return { browser, page };
-}
-
-function emitResult(status = 'succeeded') {
-    if (shuttingDown) return;
-    shuttingDown = true;
-
-    const outputStr = responseCount > 0
-        ? `responses/ (${responseCount} responses)`
-        : 'responses/';
-    console.log(JSON.stringify({
-        type: 'ArchiveResult',
-        status,
-        output_str: outputStr,
-    }));
-}
-
-async function handleShutdown(signal) {
-    console.error(`\nReceived ${signal}, emitting final results...`);
-    emitResult('succeeded');
-    if (browser) {
-        try {
-            browser.disconnect();
-        } catch (e) {}
-    }
-    process.exit(0);
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__24_responses.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    if (!getEnvBool('RESPONSES_ENABLED', true)) {
-        console.error('Skipping (RESPONSES_ENABLED=False)');
-        console.log(JSON.stringify({type: 'ArchiveResult', status: 'skipped', output_str: 'RESPONSES_ENABLED=False'}));
-        process.exit(0);
-    }
-
-    try {
-        // Set up listener BEFORE navigation
-        const connection = await setupListener();
-        browser = connection.browser;
-        page = connection.page;
-
-        // Register signal handlers for graceful shutdown
-        process.on('SIGTERM', () => handleShutdown('SIGTERM'));
-        process.on('SIGINT', () => handleShutdown('SIGINT'));
-
-        // Wait for chrome_navigate to complete (non-fatal)
-        try {
-            const timeout = getEnvInt('RESPONSES_TIMEOUT', 30) * 1000;
-            await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 1000);
-        } catch (e) {
-            console.error(`WARN: ${e.message}`);
-        }
-
-        // console.error('Responses listener active, waiting for cleanup signal...');
-        await new Promise(() => {}); // Keep alive until SIGTERM
-        return;
-
-    } catch (e) {
-        const error = `${e.name}: ${e.message}`;
-        console.error(`ERROR: ${error}`);
-
-        console.log(JSON.stringify({
-            type: 'ArchiveResult',
-            status: 'failed',
-            output_str: error,
-        }));
-        process.exit(1);
-    }
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/responses/templates/icon.html b/archivebox/plugins/responses/templates/icon.html
deleted file mode 100644
index 51210acb..00000000
--- a/archivebox/plugins/responses/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--responses" title="Responses"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="4" y="4.5" width="16" height="6" rx="2"/><rect x="4" y="13.5" width="16" height="6" rx="2"/><circle cx="8" cy="7.5" r="1" fill="currentColor" stroke="none"/><circle cx="8" cy="16.5" r="1" fill="currentColor" stroke="none"/></svg></span>
diff --git a/archivebox/plugins/responses/tests/test_responses.py b/archivebox/plugins/responses/tests/test_responses.py
deleted file mode 100644
index b6404dcd..00000000
--- a/archivebox/plugins/responses/tests/test_responses.py
+++ /dev/null
@@ -1,127 +0,0 @@
-"""
-Tests for the responses plugin.
-
-Tests the real responses hook with an actual URL to verify
-network response capture.
-"""
-
-import json
-import shutil
-import subprocess
-import sys
-import tempfile
-import time
-from pathlib import Path
-
-from django.test import TestCase
-
-# Import chrome test helpers
-sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
-from chrome_test_helpers import (
-    chrome_session,
-    CHROME_NAVIGATE_HOOK,
-    get_plugin_dir,
-    get_hook_script,
-)
-
-
-# Get the path to the responses hook
-PLUGIN_DIR = get_plugin_dir(__file__)
-RESPONSES_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_responses.*')
-
-
-class TestResponsesPlugin(TestCase):
-    """Test the responses plugin."""
-
-    def test_responses_hook_exists(self):
-        """Responses hook script should exist."""
-        self.assertIsNotNone(RESPONSES_HOOK, "Responses hook not found in plugin directory")
-        self.assertTrue(RESPONSES_HOOK.exists(), f"Hook not found: {RESPONSES_HOOK}")
-
-
-class TestResponsesWithChrome(TestCase):
-    """Integration tests for responses plugin with Chrome."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = Path(tempfile.mkdtemp())
-
-    def tearDown(self):
-        """Clean up."""
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_responses_captures_network_responses(self):
-        """Responses hook should capture network responses from page load."""
-        test_url = 'https://example.com'
-        snapshot_id = 'test-responses-snapshot'
-
-        with chrome_session(
-            self.temp_dir,
-            crawl_id='test-responses-crawl',
-            snapshot_id=snapshot_id,
-            test_url=test_url,
-            navigate=False,
-            timeout=30,
-        ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-            responses_dir = snapshot_chrome_dir.parent / 'responses'
-            responses_dir.mkdir(exist_ok=True)
-
-            # Run responses hook with the active Chrome session (background hook)
-            result = subprocess.Popen(
-                ['node', str(RESPONSES_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                cwd=str(responses_dir),
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                text=True,
-                env=env
-            )
-
-            nav_result = subprocess.run(
-                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                cwd=str(snapshot_chrome_dir),
-                capture_output=True,
-                text=True,
-                timeout=120,
-                env=env
-            )
-            self.assertEqual(nav_result.returncode, 0, f"Navigation failed: {nav_result.stderr}")
-
-            # Check for output directory and index file
-            index_output = responses_dir / 'index.jsonl'
-
-            # Wait briefly for background hook to write output
-            for _ in range(30):
-                if index_output.exists() and index_output.stat().st_size > 0:
-                    break
-                time.sleep(1)
-
-            # Verify hook ran (may keep running waiting for cleanup signal)
-            if result.poll() is None:
-                result.terminate()
-                try:
-                    stdout, stderr = result.communicate(timeout=5)
-                except subprocess.TimeoutExpired:
-                    result.kill()
-                    stdout, stderr = result.communicate()
-            else:
-                stdout, stderr = result.communicate()
-            self.assertNotIn('Traceback', stderr)
-
-            # If index file exists, verify it's valid JSONL
-            if index_output.exists():
-                with open(index_output) as f:
-                    content = f.read().strip()
-                    self.assertTrue(content, "Responses output should not be empty")
-                    for line in content.split('\n'):
-                        if line.strip():
-                            try:
-                                record = json.loads(line)
-                                # Verify structure
-                                self.assertIn('url', record)
-                                self.assertIn('resourceType', record)
-                            except json.JSONDecodeError:
-                                pass  # Some lines may be incomplete
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/screenshot/config.json b/archivebox/plugins/screenshot/config.json
deleted file mode 100644
index 48fae845..00000000
--- a/archivebox/plugins/screenshot/config.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "SCREENSHOT_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_SCREENSHOT", "USE_SCREENSHOT"],
-      "description": "Enable screenshot capture"
-    },
-    "SCREENSHOT_TIMEOUT": {
-      "type": "integer",
-      "default": 60,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for screenshot capture in seconds"
-    },
-    "SCREENSHOT_RESOLUTION": {
-      "type": "string",
-      "default": "1440,2000",
-      "pattern": "^\\d+,\\d+$",
-      "x-fallback": "RESOLUTION",
-      "description": "Screenshot resolution (width,height)"
-    }
-  }
-}
diff --git a/archivebox/plugins/screenshot/on_Snapshot__51_screenshot.js b/archivebox/plugins/screenshot/on_Snapshot__51_screenshot.js
deleted file mode 100644
index 34cd7a44..00000000
--- a/archivebox/plugins/screenshot/on_Snapshot__51_screenshot.js
+++ /dev/null
@@ -1,179 +0,0 @@
-#!/usr/bin/env node
-/**
- * Take a screenshot of a URL using an existing Chrome session.
- *
- * Requires chrome plugin to have already created a Chrome session.
- * Connects to the existing session via CDP and takes a screenshot.
- *
- * Usage: on_Snapshot__51_screenshot.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes screenshot/screenshot.png
- *
- * Environment variables:
- *     SCREENSHOT_ENABLED: Enable screenshot capture (default: true)
- */
-
-const fs = require('fs');
-const path = require('path');
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-// Flush V8 coverage before exiting (for NODE_V8_COVERAGE support)
-function flushCoverageAndExit(exitCode) {
-    if (process.env.NODE_V8_COVERAGE) {
-        try {
-            const v8 = require('v8');
-            v8.takeCoverage();
-        } catch (e) {
-            // Ignore errors during coverage flush
-        }
-    }
-    process.exit(exitCode);
-}
-
-const {
-    getEnv,
-    getEnvBool,
-    parseArgs,
-    connectToPage,
-    waitForPageLoaded,
-    readTargetId,
-} = require('../chrome/chrome_utils.js');
-
-// Check if screenshot is enabled BEFORE requiring puppeteer
-if (!getEnvBool('SCREENSHOT_ENABLED', true)) {
-    console.error('Skipping screenshot (SCREENSHOT_ENABLED=False)');
-    // Temporary failure (config disabled) - NO JSONL emission
-    flushCoverageAndExit(0);
-}
-
-// Now safe to require puppeteer
-const puppeteer = require('puppeteer-core');
-
-// Extractor metadata
-const PLUGIN_NAME = 'screenshot';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'screenshot.png';
-const CHROME_SESSION_DIR = '../chrome';
-
-// Check if staticfile extractor already downloaded this URL
-const STATICFILE_DIR = '../staticfile';
-function hasStaticFileOutput() {
-    if (!fs.existsSync(STATICFILE_DIR)) return false;
-    const stdoutPath = path.join(STATICFILE_DIR, 'stdout.log');
-    if (!fs.existsSync(stdoutPath)) return false;
-    const stdout = fs.readFileSync(stdoutPath, 'utf8');
-    for (const line of stdout.split('\n')) {
-        const trimmed = line.trim();
-        if (!trimmed.startsWith('{')) continue;
-        try {
-            const record = JSON.parse(trimmed);
-            if (record.type === 'ArchiveResult' && record.status === 'succeeded') {
-                return true;
-            }
-        } catch (e) {}
-    }
-    return false;
-}
-
-async function takeScreenshot(url) {
-    // Output directory is current directory (hook already runs in output dir)
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-
-    // Wait for chrome_navigate to complete (writes navigation.json)
-    const timeoutSeconds = parseInt(getEnv('SCREENSHOT_TIMEOUT', '10'), 10);
-    const timeoutMs = timeoutSeconds * 1000;
-    const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
-    if (!fs.existsSync(navigationFile)) {
-        await waitForPageLoaded(CHROME_SESSION_DIR, timeoutMs);
-    }
-
-    const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-    const targetFile = path.join(CHROME_SESSION_DIR, 'target_id.txt');
-    if (!fs.existsSync(cdpFile)) {
-        throw new Error('No Chrome session found (chrome plugin must run first)');
-    }
-    if (!fs.existsSync(targetFile)) {
-        throw new Error('No target_id.txt found (chrome_tab must run first)');
-    }
-    const cdpUrl = fs.readFileSync(cdpFile, 'utf8').trim();
-    if (!cdpUrl.startsWith('ws://') && !cdpUrl.startsWith('wss://')) {
-        throw new Error('Invalid CDP URL in cdp_url.txt');
-    }
-
-    const { browser, page } = await connectToPage({
-        chromeSessionDir: CHROME_SESSION_DIR,
-        timeoutMs,
-        puppeteer,
-    });
-
-    try {
-        const expectedTargetId = readTargetId(CHROME_SESSION_DIR);
-        if (!expectedTargetId) {
-            throw new Error('No target_id.txt found (chrome_tab must run first)');
-        }
-        const actualTargetId = page.target()._targetId;
-        if (actualTargetId !== expectedTargetId) {
-            throw new Error(`Target ${expectedTargetId} not found in Chrome session`);
-        }
-
-        const captureTimeoutMs = Math.max(timeoutMs, 10000);
-        const timeoutPromise = new Promise((_, reject) => {
-            setTimeout(() => reject(new Error('Screenshot capture timed out')), captureTimeoutMs);
-        });
-
-        await page.bringToFront();
-        await Promise.race([
-            page.screenshot({ path: outputPath, fullPage: true }),
-            timeoutPromise,
-        ]);
-
-        return outputPath;
-
-    } finally {
-        // Disconnect from browser (don't close it - we're connected to a shared session)
-        // The chrome_launch hook manages the browser lifecycle
-        await browser.disconnect();
-    }
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__51_screenshot.js --url=<url> --snapshot-id=<uuid>');
-        flushCoverageAndExit(1);
-    }
-
-    // Check if staticfile extractor already handled this (permanent skip)
-    if (hasStaticFileOutput()) {
-        console.error(`Skipping screenshot - staticfile extractor already downloaded this`);
-        // Permanent skip - emit ArchiveResult
-        console.log(JSON.stringify({
-            type: 'ArchiveResult',
-            status: 'skipped',
-            output_str: 'staticfile already handled',
-        }));
-        flushCoverageAndExit(0);
-    }
-
-    // Take screenshot (throws on error)
-    const outputPath = await takeScreenshot(url);
-
-    // Success - emit ArchiveResult
-    const size = fs.statSync(outputPath).size;
-    console.error(`Screenshot saved (${size} bytes)`);
-    console.log(JSON.stringify({
-        type: 'ArchiveResult',
-        status: 'succeeded',
-        output_str: outputPath,
-    }));
-    flushCoverageAndExit(0);
-}
-
-main().catch(e => {
-    // Transient error - emit NO JSONL
-    console.error(`ERROR: ${e.message}`);
-    flushCoverageAndExit(1);
-});
diff --git a/archivebox/plugins/screenshot/templates/card.html b/archivebox/plugins/screenshot/templates/card.html
deleted file mode 100644
index 83cc2adc..00000000
--- a/archivebox/plugins/screenshot/templates/card.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<!-- Screenshot thumbnail - shows the captured screenshot image -->
-<img src="{{ output_path }}"
-     alt="Screenshot of page"
-     class="extractor-thumbnail screenshot-thumbnail"
-     style="width: 100%; height: 100px; object-fit: cover; object-position: top center; background: #333; transform: scale(1.05); transform-origin: top center;"
-     loading="lazy"
-     onerror="this.style.display='none'; this.nextElementSibling.style.display='block';">
-<div style="display: none; text-align: center; padding: 20px; color: #999;">📷 Screenshot</div>
diff --git a/archivebox/plugins/screenshot/templates/full.html b/archivebox/plugins/screenshot/templates/full.html
deleted file mode 100644
index 62226828..00000000
--- a/archivebox/plugins/screenshot/templates/full.html
+++ /dev/null
@@ -1,7 +0,0 @@
-<!-- Screenshot fullscreen - full-width image with vertical scroll -->
-<div style="width: 100%; min-height: 100vh; overflow: auto; background: #222; padding: 0; box-sizing: border-box; display: flex; justify-content: center; align-items: flex-start;">
-    <img src="{{ output_path }}"
-         alt="Screenshot of page"
-         class="extractor-fullscreen screenshot-fullscreen"
-         style="width: auto; max-width: 100%; height: auto; display: block;">
-</div>
diff --git a/archivebox/plugins/screenshot/templates/icon.html b/archivebox/plugins/screenshot/templates/icon.html
deleted file mode 100644
index 4236aee3..00000000
--- a/archivebox/plugins/screenshot/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--screenshot" title="Screenshot"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="6" width="18" height="12" rx="2"/><circle cx="12" cy="12" r="3"/><path d="M8 6l1.5-2h5L16 6"/></svg></span>
diff --git a/archivebox/plugins/screenshot/tests/test_screenshot.py b/archivebox/plugins/screenshot/tests/test_screenshot.py
deleted file mode 100644
index ddc466d3..00000000
--- a/archivebox/plugins/screenshot/tests/test_screenshot.py
+++ /dev/null
@@ -1,454 +0,0 @@
-"""
-Integration tests for screenshot plugin
-
-Tests verify:
-1. Hook script exists
-2. Dependencies installed via chrome validation hooks
-3. Verify deps with abx-pkg
-4. Screenshot extraction works on https://example.com
-5. JSONL output is correct
-6. Filesystem output is valid PNG image
-7. Config options work
-"""
-
-import json
-import os
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
-    run_hook_and_parse,
-    chrome_session,
-    LIB_DIR,
-    NODE_MODULES_DIR,
-    CHROME_PLUGIN_DIR,
-)
-
-# Import chrome test fixture to ensure puppeteer is installed
-from archivebox.plugins.chrome.tests.test_chrome import ensure_chromium_and_puppeteer_installed
-
-
-PLUGIN_DIR = get_plugin_dir(__file__)
-SCREENSHOT_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_screenshot.*')
-
-# Get Chrome hooks for setting up sessions
-CHROME_LAUNCH_HOOK = get_hook_script(CHROME_PLUGIN_DIR, 'on_Crawl__*_chrome_launch.*')
-CHROME_TAB_HOOK = get_hook_script(CHROME_PLUGIN_DIR, 'on_Snapshot__*_chrome_tab.*')
-CHROME_NAVIGATE_HOOK = get_hook_script(CHROME_PLUGIN_DIR, 'on_Snapshot__*_chrome_navigate.*')
-
-TEST_URL = 'https://example.com'
-
-
-def test_hook_script_exists():
-    """Verify on_Snapshot hook exists."""
-    assert SCREENSHOT_HOOK.exists(), f"Hook not found: {SCREENSHOT_HOOK}"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify dependencies are available via abx-pkg after hook installation."""
-    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
-
-    EnvProvider.model_rebuild()
-
-    # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
-    node_loaded = node_binary.load()
-    assert node_loaded and node_loaded.abspath, "Node.js required for screenshot plugin"
-
-
-def test_screenshot_with_chrome_session():
-    """Test multiple screenshot scenarios with one Chrome session to save time."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        test_url = 'https://example.com'
-        snapshot_id = 'test-screenshot-snap'
-
-        try:
-            with chrome_session(
-                Path(tmpdir),
-                crawl_id='test-screenshot-crawl',
-                snapshot_id=snapshot_id,
-                test_url=test_url,
-                navigate=True,
-                timeout=30,
-            ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-
-                # Scenario 1: Basic screenshot extraction
-                screenshot_dir = snapshot_chrome_dir.parent / 'screenshot'
-                screenshot_dir.mkdir()
-
-                result = subprocess.run(
-                    ['node', str(SCREENSHOT_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                    cwd=str(screenshot_dir),
-                    capture_output=True,
-                    text=True,
-                    timeout=30,
-                    env=env
-                )
-
-                assert result.returncode == 0, f"Screenshot extraction failed:\nStderr: {result.stderr}"
-
-                # Parse JSONL output
-                result_json = None
-                for line in result.stdout.strip().split('\n'):
-                    line = line.strip()
-                    if line.startswith('{'):
-                        try:
-                            record = json.loads(line)
-                            if record.get('type') == 'ArchiveResult':
-                                result_json = record
-                                break
-                        except json.JSONDecodeError:
-                            pass
-
-                assert result_json and result_json['status'] == 'succeeded'
-                screenshot_file = screenshot_dir / 'screenshot.png'
-                assert screenshot_file.exists() and screenshot_file.stat().st_size > 1000
-                assert screenshot_file.read_bytes()[:8] == b'\x89PNG\r\n\x1a\n'
-
-                # Scenario 2: Wrong target ID (error case)
-                screenshot_dir3 = snapshot_chrome_dir.parent / 'screenshot3'
-                screenshot_dir3.mkdir()
-                (snapshot_chrome_dir / 'target_id.txt').write_text('nonexistent-target-id')
-
-                result = subprocess.run(
-                    ['node', str(SCREENSHOT_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                    cwd=str(screenshot_dir3),
-                    capture_output=True,
-                    text=True,
-                    timeout=5,
-                    env=env
-                )
-
-                assert result.returncode != 0
-                assert 'target' in result.stderr.lower() and 'not found' in result.stderr.lower()
-
-        except RuntimeError:
-            raise
-
-
-def test_skips_when_staticfile_exists():
-    """Test that screenshot skips when staticfile extractor already handled the URL."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        data_dir = Path(tmpdir)
-        snapshot_dir = data_dir / 'users' / 'testuser' / 'snapshots' / '20240101' / 'example.com' / 'snap-skip'
-        screenshot_dir = snapshot_dir / 'screenshot'
-        screenshot_dir.mkdir(parents=True)
-
-        # Create staticfile output to simulate staticfile extractor already ran
-        staticfile_dir = snapshot_dir / 'staticfile'
-        staticfile_dir.mkdir()
-        (staticfile_dir / 'stdout.log').write_text('{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n')
-
-        env = get_test_env()
-        result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-skip'],
-            cwd=str(screenshot_dir),
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        assert result.returncode == 0, f"Should exit successfully: {result.stderr}"
-
-        # Should emit skipped status
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'skipped', f"Should skip: {result_json}"
-
-
-def test_config_save_screenshot_false_skips():
-    """Test that SCREENSHOT_ENABLED=False exits without emitting JSONL."""
-    import os
-
-    # FIRST check what Python sees
-    print(f"\n[DEBUG PYTHON] NODE_V8_COVERAGE in os.environ: {'NODE_V8_COVERAGE' in os.environ}")
-    print(f"[DEBUG PYTHON] Value: {os.environ.get('NODE_V8_COVERAGE', 'NOT SET')}")
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        env = os.environ.copy()
-        env['SCREENSHOT_ENABLED'] = 'False'
-
-        # Check what's in the copied env
-        print(f"[DEBUG ENV COPY] NODE_V8_COVERAGE in env: {'NODE_V8_COVERAGE' in env}")
-        print(f"[DEBUG ENV COPY] Value: {env.get('NODE_V8_COVERAGE', 'NOT SET')}")
-
-        result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        print(f"[DEBUG RESULT] Exit code: {result.returncode}")
-        print(f"[DEBUG RESULT] Stderr: {result.stderr[:200]}")
-
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-
-        # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
-
-
-def test_reports_missing_chrome():
-    """Test that script reports error when Chrome is not found."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set CHROME_BINARY to nonexistent path
-        env = get_test_env()
-        env['CHROME_BINARY'] = '/nonexistent/chrome'
-
-        result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test123'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        # Should fail and report missing Chrome
-        if result.returncode != 0:
-            combined = result.stdout + result.stderr
-            assert 'chrome' in combined.lower() or 'browser' in combined.lower() or 'ERROR=' in combined
-
-
-def test_waits_for_navigation_timeout():
-    """Test that screenshot waits for navigation.json and times out quickly if missing."""
-    import time
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Create chrome directory without navigation.json to trigger timeout
-        chrome_dir = tmpdir.parent / 'chrome'
-        chrome_dir.mkdir(parents=True, exist_ok=True)
-        (chrome_dir / 'cdp_url.txt').write_text('ws://localhost:9222/devtools/browser/test')
-        (chrome_dir / 'target_id.txt').write_text('test-target-id')
-        # Intentionally NOT creating navigation.json to test timeout
-
-        screenshot_dir = tmpdir / 'screenshot'
-        screenshot_dir.mkdir()
-
-        env = get_test_env()
-        env['SCREENSHOT_TIMEOUT'] = '2'  # Set 2 second timeout
-
-        start_time = time.time()
-        result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-timeout'],
-            cwd=str(screenshot_dir),
-            capture_output=True,
-            text=True,
-            timeout=5,  # Test timeout slightly higher than SCREENSHOT_TIMEOUT
-            env=env
-        )
-        elapsed = time.time() - start_time
-
-        # Should fail when navigation.json doesn't appear
-        assert result.returncode != 0, "Should fail when navigation.json missing"
-        assert 'not loaded' in result.stderr.lower() or 'navigate' in result.stderr.lower(), f"Should mention navigation timeout: {result.stderr}"
-        # Should complete within 3s (2s wait + 1s overhead)
-        assert elapsed < 3, f"Should timeout within 3s, took {elapsed:.1f}s"
-
-
-def test_config_timeout_honored():
-    """Test that CHROME_TIMEOUT config is respected."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set very short timeout
-        env = os.environ.copy()
-        env['CHROME_TIMEOUT'] = '5'
-
-        result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=testtimeout'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        # Should complete (success or fail, but not hang)
-        assert result.returncode in (0, 1), "Should complete without hanging"
-
-
-def test_missing_url_argument():
-    """Test that hook fails gracefully when URL argument is missing."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        env = get_test_env()
-        result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), '--snapshot-id=test-missing-url'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        # Should exit with error
-        assert result.returncode != 0, "Should fail when URL is missing"
-        assert 'Usage:' in result.stderr or 'url' in result.stderr.lower()
-
-
-def test_missing_snapshot_id_argument():
-    """Test that hook fails gracefully when snapshot-id argument is missing."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        env = get_test_env()
-        result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=30,
-            env=env
-        )
-
-        # Should exit with error
-        assert result.returncode != 0, "Should fail when snapshot-id is missing"
-        assert 'Usage:' in result.stderr or 'snapshot' in result.stderr.lower()
-
-
-def test_no_cdp_url_fails():
-    """Test error when chrome dir exists but no cdp_url.txt."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        chrome_dir = tmpdir / 'chrome'
-        chrome_dir.mkdir()
-        # Create target_id.txt and navigation.json but NOT cdp_url.txt
-        (chrome_dir / 'target_id.txt').write_text('test-target')
-        (chrome_dir / 'navigation.json').write_text('{}')
-
-        screenshot_dir = tmpdir / 'screenshot'
-        screenshot_dir.mkdir()
-
-        result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), '--url=https://example.com', '--snapshot-id=test'],
-            cwd=str(screenshot_dir),
-            capture_output=True,
-            text=True,
-            timeout=7,
-            env=get_test_env()
-        )
-
-        assert result.returncode != 0
-        assert 'no chrome session' in result.stderr.lower()
-
-
-def test_no_target_id_fails():
-    """Test error when cdp_url exists but no target_id.txt."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        chrome_dir = tmpdir / 'chrome'
-        chrome_dir.mkdir()
-        # Create cdp_url.txt and navigation.json but NOT target_id.txt
-        (chrome_dir / 'cdp_url.txt').write_text('ws://localhost:9222/devtools/browser/test')
-        (chrome_dir / 'navigation.json').write_text('{}')
-
-        screenshot_dir = tmpdir / 'screenshot'
-        screenshot_dir.mkdir()
-
-        result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), '--url=https://example.com', '--snapshot-id=test'],
-            cwd=str(screenshot_dir),
-            capture_output=True,
-            text=True,
-            timeout=7,
-            env=get_test_env()
-        )
-
-        assert result.returncode != 0
-        assert 'target_id.txt' in result.stderr.lower()
-
-
-def test_invalid_cdp_url_fails():
-    """Test error with malformed CDP URL."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        chrome_dir = tmpdir / 'chrome'
-        chrome_dir.mkdir()
-        (chrome_dir / 'cdp_url.txt').write_text('invalid-url')
-        (chrome_dir / 'target_id.txt').write_text('test-target')
-        (chrome_dir / 'navigation.json').write_text('{}')
-
-        screenshot_dir = tmpdir / 'screenshot'
-        screenshot_dir.mkdir()
-
-        result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), '--url=https://example.com', '--snapshot-id=test'],
-            cwd=str(screenshot_dir),
-            capture_output=True,
-            text=True,
-            timeout=7,
-            env=get_test_env()
-        )
-
-        assert result.returncode != 0
-
-
-def test_invalid_timeout_uses_default():
-    """Test that invalid SCREENSHOT_TIMEOUT falls back to default."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        chrome_dir = tmpdir / 'chrome'
-        chrome_dir.mkdir()
-        # No navigation.json to trigger timeout
-        (chrome_dir / 'cdp_url.txt').write_text('ws://localhost:9222/test')
-        (chrome_dir / 'target_id.txt').write_text('test')
-
-        screenshot_dir = tmpdir / 'screenshot'
-        screenshot_dir.mkdir()
-
-        env = get_test_env()
-        env['SCREENSHOT_TIMEOUT'] = 'invalid'  # Should fallback to default (10s becomes NaN, treated as 0)
-
-        import time
-        start = time.time()
-        result = subprocess.run(
-            ['node', str(SCREENSHOT_HOOK), '--url=https://example.com', '--snapshot-id=test'],
-            cwd=str(screenshot_dir),
-            capture_output=True,
-            text=True,
-            timeout=5,
-            env=env
-        )
-        elapsed = time.time() - start
-
-        # With invalid timeout, parseInt returns NaN, which should be handled
-        assert result.returncode != 0
-        assert elapsed < 2  # Should fail quickly, not wait 10s
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/search_backend_ripgrep/config.json b/archivebox/plugins/search_backend_ripgrep/config.json
deleted file mode 100644
index 49c5c885..00000000
--- a/archivebox/plugins/search_backend_ripgrep/config.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "RIPGREP_BINARY": {
-      "type": "string",
-      "default": "rg",
-      "description": "Path to ripgrep binary"
-    },
-    "RIPGREP_TIMEOUT": {
-      "type": "integer",
-      "default": 90,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "x-aliases": ["SEARCH_BACKEND_TIMEOUT"],
-      "description": "Search timeout in seconds"
-    },
-    "RIPGREP_ARGS": {
-      "type": "array",
-      "items": { "type": "string" },
-      "default": ["--files-with-matches", "--no-messages", "--ignore-case"],
-      "x-aliases": ["RIPGREP_DEFAULT_ARGS"],
-      "description": "Default ripgrep arguments"
-    },
-    "RIPGREP_ARGS_EXTRA": {
-      "type": "array",
-      "items": { "type": "string" },
-      "default": [],
-      "x-aliases": ["RIPGREP_EXTRA_ARGS"],
-      "description": "Extra arguments to append to ripgrep command"
-    }
-  }
-}
diff --git a/archivebox/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.py b/archivebox/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.py
deleted file mode 100755
index 071dbb5b..00000000
--- a/archivebox/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit ripgrep Binary dependency for the crawl.
-"""
-
-import os
-import sys
-import json
-
-
-def main():
-    # Only proceed if ripgrep backend is enabled
-    search_backend_engine = os.environ.get('SEARCH_BACKEND_ENGINE', 'ripgrep').strip()
-    if search_backend_engine != 'ripgrep':
-        # Not using ripgrep, exit successfully without output
-        sys.exit(0)
-
-    machine_id = os.environ.get('MACHINE_ID', '')
-    print(json.dumps({
-        'type': 'Binary',
-        'name': 'rg',
-        'binproviders': 'apt,brew,env',
-        'overrides': {
-            'apt': {'packages': ['ripgrep']},
-        },
-        'machine_id': machine_id,
-    }))
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/search_backend_ripgrep/search.py b/archivebox/plugins/search_backend_ripgrep/search.py
deleted file mode 100644
index dd94f153..00000000
--- a/archivebox/plugins/search_backend_ripgrep/search.py
+++ /dev/null
@@ -1,116 +0,0 @@
-"""
-Ripgrep search backend - searches files directly without indexing.
-
-This backend doesn't maintain an index - it searches archived files directly
-using ripgrep (rg). This is simpler but slower for large archives.
-
-Environment variables:
-    RIPGREP_BINARY: Path to ripgrep binary (default: rg)
-    RIPGREP_ARGS: Default ripgrep arguments (JSON array)
-    RIPGREP_ARGS_EXTRA: Extra arguments to append (JSON array)
-    RIPGREP_TIMEOUT: Search timeout in seconds (default: 90)
-"""
-
-import json
-import os
-import subprocess
-import shutil
-from pathlib import Path
-from typing import List, Iterable
-
-from django.conf import settings
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
-    """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
-    if not val:
-        return default if default is not None else []
-    try:
-        result = json.loads(val)
-        if isinstance(result, list):
-            return [str(item) for item in result]
-        return default if default is not None else []
-    except json.JSONDecodeError:
-        return default if default is not None else []
-
-
-def _get_archive_dir() -> Path:
-    archive_dir = os.environ.get('ARCHIVE_DIR', '').strip()
-    if archive_dir:
-        return Path(archive_dir)
-    data_dir = os.environ.get('DATA_DIR', '').strip()
-    if data_dir:
-        return Path(data_dir) / 'archive'
-    settings_archive_dir = getattr(settings, 'ARCHIVE_DIR', None)
-    if settings_archive_dir:
-        return Path(settings_archive_dir)
-    settings_data_dir = getattr(settings, 'DATA_DIR', None)
-    if settings_data_dir:
-        return Path(settings_data_dir) / 'archive'
-    return Path.cwd() / 'archive'
-
-
-def search(query: str) -> List[str]:
-    """Search for snapshots using ripgrep."""
-    rg_binary = get_env('RIPGREP_BINARY', 'rg')
-    rg_binary = shutil.which(rg_binary) or rg_binary
-    if not rg_binary or not Path(rg_binary).exists():
-        raise RuntimeError(f'ripgrep binary not found. Install with: apt install ripgrep')
-
-    timeout = get_env_int('RIPGREP_TIMEOUT', 90)
-    ripgrep_args = get_env_array('RIPGREP_ARGS', [])
-    ripgrep_args_extra = get_env_array('RIPGREP_ARGS_EXTRA', [])
-
-    archive_dir = _get_archive_dir()
-    if not archive_dir.exists():
-        return []
-
-    cmd = [
-        rg_binary,
-        *ripgrep_args,
-        *ripgrep_args_extra,
-        '--regexp',
-        query,
-        str(archive_dir),
-    ]
-
-    try:
-        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
-
-        # Extract snapshot IDs from file paths
-        # Paths look like: archive/<snapshot_id>/<extractor>/file.txt
-        snapshot_ids = set()
-        for line in result.stdout.strip().split('\n'):
-            if not line:
-                continue
-            path = Path(line)
-            try:
-                relative = path.relative_to(archive_dir)
-                snapshot_id = relative.parts[0]
-                snapshot_ids.add(snapshot_id)
-            except (ValueError, IndexError):
-                continue
-
-        return list(snapshot_ids)
-
-    except subprocess.TimeoutExpired:
-        return []
-    except Exception:
-        return []
-
-
-def flush(snapshot_ids: Iterable[str]) -> None:
-    """No-op for ripgrep - it searches files directly."""
-    pass
diff --git a/archivebox/plugins/search_backend_ripgrep/templates/icon.html b/archivebox/plugins/search_backend_ripgrep/templates/icon.html
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py b/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py
deleted file mode 100644
index 26b3f118..00000000
--- a/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py
+++ /dev/null
@@ -1,253 +0,0 @@
-#!/usr/bin/env python3
-"""
-Tests for ripgrep binary detection and archivebox install functionality.
-
-Guards against regressions in:
-    pass
-1. Machine.config overrides not being used in version command
-2. Ripgrep hook not resolving binary names via shutil.which()
-3. SEARCH_BACKEND_ENGINE not being passed to hook environment
-"""
-
-import os
-import sys
-import json
-import shutil
-import tempfile
-import subprocess
-from pathlib import Path
-from unittest.mock import patch, MagicMock
-
-import pytest
-
-
-def test_ripgrep_hook_detects_binary_from_path():
-    """Test that ripgrep hook finds binary using abx-pkg when env var is just a name."""
-    hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
-
-    assert shutil.which('rg'), "ripgrep not installed"
-
-    # Set SEARCH_BACKEND_ENGINE to enable the hook
-    env = os.environ.copy()
-    env['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
-    env['RIPGREP_BINARY'] = 'rg'  # Just the name, not the full path (this was the bug)
-
-    result = subprocess.run(
-        [sys.executable, str(hook_path)],
-        capture_output=True,
-        text=True,
-        env=env,
-        timeout=10,
-    )
-
-    assert result.returncode == 0, f"Hook failed: {result.stderr}"
-
-    # Parse JSONL output (filter out non-JSON lines)
-    lines = [line for line in result.stdout.strip().split('\n') if line.strip() and line.strip().startswith('{')]
-    assert len(lines) >= 1, "Expected at least 1 JSONL line (Binary)"
-
-    binary = json.loads(lines[0])
-    assert binary['type'] == 'Binary'
-    assert binary['name'] == 'rg'
-    assert 'binproviders' in binary, "Expected binproviders declaration"
-
-
-def test_ripgrep_hook_skips_when_backend_not_ripgrep():
-    """Test that ripgrep hook exits silently when search backend is not ripgrep."""
-    hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
-
-    env = os.environ.copy()
-    env['SEARCH_BACKEND_ENGINE'] = 'sqlite'  # Different backend
-
-    result = subprocess.run(
-        [sys.executable, str(hook_path)],
-        capture_output=True,
-        text=True,
-        env=env,
-        timeout=10,
-    )
-
-    assert result.returncode == 0, "Hook should exit successfully when backend is not ripgrep"
-    assert result.stdout.strip() == '', "Hook should produce no output when backend is not ripgrep"
-
-
-def test_ripgrep_hook_handles_absolute_path():
-    """Test that ripgrep hook exits successfully when RIPGREP_BINARY is a valid absolute path."""
-    hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
-
-    rg_path = shutil.which('rg')
-    assert rg_path, "ripgrep not installed"
-
-    env = os.environ.copy()
-    env['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
-    env['RIPGREP_BINARY'] = rg_path  # Full absolute path
-
-    result = subprocess.run(
-        [sys.executable, str(hook_path)],
-        capture_output=True,
-        text=True,
-        env=env,
-        timeout=10,
-    )
-
-    assert result.returncode == 0, f"Hook should exit successfully when binary already configured: {result.stderr}"
-    lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-    assert lines, "Expected Binary JSONL output when backend is ripgrep"
-
-
-@pytest.mark.django_db
-def test_machine_config_overrides_base_config():
-    """
-    Test that Machine.config overrides take precedence over base config.
-
-    Guards against regression where archivebox version was showing binaries
-    as "not installed" even though they were detected and stored in Machine.config.
-    """
-    from archivebox.machine.models import Machine, Binary
-
-    import archivebox.machine.models as models
-    models._CURRENT_MACHINE = None
-    machine = Machine.current()
-
-    # Simulate a hook detecting chrome and storing it with a different path than base config
-    detected_chrome_path = '/custom/path/to/chrome'
-    machine.config['CHROME_BINARY'] = detected_chrome_path
-    machine.config['CHROME_VERSION'] = '143.0.7499.170'
-    machine.save()
-
-    # Create Binary record
-    Binary.objects.create(
-        machine=machine,
-        name='chrome',
-        abspath=detected_chrome_path,
-        version='143.0.7499.170',
-        binprovider='env',
-    )
-
-    # Verify Machine.config takes precedence
-    from archivebox.config.configset import get_config
-    config = get_config()
-
-    # Machine.config should override the base config value
-    assert machine.config.get('CHROME_BINARY') == detected_chrome_path
-
-    # The version command should use Machine.config, not base config
-    # (Base config might have 'chromium' while Machine.config has the full path)
-    bin_value = machine.config.get('CHROME_BINARY') or config.get('CHROME_BINARY', '')
-    assert bin_value == detected_chrome_path, \
-        "Machine.config override should take precedence over base config"
-
-
-@pytest.mark.django_db
-def test_search_backend_engine_passed_to_hooks():
-    """
-    Test that SEARCH_BACKEND_ENGINE is configured properly.
-
-    Guards against regression where hooks couldn't determine which search backend was active.
-    """
-    from archivebox.config.configset import get_config
-    import os
-
-    config = get_config()
-    search_backend = config.get('SEARCH_BACKEND_ENGINE', 'ripgrep')
-
-    # Verify config contains SEARCH_BACKEND_ENGINE
-    assert search_backend in ('ripgrep', 'sqlite', 'sonic'), \
-        f"SEARCH_BACKEND_ENGINE should be valid backend, got {search_backend}"
-
-    # Verify it's accessible via environment (hooks read from os.environ)
-    # Hooks receive environment variables, so this verifies the mechanism works
-    assert 'SEARCH_BACKEND_ENGINE' in os.environ or search_backend == config.get('SEARCH_BACKEND_ENGINE'), \
-        "SEARCH_BACKEND_ENGINE must be accessible to hooks"
-
-
-@pytest.mark.django_db
-def test_install_creates_binary_records():
-    """
-    Test that Binary records can be created and queried properly.
-
-    This verifies the Binary model works correctly with the database.
-    """
-    from archivebox.machine.models import Machine, Binary
-    import archivebox.machine.models as models
-
-    models._CURRENT_MACHINE = None
-    machine = Machine.current()
-    initial_binary_count = Binary.objects.filter(machine=machine).count()
-
-    # Create a test binary record
-    test_binary = Binary.objects.create(
-        machine=machine,
-        name='test-binary',
-        abspath='/usr/bin/test-binary',
-        version='1.0.0',
-        binprovider='env',
-        status=Binary.StatusChoices.INSTALLED
-    )
-
-    # Verify Binary record was created
-    final_binary_count = Binary.objects.filter(machine=machine).count()
-    assert final_binary_count == initial_binary_count + 1, \
-        "Binary record should be created"
-
-    # Verify the binary can be queried
-    found_binary = Binary.objects.filter(machine=machine, name='test-binary').first()
-    assert found_binary is not None, "Binary should be found"
-    assert found_binary.abspath == '/usr/bin/test-binary', "Binary path should match"
-    assert found_binary.version == '1.0.0', "Binary version should match"
-
-    # Clean up
-    test_binary.delete()
-
-
-@pytest.mark.django_db
-def test_ripgrep_only_detected_when_backend_enabled():
-    """
-    Test ripgrep validation hook behavior with different SEARCH_BACKEND_ENGINE settings.
-
-    Guards against ripgrep being detected when not needed.
-    """
-    import subprocess
-    import sys
-    from pathlib import Path
-
-    assert shutil.which('rg'), "ripgrep not installed"
-
-    hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
-
-    # Test 1: With ripgrep backend - should output Binary record
-    env1 = os.environ.copy()
-    env1['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
-    env1['RIPGREP_BINARY'] = 'rg'
-
-    result1 = subprocess.run(
-        [sys.executable, str(hook_path)],
-        capture_output=True,
-        text=True,
-        env=env1,
-        timeout=10,
-    )
-
-    assert result1.returncode == 0, f"Hook should succeed with ripgrep backend: {result1.stderr}"
-    # Should output Binary JSONL when backend is ripgrep
-    assert 'Binary' in result1.stdout, "Should output Binary when backend=ripgrep"
-
-    # Test 2: With different backend - should output nothing
-    env2 = os.environ.copy()
-    env2['SEARCH_BACKEND_ENGINE'] = 'sqlite'
-    env2['RIPGREP_BINARY'] = 'rg'
-
-    result2 = subprocess.run(
-        [sys.executable, str(hook_path)],
-        capture_output=True,
-        text=True,
-        env=env2,
-        timeout=10,
-    )
-
-    assert result2.returncode == 0, "Hook should exit successfully when backend is not ripgrep"
-    assert result2.stdout.strip() == '', "Hook should produce no output when backend is not ripgrep"
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py b/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py
deleted file mode 100644
index 8c1f957a..00000000
--- a/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py
+++ /dev/null
@@ -1,297 +0,0 @@
-"""
-Tests for the ripgrep search backend.
-
-Tests cover:
-1. Search with ripgrep binary
-2. Snapshot ID extraction from file paths
-3. Timeout handling
-4. Error handling
-5. Environment variable configuration
-"""
-
-import os
-import shutil
-import subprocess
-import tempfile
-from pathlib import Path
-from unittest.mock import patch, MagicMock
-
-import pytest
-from django.test import TestCase
-
-from archivebox.plugins.search_backend_ripgrep.search import (
-    search,
-    flush,
-    get_env,
-    get_env_int,
-    get_env_array,
-)
-
-
-class TestEnvHelpers(TestCase):
-    """Test environment variable helper functions."""
-
-    def test_get_env_default(self):
-        """get_env should return default for unset vars."""
-        result = get_env('NONEXISTENT_VAR_12345', 'default')
-        self.assertEqual(result, 'default')
-
-    def test_get_env_set(self):
-        """get_env should return value for set vars."""
-        with patch.dict(os.environ, {'TEST_VAR': 'value'}):
-            result = get_env('TEST_VAR', 'default')
-            self.assertEqual(result, 'value')
-
-    def test_get_env_strips_whitespace(self):
-        """get_env should strip whitespace."""
-        with patch.dict(os.environ, {'TEST_VAR': '  value  '}):
-            result = get_env('TEST_VAR', '')
-            self.assertEqual(result, 'value')
-
-    def test_get_env_int_default(self):
-        """get_env_int should return default for unset vars."""
-        result = get_env_int('NONEXISTENT_VAR_12345', 42)
-        self.assertEqual(result, 42)
-
-    def test_get_env_int_valid(self):
-        """get_env_int should parse integer values."""
-        with patch.dict(os.environ, {'TEST_INT': '100'}):
-            result = get_env_int('TEST_INT', 0)
-            self.assertEqual(result, 100)
-
-    def test_get_env_int_invalid(self):
-        """get_env_int should return default for invalid integers."""
-        with patch.dict(os.environ, {'TEST_INT': 'not a number'}):
-            result = get_env_int('TEST_INT', 42)
-            self.assertEqual(result, 42)
-
-    def test_get_env_array_default(self):
-        """get_env_array should return default for unset vars."""
-        result = get_env_array('NONEXISTENT_VAR_12345', ['default'])
-        self.assertEqual(result, ['default'])
-
-    def test_get_env_array_valid(self):
-        """get_env_array should parse JSON arrays."""
-        with patch.dict(os.environ, {'TEST_ARRAY': '["a", "b", "c"]'}):
-            result = get_env_array('TEST_ARRAY', [])
-            self.assertEqual(result, ['a', 'b', 'c'])
-
-    def test_get_env_array_invalid_json(self):
-        """get_env_array should return default for invalid JSON."""
-        with patch.dict(os.environ, {'TEST_ARRAY': 'not json'}):
-            result = get_env_array('TEST_ARRAY', ['default'])
-            self.assertEqual(result, ['default'])
-
-    def test_get_env_array_not_array(self):
-        """get_env_array should return default for non-array JSON."""
-        with patch.dict(os.environ, {'TEST_ARRAY': '{"key": "value"}'}):
-            result = get_env_array('TEST_ARRAY', ['default'])
-            self.assertEqual(result, ['default'])
-
-
-class TestRipgrepFlush(TestCase):
-    """Test the flush function."""
-
-    def test_flush_is_noop(self):
-        """flush should be a no-op for ripgrep backend."""
-        # Should not raise
-        flush(['snap-001', 'snap-002'])
-
-
-class TestRipgrepSearch(TestCase):
-    """Test the ripgrep search function."""
-
-    def setUp(self):
-        """Create temporary archive directory with test files."""
-        self.temp_dir = tempfile.mkdtemp()
-        self.archive_dir = Path(self.temp_dir) / 'archive'
-        self.archive_dir.mkdir()
-
-        # Create snapshot directories with searchable content
-        self._create_snapshot('snap-001', {
-            'singlefile/index.html': '<html><body>Python programming tutorial</body></html>',
-            'title/title.txt': 'Learn Python Programming',
-        })
-        self._create_snapshot('snap-002', {
-            'singlefile/index.html': '<html><body>JavaScript guide</body></html>',
-            'title/title.txt': 'JavaScript Basics',
-        })
-        self._create_snapshot('snap-003', {
-            'wget/index.html': '<html><body>Web archiving guide and best practices</body></html>',
-            'title/title.txt': 'Web Archiving guide',
-        })
-
-        # Patch settings
-        self.settings_patch = patch(
-            'archivebox.plugins.search_backend_ripgrep.search.settings'
-        )
-        self.mock_settings = self.settings_patch.start()
-        self.mock_settings.ARCHIVE_DIR = str(self.archive_dir)
-
-    def tearDown(self):
-        """Clean up temporary directory."""
-        self.settings_patch.stop()
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def _create_snapshot(self, snapshot_id: str, files: dict):
-        """Create a snapshot directory with files."""
-        snap_dir = self.archive_dir / snapshot_id
-        for path, content in files.items():
-            file_path = snap_dir / path
-            file_path.parent.mkdir(parents=True, exist_ok=True)
-            file_path.write_text(content)
-
-    def _has_ripgrep(self) -> bool:
-        """Check if ripgrep is available."""
-        return shutil.which('rg') is not None
-
-    def test_search_no_archive_dir(self):
-        """search should return empty list when archive dir doesn't exist."""
-        self.mock_settings.ARCHIVE_DIR = '/nonexistent/path'
-        results = search('test')
-        self.assertEqual(results, [])
-
-    def test_search_single_match(self):
-        """search should find matching snapshot."""
-        results = search('Python programming')
-
-        self.assertIn('snap-001', results)
-        self.assertNotIn('snap-002', results)
-        self.assertNotIn('snap-003', results)
-
-    def test_search_multiple_matches(self):
-        """search should find all matching snapshots."""
-        # 'guide' appears in snap-002 (JavaScript guide) and snap-003 (Archiving Guide)
-        results = search('guide')
-
-        self.assertIn('snap-002', results)
-        self.assertIn('snap-003', results)
-        self.assertNotIn('snap-001', results)
-
-    def test_search_case_insensitive_by_default(self):
-        """search should be case-sensitive (ripgrep default)."""
-        # By default rg is case-sensitive
-        results_upper = search('PYTHON')
-        results_lower = search('python')
-
-        # Depending on ripgrep config, results may differ
-        self.assertIsInstance(results_upper, list)
-        self.assertIsInstance(results_lower, list)
-
-    def test_search_no_results(self):
-        """search should return empty list for no matches."""
-        results = search('xyznonexistent123')
-        self.assertEqual(results, [])
-
-    def test_search_regex(self):
-        """search should support regex patterns."""
-        results = search('(Python|JavaScript)')
-
-        self.assertIn('snap-001', results)
-        self.assertIn('snap-002', results)
-
-    def test_search_distinct_snapshots(self):
-        """search should return distinct snapshot IDs."""
-        # Query matches both files in snap-001
-        results = search('Python')
-
-        # Should only appear once
-        self.assertEqual(results.count('snap-001'), 1)
-
-    def test_search_missing_binary(self):
-        """search should raise when ripgrep binary not found."""
-        with patch.dict(os.environ, {'RIPGREP_BINARY': '/nonexistent/rg'}):
-            with patch('shutil.which', return_value=None):
-                with self.assertRaises(RuntimeError) as context:
-                    search('test')
-                self.assertIn('ripgrep binary not found', str(context.exception))
-
-    def test_search_with_custom_args(self):
-        """search should use custom RIPGREP_ARGS."""
-        with patch.dict(os.environ, {'RIPGREP_ARGS': '["-i"]'}):  # Case insensitive
-            results = search('PYTHON')
-            # With -i flag, should find regardless of case
-            self.assertIn('snap-001', results)
-
-    def test_search_timeout(self):
-        """search should handle timeout gracefully."""
-        with patch.dict(os.environ, {'RIPGREP_TIMEOUT': '1'}):
-            # Short timeout, should still complete for small archive
-            results = search('Python')
-            self.assertIsInstance(results, list)
-
-
-class TestRipgrepSearchIntegration(TestCase):
-    """Integration tests with realistic archive structure."""
-
-    def setUp(self):
-        """Create archive with realistic structure."""
-        self.temp_dir = tempfile.mkdtemp()
-        self.archive_dir = Path(self.temp_dir) / 'archive'
-        self.archive_dir.mkdir()
-
-        # Realistic snapshot structure
-        self._create_snapshot('1704067200.123456', {  # 2024-01-01
-            'singlefile.html': '''<!DOCTYPE html>
-<html>
-<head><title>ArchiveBox Documentation</title></head>
-<body>
-<h1>Getting Started with ArchiveBox</h1>
-<p>ArchiveBox is a powerful, self-hosted web archiving tool.</p>
-<p>Install with: pip install archivebox</p>
-</body>
-</html>''',
-            'title/title.txt': 'ArchiveBox Documentation',
-            'screenshot/screenshot.png': b'PNG IMAGE DATA',  # Binary file
-        })
-        self._create_snapshot('1704153600.654321', {  # 2024-01-02
-            'wget/index.html': '''<html>
-<head><title>Python News</title></head>
-<body>
-<h1>Python 3.12 Released</h1>
-<p>New features include improved error messages and performance.</p>
-</body>
-</html>''',
-            'readability/content.html': '<p>Python 3.12 has been released with exciting new features.</p>',
-        })
-
-        self.settings_patch = patch(
-            'archivebox.plugins.search_backend_ripgrep.search.settings'
-        )
-        self.mock_settings = self.settings_patch.start()
-        self.mock_settings.ARCHIVE_DIR = str(self.archive_dir)
-
-    def tearDown(self):
-        """Clean up."""
-        self.settings_patch.stop()
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def _create_snapshot(self, timestamp: str, files: dict):
-        """Create snapshot with timestamp-based ID."""
-        snap_dir = self.archive_dir / timestamp
-        for path, content in files.items():
-            file_path = snap_dir / path
-            file_path.parent.mkdir(parents=True, exist_ok=True)
-            if isinstance(content, bytes):
-                file_path.write_bytes(content)
-            else:
-                file_path.write_text(content)
-
-    def test_search_archivebox(self):
-        """Search for archivebox should find documentation snapshot."""
-        results = search('archivebox')
-        self.assertIn('1704067200.123456', results)
-
-    def test_search_python(self):
-        """Search for python should find Python news snapshot."""
-        results = search('Python')
-        self.assertIn('1704153600.654321', results)
-
-    def test_search_pip_install(self):
-        """Search for installation command."""
-        results = search('pip install')
-        self.assertIn('1704067200.123456', results)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/search_backend_sonic/__init__.py b/archivebox/plugins/search_backend_sonic/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/search_backend_sonic/config.json b/archivebox/plugins/search_backend_sonic/config.json
deleted file mode 100644
index c44aa9f3..00000000
--- a/archivebox/plugins/search_backend_sonic/config.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "SEARCH_BACKEND_SONIC_HOST_NAME": {
-      "type": "string",
-      "default": "127.0.0.1",
-      "x-aliases": ["SEARCH_BACKEND_HOST_NAME", "SONIC_HOST"],
-      "description": "Sonic server hostname"
-    },
-    "SEARCH_BACKEND_SONIC_PORT": {
-      "type": "integer",
-      "default": 1491,
-      "minimum": 1,
-      "maximum": 65535,
-      "x-aliases": ["SEARCH_BACKEND_PORT", "SONIC_PORT"],
-      "description": "Sonic server port"
-    },
-    "SEARCH_BACKEND_SONIC_PASSWORD": {
-      "type": "string",
-      "default": "SecretPassword",
-      "x-aliases": ["SEARCH_BACKEND_PASSWORD", "SONIC_PASSWORD"],
-      "description": "Sonic server password"
-    },
-    "SEARCH_BACKEND_SONIC_COLLECTION": {
-      "type": "string",
-      "default": "archivebox",
-      "x-aliases": ["SONIC_COLLECTION"],
-      "description": "Sonic collection name"
-    },
-    "SEARCH_BACKEND_SONIC_BUCKET": {
-      "type": "string",
-      "default": "snapshots",
-      "x-aliases": ["SONIC_BUCKET"],
-      "description": "Sonic bucket name"
-    }
-  }
-}
diff --git a/archivebox/plugins/search_backend_sonic/on_Snapshot__91_index_sonic.py b/archivebox/plugins/search_backend_sonic/on_Snapshot__91_index_sonic.py
deleted file mode 100644
index a44d773a..00000000
--- a/archivebox/plugins/search_backend_sonic/on_Snapshot__91_index_sonic.py
+++ /dev/null
@@ -1,191 +0,0 @@
-#!/usr/bin/env python3
-"""
-Sonic search backend - indexes snapshot content in Sonic server.
-
-This hook runs after all extractors and indexes text content in Sonic.
-Only runs if SEARCH_BACKEND_ENGINE=sonic.
-
-Usage: on_Snapshot__91_index_sonic.py --url=<url> --snapshot-id=<uuid>
-
-Environment variables:
-    SEARCH_BACKEND_ENGINE: Must be 'sonic' for this hook to run
-    USE_INDEXING_BACKEND: Enable search indexing (default: true)
-    SEARCH_BACKEND_HOST_NAME: Sonic server host (default: 127.0.0.1)
-    SEARCH_BACKEND_PORT: Sonic server port (default: 1491)
-    SEARCH_BACKEND_PASSWORD: Sonic server password (default: SecretPassword)
-    SONIC_COLLECTION: Collection name (default: archivebox)
-    SONIC_BUCKET: Bucket name (default: snapshots)
-"""
-
-import json
-import os
-import re
-import sys
-from pathlib import Path
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'index_sonic'
-OUTPUT_DIR = '.'
-
-# Text file patterns to index
-INDEXABLE_FILES = [
-    ('readability', 'content.txt'),
-    ('readability', 'content.html'),
-    ('mercury', 'content.txt'),
-    ('mercury', 'content.html'),
-    ('htmltotext', 'output.txt'),
-    ('singlefile', 'singlefile.html'),
-    ('dom', 'output.html'),
-    ('wget', '**/*.html'),
-    ('wget', '**/*.htm'),
-    ('title', 'title.txt'),
-]
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def strip_html_tags(html: str) -> str:
-    """Remove HTML tags, keeping text content."""
-    html = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)
-    html = re.sub(r'<style[^>]*>.*?</style>', '', html, flags=re.DOTALL | re.IGNORECASE)
-    html = re.sub(r'<[^>]+>', ' ', html)
-    html = html.replace('&nbsp;', ' ').replace('&amp;', '&')
-    html = html.replace('&lt;', '<').replace('&gt;', '>')
-    html = html.replace('&quot;', '"')
-    html = re.sub(r'\s+', ' ', html)
-    return html.strip()
-
-
-def find_indexable_content() -> list[tuple[str, str]]:
-    """Find text content to index from extractor outputs."""
-    results = []
-    cwd = Path.cwd()
-
-    for extractor, file_pattern in INDEXABLE_FILES:
-        plugin_dir = cwd / extractor
-        if not plugin_dir.exists():
-            continue
-
-        if '*' in file_pattern:
-            matches = list(plugin_dir.glob(file_pattern))
-        else:
-            match = plugin_dir / file_pattern
-            matches = [match] if match.exists() else []
-
-        for match in matches:
-            if match.is_file() and match.stat().st_size > 0:
-                try:
-                    content = match.read_text(encoding='utf-8', errors='ignore')
-                    if content.strip():
-                        if match.suffix in ('.html', '.htm'):
-                            content = strip_html_tags(content)
-                        results.append((f'{extractor}/{match.name}', content))
-                except Exception:
-                    continue
-
-    return results
-
-
-def get_sonic_config() -> dict:
-    """Get Sonic connection configuration."""
-    return {
-        'host': get_env('SEARCH_BACKEND_HOST_NAME', '127.0.0.1'),
-        'port': get_env_int('SEARCH_BACKEND_PORT', 1491),
-        'password': get_env('SEARCH_BACKEND_PASSWORD', 'SecretPassword'),
-        'collection': get_env('SONIC_COLLECTION', 'archivebox'),
-        'bucket': get_env('SONIC_BUCKET', 'snapshots'),
-    }
-
-
-def index_in_sonic(snapshot_id: str, texts: list[str]) -> None:
-    """Index texts in Sonic."""
-    try:
-        from sonic import IngestClient
-    except ImportError:
-        raise RuntimeError('sonic-client not installed. Run: pip install sonic-client')
-
-    config = get_sonic_config()
-
-    with IngestClient(config['host'], config['port'], config['password']) as ingest:
-        # Flush existing content
-        try:
-            ingest.flush_object(config['collection'], config['bucket'], snapshot_id)
-        except Exception:
-            pass
-
-        # Index new content in chunks (Sonic has size limits)
-        content = ' '.join(texts)
-        chunk_size = 10000
-        for i in range(0, len(content), chunk_size):
-            chunk = content[i:i + chunk_size]
-            ingest.push(config['collection'], config['bucket'], snapshot_id, chunk)
-
-
-@click.command()
-@click.option('--url', required=True, help='URL that was archived')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Index snapshot content in Sonic."""
-
-    output = None
-    status = 'failed'
-    error = ''
-    indexed_sources = []
-
-    try:
-        # Check if this backend is enabled (permanent skips - don't retry)
-        backend = get_env('SEARCH_BACKEND_ENGINE', 'sqlite')
-        if backend != 'sonic':
-            print(f'Skipping Sonic indexing (SEARCH_BACKEND_ENGINE={backend})', file=sys.stderr)
-            sys.exit(0)  # Permanent skip - different backend selected
-        if not get_env_bool('USE_INDEXING_BACKEND', True):
-            print('Skipping indexing (USE_INDEXING_BACKEND=False)', file=sys.stderr)
-            sys.exit(0)  # Permanent skip - indexing disabled
-        else:
-            contents = find_indexable_content()
-            indexed_sources = [source for source, _ in contents]
-
-            if not contents:
-                status = 'skipped'
-                print('No indexable content found', file=sys.stderr)
-            else:
-                texts = [content for _, content in contents]
-                index_in_sonic(snapshot_id, texts)
-                status = 'succeeded'
-                output = OUTPUT_DIR
-
-    except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
-
-    if error:
-        print(f'ERROR: {error}', file=sys.stderr)
-
-    # Search indexing hooks don't emit ArchiveResult - they're utility hooks
-    # Exit code indicates success/failure
-    sys.exit(0 if status == 'succeeded' else 1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/search_backend_sonic/search.py b/archivebox/plugins/search_backend_sonic/search.py
deleted file mode 100644
index f9c518fd..00000000
--- a/archivebox/plugins/search_backend_sonic/search.py
+++ /dev/null
@@ -1,50 +0,0 @@
-"""
-Sonic search backend - search and flush operations.
-
-This module provides the search interface for the Sonic backend.
-"""
-
-import os
-from typing import List, Iterable
-
-
-def get_sonic_config() -> dict:
-    """Get Sonic connection configuration."""
-    return {
-        'host': os.environ.get('SEARCH_BACKEND_HOST_NAME', '127.0.0.1').strip(),
-        'port': int(os.environ.get('SEARCH_BACKEND_PORT', '1491')),
-        'password': os.environ.get('SEARCH_BACKEND_PASSWORD', 'SecretPassword').strip(),
-        'collection': os.environ.get('SONIC_COLLECTION', 'archivebox').strip(),
-        'bucket': os.environ.get('SONIC_BUCKET', 'snapshots').strip(),
-    }
-
-
-def search(query: str) -> List[str]:
-    """Search for snapshots in Sonic."""
-    try:
-        from sonic import SearchClient
-    except ImportError:
-        raise RuntimeError('sonic-client not installed. Run: pip install sonic-client')
-
-    config = get_sonic_config()
-
-    with SearchClient(config['host'], config['port'], config['password']) as search_client:
-        results = search_client.query(config['collection'], config['bucket'], query, limit=100)
-        return results
-
-
-def flush(snapshot_ids: Iterable[str]) -> None:
-    """Remove snapshots from Sonic index."""
-    try:
-        from sonic import IngestClient
-    except ImportError:
-        raise RuntimeError('sonic-client not installed. Run: pip install sonic-client')
-
-    config = get_sonic_config()
-
-    with IngestClient(config['host'], config['port'], config['password']) as ingest:
-        for snapshot_id in snapshot_ids:
-            try:
-                ingest.flush_object(config['collection'], config['bucket'], snapshot_id)
-            except Exception:
-                pass
diff --git a/archivebox/plugins/search_backend_sonic/templates/icon.html b/archivebox/plugins/search_backend_sonic/templates/icon.html
deleted file mode 100644
index bf81a372..00000000
--- a/archivebox/plugins/search_backend_sonic/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--search_backend_sonic" title="Search"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="11" cy="11" r="5"/><path d="M16 16l4 4"/></svg></span>
diff --git a/archivebox/plugins/search_backend_sqlite/__init__.py b/archivebox/plugins/search_backend_sqlite/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/search_backend_sqlite/config.json b/archivebox/plugins/search_backend_sqlite/config.json
deleted file mode 100644
index aff5f1b3..00000000
--- a/archivebox/plugins/search_backend_sqlite/config.json
+++ /dev/null
@@ -1,25 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "SEARCH_BACKEND_SQLITE_DB": {
-      "type": "string",
-      "default": "search.sqlite3",
-      "x-aliases": ["SQLITEFTS_DB"],
-      "description": "SQLite FTS database filename"
-    },
-    "SEARCH_BACKEND_SQLITE_SEPARATE_DATABASE": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["FTS_SEPARATE_DATABASE", "SQLITEFTS_SEPARATE_DATABASE"],
-      "description": "Use separate database file for FTS index"
-    },
-    "SEARCH_BACKEND_SQLITE_TOKENIZERS": {
-      "type": "string",
-      "default": "porter unicode61 remove_diacritics 2",
-      "x-aliases": ["FTS_TOKENIZERS", "SQLITEFTS_TOKENIZERS"],
-      "description": "FTS5 tokenizer configuration"
-    }
-  }
-}
diff --git a/archivebox/plugins/search_backend_sqlite/on_Snapshot__90_index_sqlite.py b/archivebox/plugins/search_backend_sqlite/on_Snapshot__90_index_sqlite.py
deleted file mode 100644
index 8a8a21b6..00000000
--- a/archivebox/plugins/search_backend_sqlite/on_Snapshot__90_index_sqlite.py
+++ /dev/null
@@ -1,181 +0,0 @@
-#!/usr/bin/env python3
-"""
-SQLite FTS5 search backend - indexes snapshot content for full-text search.
-
-This hook runs after all extractors and indexes text content in SQLite FTS5.
-Only runs if SEARCH_BACKEND_ENGINE=sqlite.
-
-Usage: on_Snapshot__90_index_sqlite.py --url=<url> --snapshot-id=<uuid>
-
-Environment variables:
-    SEARCH_BACKEND_ENGINE: Must be 'sqlite' for this hook to run
-    USE_INDEXING_BACKEND: Enable search indexing (default: true)
-    SQLITEFTS_DB: Database filename (default: search.sqlite3)
-    FTS_TOKENIZERS: FTS5 tokenizer config (default: porter unicode61 remove_diacritics 2)
-"""
-
-import json
-import os
-import re
-import sqlite3
-import sys
-from pathlib import Path
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'index_sqlite'
-OUTPUT_DIR = '.'
-
-# Text file patterns to index, in priority order
-INDEXABLE_FILES = [
-    ('readability', 'content.txt'),
-    ('readability', 'content.html'),
-    ('mercury', 'content.txt'),
-    ('mercury', 'content.html'),
-    ('htmltotext', 'output.txt'),
-    ('singlefile', 'singlefile.html'),
-    ('dom', 'output.html'),
-    ('wget', '**/*.html'),
-    ('wget', '**/*.htm'),
-    ('title', 'title.txt'),
-]
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def strip_html_tags(html: str) -> str:
-    """Remove HTML tags, keeping text content."""
-    html = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)
-    html = re.sub(r'<style[^>]*>.*?</style>', '', html, flags=re.DOTALL | re.IGNORECASE)
-    html = re.sub(r'<[^>]+>', ' ', html)
-    html = html.replace('&nbsp;', ' ').replace('&amp;', '&')
-    html = html.replace('&lt;', '<').replace('&gt;', '>')
-    html = html.replace('&quot;', '"')
-    html = re.sub(r'\s+', ' ', html)
-    return html.strip()
-
-
-def find_indexable_content() -> list[tuple[str, str]]:
-    """Find text content to index from extractor outputs."""
-    results = []
-    cwd = Path.cwd()
-
-    for extractor, file_pattern in INDEXABLE_FILES:
-        plugin_dir = cwd / extractor
-        if not plugin_dir.exists():
-            continue
-
-        if '*' in file_pattern:
-            matches = list(plugin_dir.glob(file_pattern))
-        else:
-            match = plugin_dir / file_pattern
-            matches = [match] if match.exists() else []
-
-        for match in matches:
-            if match.is_file() and match.stat().st_size > 0:
-                try:
-                    content = match.read_text(encoding='utf-8', errors='ignore')
-                    if content.strip():
-                        if match.suffix in ('.html', '.htm'):
-                            content = strip_html_tags(content)
-                        results.append((f'{extractor}/{match.name}', content))
-                except Exception:
-                    continue
-
-    return results
-
-
-def get_db_path() -> Path:
-    """Get path to the search index database."""
-    data_dir = get_env('DATA_DIR', str(Path.cwd().parent.parent))
-    db_name = get_env('SQLITEFTS_DB', 'search.sqlite3')
-    return Path(data_dir) / db_name
-
-
-def index_in_sqlite(snapshot_id: str, texts: list[str]) -> None:
-    """Index texts in SQLite FTS5."""
-    db_path = get_db_path()
-    tokenizers = get_env('FTS_TOKENIZERS', 'porter unicode61 remove_diacritics 2')
-    conn = sqlite3.connect(str(db_path))
-
-    try:
-        # Create FTS5 table if needed
-        conn.execute(f'''
-            CREATE VIRTUAL TABLE IF NOT EXISTS search_index
-            USING fts5(snapshot_id, content, tokenize='{tokenizers}')
-        ''')
-
-        # Remove existing entries
-        conn.execute('DELETE FROM search_index WHERE snapshot_id = ?', (snapshot_id,))
-
-        # Insert new content
-        content = '\n\n'.join(texts)
-        conn.execute(
-            'INSERT INTO search_index (snapshot_id, content) VALUES (?, ?)',
-            (snapshot_id, content)
-        )
-        conn.commit()
-    finally:
-        conn.close()
-
-
-@click.command()
-@click.option('--url', required=True, help='URL that was archived')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Index snapshot content in SQLite FTS5."""
-
-    output = None
-    status = 'failed'
-    error = ''
-    indexed_sources = []
-
-    try:
-        # Check if this backend is enabled (permanent skips - don't retry)
-        backend = get_env('SEARCH_BACKEND_ENGINE', 'sqlite')
-        if backend != 'sqlite':
-            print(f'Skipping SQLite indexing (SEARCH_BACKEND_ENGINE={backend})', file=sys.stderr)
-            sys.exit(0)  # Permanent skip - different backend selected
-        if not get_env_bool('USE_INDEXING_BACKEND', True):
-            print('Skipping indexing (USE_INDEXING_BACKEND=False)', file=sys.stderr)
-            sys.exit(0)  # Permanent skip - indexing disabled
-        else:
-            contents = find_indexable_content()
-            indexed_sources = [source for source, _ in contents]
-
-            if not contents:
-                status = 'skipped'
-                print('No indexable content found', file=sys.stderr)
-            else:
-                texts = [content for _, content in contents]
-                index_in_sqlite(snapshot_id, texts)
-                status = 'succeeded'
-                output = OUTPUT_DIR
-
-    except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
-
-    if error:
-        print(f'ERROR: {error}', file=sys.stderr)
-
-    # Search indexing hooks don't emit ArchiveResult - they're utility hooks
-    # Exit code indicates success/failure
-    sys.exit(0 if status == 'succeeded' else 1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/search_backend_sqlite/search.py b/archivebox/plugins/search_backend_sqlite/search.py
deleted file mode 100644
index 0d3f5539..00000000
--- a/archivebox/plugins/search_backend_sqlite/search.py
+++ /dev/null
@@ -1,70 +0,0 @@
-"""
-SQLite FTS5 search backend - search and flush operations.
-
-This module provides the search interface for the SQLite FTS backend.
-
-Environment variables:
-    SQLITEFTS_DB: Database filename (default: search.sqlite3)
-    FTS_SEPARATE_DATABASE: Use separate database file (default: true)
-    FTS_TOKENIZERS: FTS5 tokenizer config (default: porter unicode61 remove_diacritics 2)
-"""
-
-import os
-import sqlite3
-from pathlib import Path
-from typing import List, Iterable
-
-
-# Config with old var names for backwards compatibility
-SQLITEFTS_DB = os.environ.get('SQLITEFTS_DB', 'search.sqlite3').strip()
-FTS_SEPARATE_DATABASE = os.environ.get('FTS_SEPARATE_DATABASE', 'true').lower() in ('true', '1', 'yes')
-FTS_TOKENIZERS = os.environ.get('FTS_TOKENIZERS', 'porter unicode61 remove_diacritics 2').strip()
-
-
-def _get_data_dir() -> Path:
-    data_dir = os.environ.get('DATA_DIR', '').strip()
-    if data_dir:
-        return Path(data_dir)
-    return Path.cwd() / 'data'
-
-
-def get_db_path() -> Path:
-    """Get path to the search index database."""
-    return _get_data_dir() / SQLITEFTS_DB
-
-
-def search(query: str) -> List[str]:
-    """Search for snapshots matching the query."""
-    db_path = get_db_path()
-    if not db_path.exists():
-        return []
-
-    conn = sqlite3.connect(str(db_path))
-    try:
-        cursor = conn.execute(
-            'SELECT DISTINCT snapshot_id FROM search_index WHERE search_index MATCH ?',
-            (query,)
-        )
-        return [row[0] for row in cursor.fetchall()]
-    except sqlite3.OperationalError:
-        # Table doesn't exist yet
-        return []
-    finally:
-        conn.close()
-
-
-def flush(snapshot_ids: Iterable[str]) -> None:
-    """Remove snapshots from the index."""
-    db_path = get_db_path()
-    if not db_path.exists():
-        return
-
-    conn = sqlite3.connect(str(db_path))
-    try:
-        for snapshot_id in snapshot_ids:
-            conn.execute('DELETE FROM search_index WHERE snapshot_id = ?', (snapshot_id,))
-        conn.commit()
-    except sqlite3.OperationalError:
-        pass  # Table doesn't exist
-    finally:
-        conn.close()
diff --git a/archivebox/plugins/search_backend_sqlite/templates/icon.html b/archivebox/plugins/search_backend_sqlite/templates/icon.html
deleted file mode 100644
index 3c9f8646..00000000
--- a/archivebox/plugins/search_backend_sqlite/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--search_backend_sqlite" title="Search"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="11" cy="11" r="5"/><path d="M16 16l4 4"/></svg></span>
diff --git a/archivebox/plugins/search_backend_sqlite/tests/test_sqlite_search.py b/archivebox/plugins/search_backend_sqlite/tests/test_sqlite_search.py
deleted file mode 100644
index d8d6035f..00000000
--- a/archivebox/plugins/search_backend_sqlite/tests/test_sqlite_search.py
+++ /dev/null
@@ -1,351 +0,0 @@
-"""
-Tests for the SQLite FTS5 search backend.
-
-Tests cover:
-1. Search index creation
-2. Indexing snapshots
-3. Search queries with real test data
-4. Flush operations
-5. Edge cases (empty index, special characters)
-"""
-
-import os
-import sqlite3
-import tempfile
-from pathlib import Path
-from unittest.mock import patch
-
-import pytest
-from django.test import TestCase, override_settings
-
-from archivebox.plugins.search_backend_sqlite.search import (
-    get_db_path,
-    search,
-    flush,
-    SQLITEFTS_DB,
-    FTS_TOKENIZERS,
-)
-
-
-class TestSqliteSearchBackend(TestCase):
-    """Test SQLite FTS5 search backend."""
-
-    def setUp(self):
-        """Create a temporary data directory with search index."""
-        self.temp_dir = tempfile.mkdtemp()
-        self.db_path = Path(self.temp_dir) / SQLITEFTS_DB
-
-        # Patch DATA_DIR
-        self.settings_patch = patch(
-            'archivebox.plugins.search_backend_sqlite.search.settings'
-        )
-        self.mock_settings = self.settings_patch.start()
-        self.mock_settings.DATA_DIR = self.temp_dir
-
-        # Create FTS5 table
-        self._create_index()
-
-    def tearDown(self):
-        """Clean up temporary directory."""
-        self.settings_patch.stop()
-        import shutil
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def _create_index(self):
-        """Create the FTS5 search index table."""
-        conn = sqlite3.connect(str(self.db_path))
-        try:
-            conn.execute(f'''
-                CREATE VIRTUAL TABLE IF NOT EXISTS search_index
-                USING fts5(
-                    snapshot_id,
-                    url,
-                    title,
-                    content,
-                    tokenize = '{FTS_TOKENIZERS}'
-                )
-            ''')
-            conn.commit()
-        finally:
-            conn.close()
-
-    def _index_snapshot(self, snapshot_id: str, url: str, title: str, content: str):
-        """Add a snapshot to the index."""
-        conn = sqlite3.connect(str(self.db_path))
-        try:
-            conn.execute(
-                'INSERT INTO search_index (snapshot_id, url, title, content) VALUES (?, ?, ?, ?)',
-                (snapshot_id, url, title, content)
-            )
-            conn.commit()
-        finally:
-            conn.close()
-
-    def test_get_db_path(self):
-        """get_db_path should return correct path."""
-        path = get_db_path()
-        self.assertEqual(path, Path(self.temp_dir) / SQLITEFTS_DB)
-
-    def test_search_empty_index(self):
-        """search should return empty list for empty index."""
-        results = search('nonexistent')
-        self.assertEqual(results, [])
-
-    def test_search_no_index_file(self):
-        """search should return empty list when index file doesn't exist."""
-        os.remove(self.db_path)
-        results = search('test')
-        self.assertEqual(results, [])
-
-    def test_search_single_result(self):
-        """search should find matching snapshot."""
-        self._index_snapshot(
-            'snap-001',
-            'https://example.com/page1',
-            'Example Page',
-            'This is example content about testing.'
-        )
-
-        results = search('example')
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0], 'snap-001')
-
-    def test_search_multiple_results(self):
-        """search should find all matching snapshots."""
-        self._index_snapshot('snap-001', 'https://example.com/1', 'Python Tutorial', 'Learn Python programming')
-        self._index_snapshot('snap-002', 'https://example.com/2', 'Python Guide', 'Advanced Python concepts')
-        self._index_snapshot('snap-003', 'https://example.com/3', 'JavaScript Basics', 'Learn JavaScript')
-
-        results = search('Python')
-        self.assertEqual(len(results), 2)
-        self.assertIn('snap-001', results)
-        self.assertIn('snap-002', results)
-        self.assertNotIn('snap-003', results)
-
-    def test_search_title_match(self):
-        """search should match against title."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Django Web Framework', 'Content here')
-
-        results = search('Django')
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0], 'snap-001')
-
-    def test_search_url_match(self):
-        """search should match against URL."""
-        self._index_snapshot('snap-001', 'https://archivebox.io/docs', 'Title', 'Content')
-
-        results = search('archivebox')
-        self.assertEqual(len(results), 1)
-
-    def test_search_content_match(self):
-        """search should match against content."""
-        self._index_snapshot(
-            'snap-001',
-            'https://example.com',
-            'Generic Title',
-            'This document contains information about cryptography and security.'
-        )
-
-        results = search('cryptography')
-        self.assertEqual(len(results), 1)
-
-    def test_search_case_insensitive(self):
-        """search should be case insensitive."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Title', 'PYTHON programming')
-
-        results = search('python')
-        self.assertEqual(len(results), 1)
-
-    def test_search_stemming(self):
-        """search should use porter stemmer for word stems."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Title', 'Programming concepts')
-
-        # 'program' should match 'programming' with porter stemmer
-        results = search('program')
-        self.assertEqual(len(results), 1)
-
-    def test_search_multiple_words(self):
-        """search should match documents with all words."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Web Development', 'Learn web development skills')
-        self._index_snapshot('snap-002', 'https://example.com', 'Web Design', 'Design beautiful websites')
-
-        results = search('web development')
-        # FTS5 defaults to OR, so both might match
-        # With porter stemmer, both should match 'web'
-        self.assertIn('snap-001', results)
-
-    def test_search_phrase(self):
-        """search should support phrase queries."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Title', 'machine learning algorithms')
-        self._index_snapshot('snap-002', 'https://example.com', 'Title', 'machine algorithms learning')
-
-        # Phrase search with quotes
-        results = search('"machine learning"')
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0], 'snap-001')
-
-    def test_search_distinct_results(self):
-        """search should return distinct snapshot IDs."""
-        # Index same snapshot twice (could happen with multiple fields matching)
-        self._index_snapshot('snap-001', 'https://python.org', 'Python', 'Python programming language')
-
-        results = search('Python')
-        self.assertEqual(len(results), 1)
-
-    def test_flush_single(self):
-        """flush should remove snapshot from index."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Title', 'Content')
-        self._index_snapshot('snap-002', 'https://example.com', 'Title', 'Content')
-
-        flush(['snap-001'])
-
-        results = search('Content')
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0], 'snap-002')
-
-    def test_flush_multiple(self):
-        """flush should remove multiple snapshots."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Title', 'Test')
-        self._index_snapshot('snap-002', 'https://example.com', 'Title', 'Test')
-        self._index_snapshot('snap-003', 'https://example.com', 'Title', 'Test')
-
-        flush(['snap-001', 'snap-003'])
-
-        results = search('Test')
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0], 'snap-002')
-
-    def test_flush_nonexistent(self):
-        """flush should not raise for nonexistent snapshots."""
-        # Should not raise
-        flush(['nonexistent-snap'])
-
-    def test_flush_no_index(self):
-        """flush should not raise when index doesn't exist."""
-        os.remove(self.db_path)
-        # Should not raise
-        flush(['snap-001'])
-
-    def test_search_special_characters(self):
-        """search should handle special characters in queries."""
-        self._index_snapshot('snap-001', 'https://example.com', 'C++ Programming', 'Learn C++ basics')
-
-        # FTS5 handles special chars
-        results = search('C++')
-        # May or may not match depending on tokenizer config
-        # At minimum, should not raise
-        self.assertIsInstance(results, list)
-
-    def test_search_unicode(self):
-        """search should handle unicode content."""
-        self._index_snapshot('snap-001', 'https://example.com', 'Titre Francais', 'cafe resume')
-        self._index_snapshot('snap-002', 'https://example.com', 'Japanese', 'Hello world')
-
-        # With remove_diacritics, 'cafe' should match
-        results = search('cafe')
-        self.assertEqual(len(results), 1)
-
-
-class TestSqliteSearchWithRealData(TestCase):
-    """Integration tests with realistic archived content."""
-
-    def setUp(self):
-        """Create index with realistic test data."""
-        self.temp_dir = tempfile.mkdtemp()
-        self.db_path = Path(self.temp_dir) / SQLITEFTS_DB
-
-        self.settings_patch = patch(
-            'archivebox.plugins.search_backend_sqlite.search.settings'
-        )
-        self.mock_settings = self.settings_patch.start()
-        self.mock_settings.DATA_DIR = self.temp_dir
-
-        # Create index
-        conn = sqlite3.connect(str(self.db_path))
-        try:
-            conn.execute(f'''
-                CREATE VIRTUAL TABLE IF NOT EXISTS search_index
-                USING fts5(
-                    snapshot_id,
-                    url,
-                    title,
-                    content,
-                    tokenize = '{FTS_TOKENIZERS}'
-                )
-            ''')
-            # Index realistic data
-            test_data = [
-                ('snap-001', 'https://github.com/ArchiveBox/ArchiveBox',
-                 'ArchiveBox - Self-hosted web archiving',
-                 'Open source self-hosted web archiving. Collects, saves, and displays various types of content.'),
-                ('snap-002', 'https://docs.python.org/3/tutorial/',
-                 'Python 3 Tutorial',
-                 'An informal introduction to Python. Python is an easy to learn, powerful programming language.'),
-                ('snap-003', 'https://developer.mozilla.org/docs/Web/JavaScript',
-                 'JavaScript - MDN Web Docs',
-                 'JavaScript (JS) is a lightweight, interpreted programming language with first-class functions.'),
-                ('snap-004', 'https://news.ycombinator.com',
-                 'Hacker News',
-                 'Social news website focusing on computer science and entrepreneurship.'),
-                ('snap-005', 'https://en.wikipedia.org/wiki/Web_archiving',
-                 'Web archiving - Wikipedia',
-                 'Web archiving is the process of collecting portions of the World Wide Web to ensure the information is preserved.'),
-            ]
-            conn.executemany(
-                'INSERT INTO search_index (snapshot_id, url, title, content) VALUES (?, ?, ?, ?)',
-                test_data
-            )
-            conn.commit()
-        finally:
-            conn.close()
-
-    def tearDown(self):
-        """Clean up."""
-        self.settings_patch.stop()
-        import shutil
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_search_archivebox(self):
-        """Search for 'archivebox' should find relevant results."""
-        results = search('archivebox')
-        self.assertIn('snap-001', results)
-
-    def test_search_programming(self):
-        """Search for 'programming' should find Python and JS docs."""
-        results = search('programming')
-        self.assertIn('snap-002', results)
-        self.assertIn('snap-003', results)
-
-    def test_search_web_archiving(self):
-        """Search for 'web archiving' should find relevant results."""
-        results = search('web archiving')
-        # Both ArchiveBox and Wikipedia should match
-        self.assertIn('snap-001', results)
-        self.assertIn('snap-005', results)
-
-    def test_search_github(self):
-        """Search for 'github' should find URL match."""
-        results = search('github')
-        self.assertIn('snap-001', results)
-
-    def test_search_tutorial(self):
-        """Search for 'tutorial' should find Python tutorial."""
-        results = search('tutorial')
-        self.assertIn('snap-002', results)
-
-    def test_flush_and_search(self):
-        """Flushing a snapshot should remove it from search results."""
-        # Verify it's there first
-        results = search('archivebox')
-        self.assertIn('snap-001', results)
-
-        # Flush it
-        flush(['snap-001'])
-
-        # Should no longer be found
-        results = search('archivebox')
-        self.assertNotIn('snap-001', results)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/seo/config.json b/archivebox/plugins/seo/config.json
deleted file mode 100644
index 43fca2ad..00000000
--- a/archivebox/plugins/seo/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "SEO_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_SEO", "USE_SEO"],
-      "description": "Enable SEO metadata capture"
-    },
-    "SEO_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for SEO capture in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/seo/on_Snapshot__38_seo.js b/archivebox/plugins/seo/on_Snapshot__38_seo.js
deleted file mode 100755
index cc107d64..00000000
--- a/archivebox/plugins/seo/on_Snapshot__38_seo.js
+++ /dev/null
@@ -1,169 +0,0 @@
-#!/usr/bin/env node
-/**
- * Extract SEO metadata from a URL.
- *
- * Extracts all <meta> tags including:
- * - og:* (Open Graph)
- * - twitter:*
- * - description, keywords, author
- * - Any other meta tags
- *
- * Usage: on_Snapshot__38_seo.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes seo/seo.json
- *
- * Environment variables:
- *     SAVE_SEO: Enable SEO extraction (default: true)
- */
-
-const fs = require('fs');
-const path = require('path');
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer-core');
-
-// Import shared utilities from chrome_utils.js
-const {
-    getEnvBool,
-    getEnvInt,
-    parseArgs,
-    connectToPage,
-    waitForPageLoaded,
-} = require('../chrome/chrome_utils.js');
-
-// Extractor metadata
-const PLUGIN_NAME = 'seo';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'seo.json';
-const CHROME_SESSION_DIR = '../chrome';
-
-// Extract SEO metadata
-async function extractSeo(url) {
-    // Output directory is current directory (hook already runs in output dir)
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-    const timeout = getEnvInt('SEO_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
-    let browser = null;
-
-    try {
-        // Connect to existing Chrome session and get target page
-        const connection = await connectToPage({
-            chromeSessionDir: CHROME_SESSION_DIR,
-            timeoutMs: timeout,
-            puppeteer,
-        });
-        browser = connection.browser;
-        const page = connection.page;
-
-        // Extract all meta tags
-        const seoData = await page.evaluate(() => {
-            const metaTags = Array.from(document.querySelectorAll('meta'));
-            const seo = {
-                url: window.location.href,
-                title: document.title || '',
-            };
-
-            // Process each meta tag
-            metaTags.forEach(tag => {
-                // Get the key (name or property attribute)
-                const key = tag.getAttribute('name') || tag.getAttribute('property') || '';
-                const content = tag.getAttribute('content') || '';
-
-                if (key && content) {
-                    // Store by key
-                    seo[key] = content;
-                }
-            });
-
-            // Also get canonical URL if present
-            const canonical = document.querySelector('link[rel="canonical"]');
-            if (canonical) {
-                seo.canonical = canonical.getAttribute('href');
-            }
-
-            // Get language
-            const htmlLang = document.documentElement.lang;
-            if (htmlLang) {
-                seo.language = htmlLang;
-            }
-
-            return seo;
-        });
-
-        // Write output
-        fs.writeFileSync(outputPath, JSON.stringify(seoData, null, 2));
-
-        return { success: true, output: outputPath, seoData };
-
-    } catch (e) {
-        return { success: false, error: `${e.name}: ${e.message}` };
-    } finally {
-        if (browser) {
-            browser.disconnect();
-        }
-    }
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__38_seo.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    const startTs = new Date();
-    let status = 'failed';
-    let output = null;
-    let error = '';
-
-    try {
-        // Check if enabled
-        if (!getEnvBool('SEO_ENABLED', true)) {
-            console.log('Skipping SEO (SEO_ENABLED=False)');
-            // Output clean JSONL (no RESULT_JSON= prefix)
-            console.log(JSON.stringify({
-                type: 'ArchiveResult',
-                status: 'skipped',
-                output_str: 'SEO_ENABLED=False',
-            }));
-            process.exit(0);
-        }
-
-        const timeout = getEnvInt('SEO_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
-        await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 200);
-
-        const result = await extractSeo(url);
-
-        if (result.success) {
-            status = 'succeeded';
-            output = result.output;
-            const metaCount = Object.keys(result.seoData).length - 2;  // Subtract url and title
-            console.log(`SEO metadata extracted: ${metaCount} meta tags`);
-        } else {
-            status = 'failed';
-            error = result.error;
-        }
-    } catch (e) {
-        error = `${e.name}: ${e.message}`;
-        status = 'failed';
-    }
-
-    const endTs = new Date();
-
-    if (error) console.error(`ERROR: ${error}`);
-
-    // Output clean JSONL (no RESULT_JSON= prefix)
-    console.log(JSON.stringify({
-        type: 'ArchiveResult',
-        status,
-        output_str: output || error || '',
-    }));
-
-    process.exit(status === 'succeeded' ? 0 : 1);
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/seo/templates/icon.html b/archivebox/plugins/seo/templates/icon.html
deleted file mode 100644
index 1306d22d..00000000
--- a/archivebox/plugins/seo/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--seo" title="SEO"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M4 16l6-6 4 4 6-6"/><path d="M14 8h6v6"/></svg></span>
diff --git a/archivebox/plugins/seo/tests/test_seo.py b/archivebox/plugins/seo/tests/test_seo.py
deleted file mode 100644
index d0e2f09f..00000000
--- a/archivebox/plugins/seo/tests/test_seo.py
+++ /dev/null
@@ -1,129 +0,0 @@
-"""
-Tests for the SEO plugin.
-
-Tests the real SEO hook with an actual URL to verify
-meta tag extraction.
-"""
-
-import json
-import subprocess
-import sys
-import tempfile
-import shutil
-from pathlib import Path
-
-from django.test import TestCase
-
-# Import chrome test helpers
-sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
-from chrome_test_helpers import (
-    chrome_session,
-    CHROME_NAVIGATE_HOOK,
-    get_plugin_dir,
-    get_hook_script,
-)
-
-
-# Get the path to the SEO hook
-PLUGIN_DIR = get_plugin_dir(__file__)
-SEO_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_seo.*')
-
-
-class TestSEOPlugin(TestCase):
-    """Test the SEO plugin."""
-
-    def test_seo_hook_exists(self):
-        """SEO hook script should exist."""
-        self.assertIsNotNone(SEO_HOOK, "SEO hook not found in plugin directory")
-        self.assertTrue(SEO_HOOK.exists(), f"Hook not found: {SEO_HOOK}")
-
-
-class TestSEOWithChrome(TestCase):
-    """Integration tests for SEO plugin with Chrome."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = Path(tempfile.mkdtemp())
-
-    def tearDown(self):
-        """Clean up."""
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_seo_extracts_meta_tags(self):
-        """SEO hook should extract meta tags from a real URL."""
-        test_url = 'https://example.com'
-        snapshot_id = 'test-seo-snapshot'
-
-        with chrome_session(
-            self.temp_dir,
-            crawl_id='test-seo-crawl',
-            snapshot_id=snapshot_id,
-            test_url=test_url,
-            navigate=False,
-            timeout=30,
-        ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-            seo_dir = snapshot_chrome_dir.parent / 'seo'
-            seo_dir.mkdir(exist_ok=True)
-
-            nav_result = subprocess.run(
-                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                cwd=str(snapshot_chrome_dir),
-                capture_output=True,
-                text=True,
-                timeout=120,
-                env=env
-            )
-            self.assertEqual(nav_result.returncode, 0, f"Navigation failed: {nav_result.stderr}")
-
-            # Run SEO hook with the active Chrome session
-            result = subprocess.run(
-                ['node', str(SEO_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                cwd=str(seo_dir),
-                capture_output=True,
-                text=True,
-                timeout=60,
-                env=env
-            )
-
-            # Check for output file
-            seo_output = seo_dir / 'seo.json'
-
-            seo_data = None
-
-            # Try parsing from file first
-            if seo_output.exists():
-                with open(seo_output) as f:
-                    try:
-                        seo_data = json.load(f)
-                    except json.JSONDecodeError:
-                        pass
-
-            # Try parsing from stdout if not in file
-            if not seo_data:
-                for line in result.stdout.split('\n'):
-                    line = line.strip()
-                    if line.startswith('{'):
-                        try:
-                            record = json.loads(line)
-                            # SEO data typically has title, description, or og: tags
-                            if any(key in record for key in ['title', 'description', 'og:title', 'canonical']):
-                                seo_data = record
-                                break
-                        except json.JSONDecodeError:
-                            continue
-
-            # Verify hook ran successfully
-            self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
-            self.assertNotIn('Traceback', result.stderr)
-            self.assertNotIn('Error:', result.stderr)
-
-            # example.com has a title, so we MUST get SEO data
-            self.assertIsNotNone(seo_data, "No SEO data extracted from file or stdout")
-
-            # Verify we got some SEO data
-            has_seo_data = any(key in seo_data for key in ['title', 'description', 'og:title', 'canonical', 'meta'])
-            self.assertTrue(has_seo_data, f"No SEO data extracted: {seo_data}")
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/singlefile/config.json b/archivebox/plugins/singlefile/config.json
deleted file mode 100644
index c522efba..00000000
--- a/archivebox/plugins/singlefile/config.json
+++ /dev/null
@@ -1,77 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "SINGLEFILE_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_SINGLEFILE", "USE_SINGLEFILE"],
-      "description": "Enable SingleFile archiving"
-    },
-    "SINGLEFILE_BINARY": {
-      "type": "string",
-      "default": "single-file",
-      "x-aliases": ["SINGLE_FILE_BINARY"],
-      "description": "Path to single-file binary"
-    },
-    "SINGLEFILE_NODE_BINARY": {
-      "type": "string",
-      "default": "node",
-      "x-fallback": "NODE_BINARY",
-      "description": "Path to Node.js binary"
-    },
-    "SINGLEFILE_CHROME_BINARY": {
-      "type": "string",
-      "default": "",
-      "x-fallback": "CHROME_BINARY",
-      "description": "Path to Chromium binary"
-    },
-    "SINGLEFILE_TIMEOUT": {
-      "type": "integer",
-      "default": 60,
-      "minimum": 10,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for SingleFile in seconds"
-    },
-    "SINGLEFILE_USER_AGENT": {
-      "type": "string",
-      "default": "",
-      "x-fallback": "USER_AGENT",
-      "description": "User agent string"
-    },
-    "SINGLEFILE_COOKIES_FILE": {
-      "type": "string",
-      "default": "",
-      "x-fallback": "COOKIES_FILE",
-      "description": "Path to cookies file"
-    },
-    "SINGLEFILE_CHECK_SSL_VALIDITY": {
-      "type": "boolean",
-      "default": true,
-      "x-fallback": "CHECK_SSL_VALIDITY",
-      "description": "Whether to verify SSL certificates"
-    },
-    "SINGLEFILE_CHROME_ARGS": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-fallback": "CHROME_ARGS",
-      "description": "Chrome command-line arguments for SingleFile"
-    },
-    "SINGLEFILE_ARGS": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": ["--browser-headless"],
-      "x-aliases": ["SINGLEFILE_DEFAULT_ARGS"],
-      "description": "Default single-file arguments"
-    },
-    "SINGLEFILE_ARGS_EXTRA": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["SINGLEFILE_EXTRA_ARGS"],
-      "description": "Extra arguments to append to single-file command"
-    }
-  }
-}
diff --git a/archivebox/plugins/singlefile/on_Crawl__45_singlefile_install.py b/archivebox/plugins/singlefile/on_Crawl__45_singlefile_install.py
deleted file mode 100755
index f2d22b3e..00000000
--- a/archivebox/plugins/singlefile/on_Crawl__45_singlefile_install.py
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit single-file Binary dependency for the crawl.
-"""
-
-import json
-import os
-import sys
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def output_binary(name: str, binproviders: str, overrides: dict | None = None):
-    """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
-    }
-    if overrides:
-        record['overrides'] = overrides
-    print(json.dumps(record))
-
-
-def main():
-    singlefile_enabled = get_env_bool('SINGLEFILE_ENABLED', True)
-
-    if not singlefile_enabled:
-        sys.exit(0)
-
-    output_binary(
-        name='single-file',
-        binproviders='npm,env',
-        overrides={'npm': {'packages': ['single-file-cli']}},
-    )
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/singlefile/on_Crawl__82_singlefile_install.js b/archivebox/plugins/singlefile/on_Crawl__82_singlefile_install.js
deleted file mode 100755
index 8abefe4f..00000000
--- a/archivebox/plugins/singlefile/on_Crawl__82_singlefile_install.js
+++ /dev/null
@@ -1,341 +0,0 @@
-#!/usr/bin/env node
-/**
- * SingleFile Extension Plugin
- *
- * Installs and uses the SingleFile Chrome extension for archiving complete web pages.
- * Falls back to single-file-cli if the extension is not available.
- *
- * Extension: https://chromewebstore.google.com/detail/mpiodijhokgodhhofbcjdecpffjipkle
- *
- * Priority: 82 - Must install before Chrome session starts at Crawl level
- * Hook: on_Crawl (runs once per crawl, not per snapshot)
- *
- * This extension automatically:
- * - Saves complete web pages as single HTML files
- * - Inlines all resources (CSS, JS, images, fonts)
- * - Preserves page fidelity better than wget/curl
- * - Works with SPAs and dynamically loaded content
- */
-
-const path = require('path');
-const fs = require('fs');
-const { promisify } = require('util');
-const { exec } = require('child_process');
-
-const execAsync = promisify(exec);
-
-// Import extension utilities
-const extensionUtils = require('../chrome/chrome_utils.js');
-
-// Extension metadata
-const EXTENSION = {
-    webstore_id: 'mpiodijhokgodhhofbcjdecpffjipkle',
-    name: 'singlefile',
-};
-
-// Get extensions directory from environment or use default
-const EXTENSIONS_DIR = process.env.CHROME_EXTENSIONS_DIR ||
-    path.join(process.env.DATA_DIR || './data', 'personas', process.env.ACTIVE_PERSONA || 'Default', 'chrome_extensions');
-
-const CHROME_DOWNLOADS_DIR = process.env.CHROME_DOWNLOADS_DIR ||
-    path.join(process.env.DATA_DIR || './data', 'personas', process.env.ACTIVE_PERSONA || 'Default', 'chrome_downloads');
-
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'singlefile.html';
-
-/**
- * Install the SingleFile extension
- */
-async function installSinglefileExtension() {
-    console.log('[*] Installing SingleFile extension...');
-
-    // Install the extension
-    const extension = await extensionUtils.loadOrInstallExtension(EXTENSION, EXTENSIONS_DIR);
-
-    if (!extension) {
-        console.error('[❌] Failed to install SingleFile extension');
-        return null;
-    }
-
-    console.log('[+] SingleFile extension installed');
-    console.log('[+] Web pages will be saved as single HTML files');
-
-    return extension;
-}
-
-/**
- * Wait for a specified amount of time
- */
-function wait(ms) {
-    return new Promise(resolve => setTimeout(resolve, ms));
-}
-
-/**
- * Save a page using the SingleFile extension
- *
- * @param {Object} page - Puppeteer page object
- * @param {Object} extension - Extension metadata with dispatchAction method
- * @param {Object} options - Additional options
- * @returns {Promise<string|null>} - Path to saved file or null on failure
- */
-async function saveSinglefileWithExtension(page, extension, options = {}) {
-    if (!extension || !extension.version) {
-        throw new Error('SingleFile extension not found or not loaded');
-    }
-
-    const url = await page.url();
-    console.error(`[singlefile] Triggering extension for: ${url}`);
-
-    // Check for unsupported URL schemes
-    const URL_SCHEMES_IGNORED = ['about', 'chrome', 'chrome-extension', 'data', 'javascript', 'blob'];
-    const scheme = url.split(':')[0];
-    if (URL_SCHEMES_IGNORED.includes(scheme)) {
-        console.log(`[⚠️] Skipping SingleFile for URL scheme: ${scheme}`);
-        return null;
-    }
-
-    const downloadsDir = options.downloadsDir || CHROME_DOWNLOADS_DIR;
-    console.error(`[singlefile] Watching downloads dir: ${downloadsDir}`);
-
-    // Ensure downloads directory exists
-    await fs.promises.mkdir(downloadsDir, { recursive: true });
-
-    // Get list of existing files to ignore
-    const files_before = new Set(
-        (await fs.promises.readdir(downloadsDir))
-            .filter(fn => fn.toLowerCase().endsWith('.html') || fn.toLowerCase().endsWith('.htm'))
-    );
-
-    // Output directory is current directory (hook already runs in output dir)
-    const out_path = path.join(OUTPUT_DIR, OUTPUT_FILE);
-
-    console.error(`[singlefile] Saving via extension (${extension.id})...`);
-
-    // Bring page to front (extension action button acts on foreground tab)
-    await page.bringToFront();
-
-    // Trigger the extension's action (toolbar button click)
-    console.error('[singlefile] Dispatching extension action...');
-    try {
-        const actionTimeoutMs = options.actionTimeoutMs || 5000;
-        const actionPromise = extension.dispatchAction();
-        const actionResult = await Promise.race([
-            actionPromise,
-            wait(actionTimeoutMs).then(() => 'timeout'),
-        ]);
-        if (actionResult === 'timeout') {
-            console.error(`[singlefile] Extension action did not resolve within ${actionTimeoutMs}ms, continuing...`);
-        }
-    } catch (err) {
-        console.error(`[singlefile] Extension action error: ${err.message || err}`);
-    }
-
-    // Wait for file to appear in downloads directory
-    const check_delay = 3000; // 3 seconds
-    const max_tries = 10;
-    let files_new = [];
-
-    console.error(`[singlefile] Waiting up to ${(check_delay * max_tries) / 1000}s for download...`);
-    for (let attempt = 0; attempt < max_tries; attempt++) {
-        await wait(check_delay);
-
-        const files_after = (await fs.promises.readdir(downloadsDir))
-            .filter(fn => fn.toLowerCase().endsWith('.html') || fn.toLowerCase().endsWith('.htm'));
-
-        files_new = files_after.filter(file => !files_before.has(file));
-
-        if (files_new.length === 0) {
-            console.error(`[singlefile] No new downloads yet (${attempt + 1}/${max_tries})`);
-            continue;
-        }
-
-        console.error(`[singlefile] New download(s) detected: ${files_new.join(', ')}`);
-
-        // Prefer files that match the URL or have SingleFile markers
-        const url_variants = new Set([url]);
-        if (url.endsWith('/')) {
-            url_variants.add(url.slice(0, -1));
-        } else {
-            url_variants.add(`${url}/`);
-        }
-
-        const scored = [];
-        for (const file of files_new) {
-            const dl_path = path.join(downloadsDir, file);
-            let header = '';
-            try {
-                const dl_text = await fs.promises.readFile(dl_path, 'utf-8');
-                header = dl_text.slice(0, 200000);
-                const stat = await fs.promises.stat(dl_path);
-                console.error(`[singlefile] Download ${file} size=${stat.size} bytes`);
-            } catch (err) {
-                // Skip unreadable files
-                continue;
-            }
-
-            const header_lower = header.toLowerCase();
-            const has_url = Array.from(url_variants).some(v => header.includes(v));
-            const has_singlefile_marker = header_lower.includes('singlefile') || header_lower.includes('single-file');
-            const score = (has_url ? 2 : 0) + (has_singlefile_marker ? 1 : 0);
-            scored.push({ file, dl_path, score });
-        }
-
-        scored.sort((a, b) => b.score - a.score);
-
-        if (scored.length > 0) {
-            const best = scored[0];
-            if (best.score > 0 || files_new.length === 1) {
-                console.error(`[singlefile] Moving download from ${best.file} -> ${out_path}`);
-                await fs.promises.rename(best.dl_path, out_path);
-                const out_stat = await fs.promises.stat(out_path);
-                console.error(`[singlefile] Moved file size=${out_stat.size} bytes`);
-                return out_path;
-            }
-        }
-
-        if (files_new.length > 0) {
-            // Fallback: move the newest file if no clear match found
-            let newest = null;
-            let newest_mtime = -1;
-            for (const file of files_new) {
-                const dl_path = path.join(downloadsDir, file);
-                try {
-                    const stat = await fs.promises.stat(dl_path);
-                    if (stat.mtimeMs > newest_mtime) {
-                        newest_mtime = stat.mtimeMs;
-                        newest = { file, dl_path };
-                    }
-                } catch (err) {}
-            }
-            if (newest) {
-                console.error(`[singlefile] Moving newest download from ${newest.file} -> ${out_path}`);
-                await fs.promises.rename(newest.dl_path, out_path);
-                const out_stat = await fs.promises.stat(out_path);
-                console.error(`[singlefile] Moved file size=${out_stat.size} bytes`);
-                return out_path;
-            }
-        }
-    }
-
-    console.error(`[singlefile] Failed to find SingleFile HTML in ${downloadsDir} after ${(check_delay * max_tries) / 1000}s`);
-    console.error(`[singlefile] New files seen: ${files_new.join(', ')}`);
-    return null;
-}
-
-/**
- * Save a page using single-file-cli (fallback method)
- *
- * @param {string} url - URL to archive
- * @param {Object} options - Additional options
- * @returns {Promise<string|null>} - Path to saved file or null on failure
- */
-async function saveSinglefileWithCLI(url, options = {}) {
-    console.log('[*] Falling back to single-file-cli...');
-
-    // Find single-file binary
-    let binary = null;
-    try {
-        const { stdout } = await execAsync('which single-file');
-        binary = stdout.trim();
-    } catch (err) {
-        console.error('[❌] single-file-cli not found. Install with: npm install -g single-file-cli');
-        return null;
-    }
-
-    // Output directory is current directory (hook already runs in output dir)
-    const out_path = path.join(OUTPUT_DIR, OUTPUT_FILE);
-
-    // Build command
-    const cmd = [
-        binary,
-        '--browser-headless',
-        url,
-        out_path,
-    ];
-
-    // Add optional args
-    if (options.userAgent) {
-        cmd.splice(2, 0, '--browser-user-agent', options.userAgent);
-    }
-    if (options.cookiesFile && fs.existsSync(options.cookiesFile)) {
-        cmd.splice(2, 0, '--browser-cookies-file', options.cookiesFile);
-    }
-    if (options.ignoreSSL) {
-        cmd.splice(2, 0, '--browser-ignore-insecure-certs');
-    }
-
-    // Execute
-    try {
-        const timeout = options.timeout || 120000;
-        await execAsync(cmd.join(' '), { timeout });
-
-        if (fs.existsSync(out_path) && fs.statSync(out_path).size > 0) {
-            console.log(`[+] SingleFile saved via CLI: ${out_path}`);
-            return out_path;
-        }
-
-        console.error('[❌] SingleFile CLI completed but no output file found');
-        return null;
-    } catch (err) {
-        console.error(`[❌] SingleFile CLI error: ${err.message}`);
-        return null;
-    }
-}
-
-/**
- * Main entry point - install extension before archiving
- */
-async function main() {
-    // Check if extension is already cached
-    const cacheFile = path.join(EXTENSIONS_DIR, 'singlefile.extension.json');
-
-    if (fs.existsSync(cacheFile)) {
-        try {
-            const cached = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
-            const manifestPath = path.join(cached.unpacked_path, 'manifest.json');
-
-            if (fs.existsSync(manifestPath)) {
-                console.log('[*] SingleFile extension already installed (using cache)');
-                return cached;
-            }
-        } catch (e) {
-            // Cache file corrupted, re-install
-            console.warn('[⚠️] Extension cache corrupted, re-installing...');
-        }
-    }
-
-    // Install extension
-    const extension = await installSinglefileExtension();
-
-    // Export extension metadata for chrome plugin to load
-    if (extension) {
-        // Write extension info to a cache file that chrome plugin can read
-        await fs.promises.mkdir(EXTENSIONS_DIR, { recursive: true });
-        await fs.promises.writeFile(
-            cacheFile,
-            JSON.stringify(extension, null, 2)
-        );
-        console.log(`[+] Extension metadata written to ${cacheFile}`);
-    }
-
-    return extension;
-}
-
-// Export functions for use by other plugins
-module.exports = {
-    EXTENSION,
-    installSinglefileExtension,
-    saveSinglefileWithExtension,
-    saveSinglefileWithCLI,
-};
-
-// Run if executed directly
-if (require.main === module) {
-    main().then(() => {
-        console.log('[✓] SingleFile extension setup complete');
-        process.exit(0);
-    }).catch(err => {
-        console.error('[❌] SingleFile extension setup failed:', err);
-        process.exit(1);
-    });
-}
diff --git a/archivebox/plugins/singlefile/on_Snapshot__50_singlefile.py b/archivebox/plugins/singlefile/on_Snapshot__50_singlefile.py
deleted file mode 100644
index 4d91e0e7..00000000
--- a/archivebox/plugins/singlefile/on_Snapshot__50_singlefile.py
+++ /dev/null
@@ -1,397 +0,0 @@
-#!/usr/bin/env python3
-"""
-Archive a URL using SingleFile.
-
-Usage: on_Snapshot__singlefile.py --url=<url> --snapshot-id=<uuid>
-Output: Writes singlefile.html to $PWD
-
-Environment variables:
-    SINGLEFILE_ENABLED: Enable SingleFile archiving (default: True)
-    SINGLEFILE_BINARY: Path to SingleFile binary (default: single-file)
-    SINGLEFILE_NODE_BINARY: Path to Node.js binary (x-fallback: NODE_BINARY)
-    SINGLEFILE_CHROME_BINARY: Path to Chrome binary (x-fallback: CHROME_BINARY) [unused; shared Chrome session required]
-    SINGLEFILE_TIMEOUT: Timeout in seconds (x-fallback: TIMEOUT)
-    SINGLEFILE_USER_AGENT: User agent string (x-fallback: USER_AGENT)
-    SINGLEFILE_COOKIES_FILE: Path to cookies file (x-fallback: COOKIES_FILE)
-    SINGLEFILE_CHECK_SSL_VALIDITY: Whether to verify SSL certs (x-fallback: CHECK_SSL_VALIDITY)
-    SINGLEFILE_CHROME_ARGS: Chrome command-line arguments (x-fallback: CHROME_ARGS) [unused; shared Chrome session required]
-    SINGLEFILE_ARGS: Default SingleFile arguments (JSON array)
-    SINGLEFILE_ARGS_EXTRA: Extra arguments to append (JSON array)
-"""
-
-import json
-import os
-import subprocess
-import sys
-import threading
-import time
-from urllib.request import urlopen
-from pathlib import Path
-import shutil
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'singlefile'
-BIN_NAME = 'single-file'
-BIN_PROVIDERS = 'npm,env'
-OUTPUT_DIR = '.'
-OUTPUT_FILE = 'singlefile.html'
-EXTENSION_SAVE_SCRIPT = Path(__file__).parent / 'singlefile_extension_save.js'
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
-    """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
-    if not val:
-        return default if default is not None else []
-    try:
-        result = json.loads(val)
-        if isinstance(result, list):
-            return [str(item) for item in result]
-        return default if default is not None else []
-    except json.JSONDecodeError:
-        return default if default is not None else []
-
-
-STATICFILE_DIR = '../staticfile'
-
-def has_staticfile_output() -> bool:
-    """Check if staticfile extractor already downloaded this URL."""
-    staticfile_dir = Path(STATICFILE_DIR)
-    if not staticfile_dir.exists():
-        return False
-    stdout_log = staticfile_dir / 'stdout.log'
-    if not stdout_log.exists():
-        return False
-    for line in stdout_log.read_text(errors='ignore').splitlines():
-        line = line.strip()
-        if not line.startswith('{'):
-            continue
-        try:
-            record = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-        if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded':
-            return True
-    return False
-
-
-# Chrome session directory (relative to extractor output dir)
-# Note: Chrome binary is obtained via CHROME_BINARY env var, not searched for.
-# The centralized Chrome binary search is in chrome_utils.js findChromium().
-CHROME_SESSION_DIR = '../chrome'
-
-
-def get_cdp_url(wait_seconds: float = 0.0) -> str | None:
-    """Get CDP URL from chrome plugin if available."""
-    cdp_file = Path(CHROME_SESSION_DIR) / 'cdp_url.txt'
-    deadline = time.time() + max(wait_seconds, 0.0)
-    while True:
-        if cdp_file.exists():
-            cdp_url = cdp_file.read_text().strip()
-            return cdp_url or None
-        if time.time() >= deadline:
-            return None
-        time.sleep(0.2)
-
-
-def get_port_from_cdp_url(cdp_url: str) -> str | None:
-    """Extract port from CDP WebSocket URL (ws://127.0.0.1:PORT/...)."""
-    import re
-    match = re.search(r':(\d+)/', cdp_url)
-    if match:
-        return match.group(1)
-    return None
-
-
-def is_cdp_server_available(cdp_remote_url: str) -> bool:
-    try:
-        with urlopen(f'{cdp_remote_url}/json/version', timeout=1) as resp:
-            return resp.status == 200
-    except Exception:
-        return False
-
-
-def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
-    """
-    Archive URL using SingleFile.
-
-    Requires a Chrome session (from chrome plugin) and connects to it via CDP.
-
-    Returns: (success, output_path, error_message)
-    """
-    print(f'[singlefile] CLI mode start url={url}', file=sys.stderr)
-    # Get config from env (with SINGLEFILE_ prefix, x-fallback handled by config loader)
-    timeout = get_env_int('SINGLEFILE_TIMEOUT') or get_env_int('TIMEOUT', 120)
-    user_agent = get_env('SINGLEFILE_USER_AGENT') or get_env('USER_AGENT', '')
-    check_ssl = get_env_bool('SINGLEFILE_CHECK_SSL_VALIDITY', True) if get_env('SINGLEFILE_CHECK_SSL_VALIDITY') else get_env_bool('CHECK_SSL_VALIDITY', True)
-    cookies_file = get_env('SINGLEFILE_COOKIES_FILE') or get_env('COOKIES_FILE', '')
-    singlefile_args = get_env_array('SINGLEFILE_ARGS', [])
-    singlefile_args_extra = get_env_array('SINGLEFILE_ARGS_EXTRA', [])
-    # Chrome args/binary are intentionally ignored because we require a shared Chrome session
-
-    cmd = [binary, *singlefile_args]
-
-    # Try to use existing Chrome session via CDP (prefer HTTP base URL)
-    cdp_wait = min(10, max(1, timeout // 10))
-    cdp_url = get_cdp_url(wait_seconds=cdp_wait)
-    cdp_remote_url = None
-    if cdp_url:
-        if cdp_url.startswith(('http://', 'https://')):
-            cdp_remote_url = cdp_url
-        else:
-            port = get_port_from_cdp_url(cdp_url)
-            if port:
-                cdp_remote_url = f'http://127.0.0.1:{port}'
-            else:
-                cdp_remote_url = cdp_url
-
-    if cdp_remote_url and not is_cdp_server_available(cdp_remote_url):
-        cdp_remote_url = None
-
-    if cdp_remote_url:
-        print(f'[singlefile] Using existing Chrome session: {cdp_remote_url}', file=sys.stderr)
-        cmd.extend(['--browser-server', cdp_remote_url])
-    else:
-        return False, None, 'No Chrome session found (chrome plugin must run first)'
-
-    # SSL handling
-    if not check_ssl:
-        cmd.append('--browser-ignore-insecure-certs')
-
-    if user_agent:
-        cmd.extend(['--user-agent', user_agent])
-
-    if cookies_file and Path(cookies_file).is_file():
-        cmd.extend(['--browser-cookies-file', cookies_file])
-
-    # Add extra args from config
-    if singlefile_args_extra:
-        cmd.extend(singlefile_args_extra)
-
-    # Output directory is current directory (hook already runs in output dir)
-    output_dir = Path(OUTPUT_DIR)
-    output_path = output_dir / OUTPUT_FILE
-
-    cmd.extend([url, str(output_path)])
-    print(f'[singlefile] CLI command: {" ".join(cmd[:6])} ...', file=sys.stderr)
-
-    try:
-        output_lines: list[str] = []
-        process = subprocess.Popen(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            bufsize=1,
-        )
-
-        def _read_output() -> None:
-            if not process.stdout:
-                return
-            for line in process.stdout:
-                output_lines.append(line)
-                sys.stderr.write(line)
-
-        reader = threading.Thread(target=_read_output, daemon=True)
-        reader.start()
-
-        try:
-            process.wait(timeout=timeout)
-        except subprocess.TimeoutExpired:
-            process.kill()
-            reader.join(timeout=1)
-            return False, None, f'Timed out after {timeout} seconds'
-
-        reader.join(timeout=1)
-        combined_output = ''.join(output_lines)
-
-        if output_path.exists() and output_path.stat().st_size > 0:
-            return True, str(output_path), ''
-        else:
-            stderr = combined_output
-            if 'ERR_NAME_NOT_RESOLVED' in stderr:
-                return False, None, 'DNS resolution failed'
-            if 'ERR_CONNECTION_REFUSED' in stderr:
-                return False, None, 'Connection refused'
-            detail = (stderr or '').strip()
-            if len(detail) > 2000:
-                detail = detail[:2000]
-            cmd_preview = list(cmd)
-            if '--browser-args' in cmd_preview:
-                idx = cmd_preview.index('--browser-args')
-                if idx + 1 < len(cmd_preview):
-                    cmd_preview[idx + 1] = '<json>'
-            cmd_str = ' '.join(cmd_preview)
-            return False, None, f'SingleFile failed (cmd={cmd_str}): {detail}'
-
-    except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
-    except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
-
-
-def save_singlefile_with_extension(url: str, timeout: int) -> tuple[bool, str | None, str]:
-    """Save using the SingleFile Chrome extension via existing Chrome session."""
-    print(f'[singlefile] Extension mode start url={url}', file=sys.stderr)
-    # Only attempt if chrome session exists
-    cdp_url = get_cdp_url(wait_seconds=min(5, max(1, timeout // 10)))
-    if not cdp_url:
-        print('[singlefile] No Chrome session found (chrome plugin must run first)', file=sys.stderr)
-        return False, None, 'No Chrome session found (chrome plugin must run first)'
-
-    if not EXTENSION_SAVE_SCRIPT.exists():
-        print(f'[singlefile] Missing helper script: {EXTENSION_SAVE_SCRIPT}', file=sys.stderr)
-        return False, None, 'SingleFile extension helper script missing'
-
-    node_binary = get_env('SINGLEFILE_NODE_BINARY') or get_env('NODE_BINARY', 'node')
-    downloads_dir = get_env('CHROME_DOWNLOADS_DIR', '')
-    extensions_dir = get_env('CHROME_EXTENSIONS_DIR', '')
-    cmd = [node_binary, str(EXTENSION_SAVE_SCRIPT), f'--url={url}']
-    print(f'[singlefile] cdp_url={cdp_url}', file=sys.stderr)
-    print(f'[singlefile] node={node_binary}', file=sys.stderr)
-    node_resolved = shutil.which(node_binary) if node_binary else None
-    print(f'[singlefile] node_resolved={node_resolved}', file=sys.stderr)
-    print(f'[singlefile] PATH={os.environ.get("PATH","")}', file=sys.stderr)
-    if downloads_dir:
-        print(f'[singlefile] CHROME_DOWNLOADS_DIR={downloads_dir}', file=sys.stderr)
-    if extensions_dir:
-        print(f'[singlefile] CHROME_EXTENSIONS_DIR={extensions_dir}', file=sys.stderr)
-    print(f'[singlefile] helper_cmd={" ".join(cmd)}', file=sys.stderr)
-
-    try:
-        output_lines: list[str] = []
-        error_lines: list[str] = []
-        process = subprocess.Popen(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            bufsize=1,
-        )
-
-        def _read_stream(stream, sink, label: str) -> None:
-            if not stream:
-                return
-            for line in stream:
-                sink.append(line)
-                sys.stderr.write(line)
-                sys.stderr.flush()
-
-        stdout_thread = threading.Thread(target=_read_stream, args=(process.stdout, output_lines, 'stdout'), daemon=True)
-        stderr_thread = threading.Thread(target=_read_stream, args=(process.stderr, error_lines, 'stderr'), daemon=True)
-        stdout_thread.start()
-        stderr_thread.start()
-
-        try:
-            process.wait(timeout=timeout)
-        except subprocess.TimeoutExpired:
-            process.kill()
-            stdout_thread.join(timeout=1)
-            stderr_thread.join(timeout=1)
-            print(f'[singlefile] Extension helper timed out after {timeout}s', file=sys.stderr)
-            return False, None, f'Timed out after {timeout} seconds'
-
-        stdout_thread.join(timeout=1)
-        stderr_thread.join(timeout=1)
-
-        result_stdout = ''.join(output_lines).encode('utf-8', errors='replace')
-        result_stderr = ''.join(error_lines).encode('utf-8', errors='replace')
-        result_returncode = process.returncode
-    except Exception as e:
-        print(f'[singlefile] Extension helper error: {type(e).__name__}: {e}', file=sys.stderr)
-        return False, None, f'{type(e).__name__}: {e}'
-
-    print(f'[singlefile] helper_returncode={result_returncode}', file=sys.stderr)
-    print(f'[singlefile] helper_stdout_len={len(result_stdout or b"")}', file=sys.stderr)
-    print(f'[singlefile] helper_stderr_len={len(result_stderr or b"")}', file=sys.stderr)
-
-    if result_returncode == 0:
-        # Prefer explicit stdout path, fallback to local output file
-        out_text = result_stdout.decode('utf-8', errors='replace').strip()
-        if out_text and Path(out_text).exists():
-            print(f'[singlefile] Extension output: {out_text}', file=sys.stderr)
-            return True, out_text, ''
-        output_path = Path(OUTPUT_DIR) / OUTPUT_FILE
-        if output_path.exists() and output_path.stat().st_size > 0:
-            print(f'[singlefile] Extension output: {output_path}', file=sys.stderr)
-            return True, str(output_path), ''
-        return False, None, 'SingleFile extension completed but no output file found'
-
-    stderr = result_stderr.decode('utf-8', errors='replace').strip()
-    stdout = result_stdout.decode('utf-8', errors='replace').strip()
-    detail = stderr or stdout
-    return False, None, detail or 'SingleFile extension failed'
-
-
-@click.command()
-@click.option('--url', required=True, help='URL to archive')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Archive a URL using SingleFile."""
-
-    print(f'[singlefile] Hook starting pid={os.getpid()} url={url}', file=sys.stderr)
-    output = None
-    status = 'failed'
-    error = ''
-
-    try:
-        # Check if SingleFile is enabled
-        if not get_env_bool('SINGLEFILE_ENABLED', True):
-            print('Skipping SingleFile (SINGLEFILE_ENABLED=False)', file=sys.stderr)
-            # Feature disabled - no ArchiveResult, just exit
-            sys.exit(0)
-
-        # Check if staticfile extractor already handled this (permanent skip)
-        if has_staticfile_output():
-            print('Skipping SingleFile - staticfile extractor already downloaded this', file=sys.stderr)
-            print(json.dumps({'type': 'ArchiveResult', 'status': 'skipped', 'output_str': 'staticfile already exists'}))
-            sys.exit(0)
-
-        # Prefer SingleFile extension via existing Chrome session
-        timeout = get_env_int('SINGLEFILE_TIMEOUT') or get_env_int('TIMEOUT', 120)
-        success, output, error = save_singlefile_with_extension(url, timeout)
-        status = 'succeeded' if success else 'failed'
-
-    except Exception as e:
-        error = f'{type(e).__name__}: {e}'
-        status = 'failed'
-
-    if error:
-        print(f'ERROR: {error}', file=sys.stderr)
-
-    # Output clean JSONL (no RESULT_JSON= prefix)
-    result = {
-        'type': 'ArchiveResult',
-        'status': status,
-        'output_str': output or error or '',
-    }
-    print(json.dumps(result))
-
-    sys.exit(0 if status == 'succeeded' else 1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/singlefile/singlefile_extension_save.js b/archivebox/plugins/singlefile/singlefile_extension_save.js
deleted file mode 100644
index 7bb8138e..00000000
--- a/archivebox/plugins/singlefile/singlefile_extension_save.js
+++ /dev/null
@@ -1,207 +0,0 @@
-#!/usr/bin/env node
-/**
- * Save a page using the SingleFile Chrome extension via an existing Chrome session.
- *
- * Usage: singlefile_extension_save.js --url=<url>
- * Output: prints saved file path on success
- */
-
-const fs = require('fs');
-const path = require('path');
-
-const CHROME_SESSION_DIR = '../chrome';
-const DOWNLOADS_DIR = process.env.CHROME_DOWNLOADS_DIR ||
-    path.join(process.env.DATA_DIR || './data', 'personas', process.env.ACTIVE_PERSONA || 'Default', 'chrome_downloads');
-
-process.env.CHROME_DOWNLOADS_DIR = DOWNLOADS_DIR;
-
-async function setDownloadDir(page, downloadDir) {
-    try {
-        await fs.promises.mkdir(downloadDir, { recursive: true });
-        const client = await page.target().createCDPSession();
-        try {
-            await client.send('Page.setDownloadBehavior', {
-                behavior: 'allow',
-                downloadPath: downloadDir,
-            });
-        } catch (err) {
-            // Fallback for newer protocol versions
-            await client.send('Browser.setDownloadBehavior', {
-                behavior: 'allow',
-                downloadPath: downloadDir,
-            });
-        }
-    } catch (err) {
-        console.error(`[⚠️] Failed to set download directory: ${err.message || err}`);
-    }
-}
-
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach((arg) => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-
-    if (!url) {
-        console.error('Usage: singlefile_extension_save.js --url=<url>');
-        process.exit(1);
-    }
-
-    console.error(`[singlefile] helper start url=${url}`);
-    console.error(`[singlefile] downloads_dir=${DOWNLOADS_DIR}`);
-    if (process.env.CHROME_EXTENSIONS_DIR) {
-        console.error(`[singlefile] extensions_dir=${process.env.CHROME_EXTENSIONS_DIR}`);
-    }
-
-    try {
-        console.error('[singlefile] loading dependencies...');
-        const puppeteer = require('puppeteer-core');
-        const chromeUtils = require('../chrome/chrome_utils.js');
-        const {
-            EXTENSION,
-            saveSinglefileWithExtension,
-        } = require('./on_Crawl__82_singlefile_install.js');
-        console.error('[singlefile] dependencies loaded');
-
-        // Ensure extension is installed and metadata is cached
-        console.error('[singlefile] ensuring extension cache...');
-        const extension = await chromeUtils.installExtensionWithCache(
-            EXTENSION,
-            { extensionsDir: process.env.CHROME_EXTENSIONS_DIR }
-        );
-        if (!extension) {
-            console.error('[❌] SingleFile extension not installed');
-            process.exit(2);
-        }
-        if (extension.unpacked_path) {
-            const runtimeId = chromeUtils.getExtensionId(extension.unpacked_path);
-            if (runtimeId) {
-                extension.id = runtimeId;
-            }
-        }
-        console.error(`[singlefile] extension ready id=${extension.id} version=${extension.version}`);
-
-        // Connect to existing Chrome session
-        console.error('[singlefile] connecting to chrome session...');
-        const { browser, page } = await chromeUtils.connectToPage({
-            chromeSessionDir: CHROME_SESSION_DIR,
-            timeoutMs: 60000,
-            puppeteer,
-        });
-        console.error('[singlefile] connected to chrome');
-
-        try {
-            // Ensure CDP target discovery is enabled so service_worker targets appear
-            try {
-                const client = await page.createCDPSession();
-                await client.send('Target.setDiscoverTargets', { discover: true });
-                await client.send('Target.setAutoAttach', { autoAttach: true, waitForDebuggerOnStart: false, flatten: true });
-            } catch (err) {
-                console.error(`[singlefile] failed to enable target discovery: ${err.message || err}`);
-            }
-
-            // Wait for extension target to be available, then attach dispatchAction
-            console.error('[singlefile] waiting for extension target...');
-            const deadline = Date.now() + 30000;
-            let matchTarget = null;
-            let matchInfo = null;
-            let lastLog = 0;
-            const wantedName = (extension.name || 'singlefile').toLowerCase();
-
-            while (Date.now() < deadline && !matchTarget) {
-                const targets = browser.targets();
-                for (const target of targets) {
-                    const info = await chromeUtils.isTargetExtension(target);
-                    if (!info?.target_is_extension || !info?.extension_id) {
-                        continue;
-                    }
-                    const manifestName = (info.manifest_name || '').toLowerCase();
-                    const targetUrl = (info.target_url || '').toLowerCase();
-                    const nameMatches = manifestName.includes(wantedName) || manifestName.includes('singlefile') || manifestName.includes('single-file');
-                    const urlMatches = targetUrl.includes('singlefile') || targetUrl.includes('single-file') || targetUrl.includes('single-file-extension');
-                    if (nameMatches || urlMatches) {
-                        matchTarget = target;
-                        matchInfo = info;
-                        break;
-                    }
-                }
-
-                if (!matchTarget) {
-                    if (Date.now() - lastLog > 5000) {
-                        const targetsSummary = [];
-                        for (const target of targets) {
-                            const info = await chromeUtils.isTargetExtension(target);
-                            if (!info?.target_is_extension) {
-                                continue;
-                            }
-                            targetsSummary.push({
-                                type: info.target_type,
-                                url: info.target_url,
-                                extensionId: info.extension_id,
-                                manifestName: info.manifest_name,
-                            });
-                        }
-                        console.error(`[singlefile] waiting... targets total=${targets.length} extensions=${targetsSummary.length} details=${JSON.stringify(targetsSummary)}`);
-                        lastLog = Date.now();
-                    }
-                    await new Promise(r => setTimeout(r, 500));
-                }
-            }
-
-            if (!matchTarget || !matchInfo) {
-                const targets = chromeUtils.getExtensionTargets(browser);
-                console.error(`[singlefile] extension target not found (name=${extension.name})`);
-                console.error(`[singlefile] available targets: ${JSON.stringify(targets)}`);
-                await browser.disconnect();
-                process.exit(5);
-            }
-
-            // Use the runtime extension id from the matched target
-            extension.id = matchInfo.extension_id;
-
-            console.error('[singlefile] loading extension from target...');
-            await chromeUtils.loadExtensionFromTarget([extension], matchTarget);
-            if (typeof extension.dispatchAction !== 'function') {
-                const targets = chromeUtils.getExtensionTargets(browser);
-                console.error(`[singlefile] extension dispatchAction missing for id=${extension.id}`);
-                console.error(`[singlefile] available targets: ${JSON.stringify(targets)}`);
-                await browser.disconnect();
-                process.exit(6);
-            }
-            console.error('[singlefile] setting download dir...');
-            await setDownloadDir(page, DOWNLOADS_DIR);
-
-            console.error('[singlefile] triggering save via extension...');
-            const output = await saveSinglefileWithExtension(page, extension, { downloadsDir: DOWNLOADS_DIR });
-            if (output && fs.existsSync(output)) {
-                console.error(`[singlefile] saved: ${output}`);
-                console.log(output);
-                await browser.disconnect();
-                process.exit(0);
-            }
-
-            console.error('[❌] SingleFile extension did not produce output');
-            await browser.disconnect();
-            process.exit(3);
-        } catch (err) {
-            await browser.disconnect();
-            throw err;
-        }
-    } catch (err) {
-        console.error(`[❌] ${err.message || err}`);
-        process.exit(4);
-    }
-}
-
-if (require.main === module) {
-    main();
-}
diff --git a/archivebox/plugins/singlefile/templates/card.html b/archivebox/plugins/singlefile/templates/card.html
deleted file mode 100644
index 5d7e5614..00000000
--- a/archivebox/plugins/singlefile/templates/card.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<!-- Singlefile thumbnail - scaled down iframe preview of archived HTML -->
-<div class="extractor-thumbnail singlefile-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fff;">
-    <iframe src="{{ output_path }}"
-            style="width: 400%; height: 400px; transform: scale(0.25); transform-origin: top left; pointer-events: none; border: none;"
-            loading="lazy"
-            sandbox="allow-same-origin">
-    </iframe>
-</div>
diff --git a/archivebox/plugins/singlefile/templates/icon.html b/archivebox/plugins/singlefile/templates/icon.html
deleted file mode 100644
index cd055f8b..00000000
--- a/archivebox/plugins/singlefile/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--singlefile" title="SingleFile"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M14 3H6a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V9z"/><path d="M14 3v6h6"/><path d="M9 14l2 2 4-4"/></svg></span>
diff --git a/archivebox/plugins/singlefile/tests/test_singlefile.py b/archivebox/plugins/singlefile/tests/test_singlefile.py
deleted file mode 100644
index 8de0a163..00000000
--- a/archivebox/plugins/singlefile/tests/test_singlefile.py
+++ /dev/null
@@ -1,304 +0,0 @@
-"""
-Integration tests for singlefile plugin
-
-Tests verify:
-1. Hook scripts exist with correct naming
-2. CLI-based singlefile extraction works
-3. Dependencies available via abx-pkg
-4. Output contains valid HTML
-5. Connects to Chrome session via CDP when available
-6. Works with extensions loaded (ublock, etc.)
-"""
-
-import json
-import os
-import subprocess
-import sys
-import tempfile
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
-    chrome_session,
-    cleanup_chrome,
-)
-
-
-PLUGIN_DIR = get_plugin_dir(__file__)
-SNAPSHOT_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_singlefile.py')
-INSTALL_SCRIPT = PLUGIN_DIR / 'on_Crawl__82_singlefile_install.js'
-TEST_URL = "https://example.com"
-
-
-def test_snapshot_hook_exists():
-    """Verify snapshot extraction hook exists"""
-    assert SNAPSHOT_HOOK is not None and SNAPSHOT_HOOK.exists(), f"Snapshot hook not found in {PLUGIN_DIR}"
-
-
-def test_snapshot_hook_priority():
-    """Test that snapshot hook has correct priority (50)"""
-    filename = SNAPSHOT_HOOK.name
-    assert "50" in filename, "SingleFile snapshot hook should have priority 50"
-    assert filename.startswith("on_Snapshot__50_"), "Should follow priority naming convention"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify dependencies are available via abx-pkg."""
-    from abx_pkg import Binary, EnvProvider
-
-    EnvProvider.model_rebuild()
-
-    # Verify node is available
-    node_binary = Binary(name='node', binproviders=[EnvProvider()])
-    node_loaded = node_binary.load()
-    assert node_loaded and node_loaded.abspath, "Node.js required for singlefile plugin"
-
-
-def test_singlefile_cli_archives_example_com():
-    """Test that singlefile archives example.com and produces valid HTML."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        data_dir = tmpdir / 'data'
-        extensions_dir = data_dir / 'personas' / 'Default' / 'chrome_extensions'
-        downloads_dir = data_dir / 'personas' / 'Default' / 'chrome_downloads'
-        user_data_dir = data_dir / 'personas' / 'Default' / 'chrome_user_data'
-        extensions_dir.mkdir(parents=True, exist_ok=True)
-        downloads_dir.mkdir(parents=True, exist_ok=True)
-        user_data_dir.mkdir(parents=True, exist_ok=True)
-
-        env_install = os.environ.copy()
-        env_install.update({
-            'DATA_DIR': str(data_dir),
-            'CHROME_EXTENSIONS_DIR': str(extensions_dir),
-            'CHROME_DOWNLOADS_DIR': str(downloads_dir),
-        })
-
-        result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env_install,
-            timeout=120,
-        )
-        assert result.returncode == 0, f"Extension install failed: {result.stderr}"
-
-        old_env = os.environ.copy()
-        os.environ['CHROME_USER_DATA_DIR'] = str(user_data_dir)
-        os.environ['CHROME_DOWNLOADS_DIR'] = str(downloads_dir)
-        os.environ['CHROME_EXTENSIONS_DIR'] = str(extensions_dir)
-        try:
-            with chrome_session(
-                tmpdir=tmpdir,
-                crawl_id='singlefile-cli-crawl',
-                snapshot_id='singlefile-cli-snap',
-                test_url=TEST_URL,
-                navigate=True,
-                timeout=30,
-            ) as (_chrome_proc, _chrome_pid, snapshot_chrome_dir, env):
-                env['SINGLEFILE_ENABLED'] = 'true'
-                env['CHROME_EXTENSIONS_DIR'] = str(extensions_dir)
-                env['CHROME_DOWNLOADS_DIR'] = str(downloads_dir)
-
-                singlefile_output_dir = snapshot_chrome_dir.parent / 'singlefile'
-                singlefile_output_dir.mkdir(parents=True, exist_ok=True)
-
-                # Run singlefile snapshot hook
-                result = subprocess.run(
-                    [sys.executable, str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
-                    cwd=singlefile_output_dir,
-                    capture_output=True,
-                    text=True,
-                    env=env,
-                    timeout=120,
-                )
-        finally:
-            os.environ.clear()
-            os.environ.update(old_env)
-
-        assert result.returncode == 0, f"Hook execution failed: {result.stderr}"
-
-        # Verify output file exists
-        output_file = singlefile_output_dir / 'singlefile.html'
-        assert output_file.exists(), f"singlefile.html not created. stdout: {result.stdout}, stderr: {result.stderr}"
-
-        # Verify it contains real HTML
-        html_content = output_file.read_text()
-        assert len(html_content) > 500, "Output file too small to be valid HTML"
-        assert '<!DOCTYPE html>' in html_content or '<html' in html_content, "Output should contain HTML doctype or html tag"
-        assert 'Example Domain' in html_content, "Output should contain example.com content"
-
-
-def test_singlefile_with_chrome_session():
-    """Test singlefile connects to existing Chrome session via CDP.
-
-    When a Chrome session exists (chrome/cdp_url.txt), singlefile should
-    connect to it instead of launching a new Chrome instance.
-    """
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set up Chrome session using shared helper
-        with chrome_session(
-            tmpdir=tmpdir,
-            crawl_id='singlefile-test-crawl',
-            snapshot_id='singlefile-test-snap',
-            test_url=TEST_URL,
-            navigate=False,  # Don't navigate, singlefile will do that
-            timeout=20,
-        ) as (chrome_launch_process, chrome_pid, snapshot_chrome_dir, env):
-            # singlefile looks for ../chrome/cdp_url.txt relative to cwd
-            # So we need to run from a directory that has ../chrome pointing to our chrome dir
-            singlefile_output_dir = tmpdir / 'snapshot' / 'singlefile'
-            singlefile_output_dir.mkdir(parents=True, exist_ok=True)
-
-            # Create symlink so singlefile can find the chrome session
-            chrome_link = singlefile_output_dir.parent / 'chrome'
-            if not chrome_link.exists():
-                chrome_link.symlink_to(tmpdir / 'crawl' / 'chrome')
-
-            # Use env from chrome_session
-            env['SINGLEFILE_ENABLED'] = 'true'
-
-            # Run singlefile - it should find and use the existing Chrome session
-            result = subprocess.run(
-                [sys.executable, str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=singlefile-test-snap'],
-                cwd=str(singlefile_output_dir),
-                capture_output=True,
-                text=True,
-                env=env,
-                timeout=120
-            )
-
-            # Verify output
-            output_file = singlefile_output_dir / 'singlefile.html'
-            if output_file.exists():
-                html_content = output_file.read_text()
-                assert len(html_content) > 500, "Output file too small"
-                assert 'Example Domain' in html_content, "Should contain example.com content"
-            else:
-                # If singlefile couldn't connect to Chrome, it may have failed
-                # Check if it mentioned browser-server in its args (indicating it tried to use CDP)
-                assert result.returncode == 0 or 'browser-server' in result.stderr or 'cdp' in result.stderr.lower(), \
-                    f"Singlefile should attempt CDP connection. stderr: {result.stderr}"
-
-
-def test_singlefile_with_extension_uses_existing_chrome():
-    """Test SingleFile uses the Chrome extension via existing session (CLI fallback disabled)."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        data_dir = tmpdir / 'data'
-        extensions_dir = data_dir / 'personas' / 'Default' / 'chrome_extensions'
-        downloads_dir = data_dir / 'personas' / 'Default' / 'chrome_downloads'
-        user_data_dir = data_dir / 'personas' / 'Default' / 'chrome_user_data'
-        extensions_dir.mkdir(parents=True, exist_ok=True)
-        downloads_dir.mkdir(parents=True, exist_ok=True)
-        user_data_dir.mkdir(parents=True, exist_ok=True)
-
-        env_install = os.environ.copy()
-        env_install.update({
-            'DATA_DIR': str(data_dir),
-            'CHROME_EXTENSIONS_DIR': str(extensions_dir),
-            'CHROME_DOWNLOADS_DIR': str(downloads_dir),
-        })
-
-        # Install SingleFile extension cache before launching Chrome
-        result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env_install,
-            timeout=120
-        )
-        assert result.returncode == 0, f"Extension install failed: {result.stderr}"
-
-        # Launch Chrome session with extensions loaded
-        old_env = os.environ.copy()
-        os.environ['CHROME_USER_DATA_DIR'] = str(user_data_dir)
-        os.environ['CHROME_DOWNLOADS_DIR'] = str(downloads_dir)
-        os.environ['CHROME_EXTENSIONS_DIR'] = str(extensions_dir)
-        try:
-            with chrome_session(
-                tmpdir=tmpdir,
-                crawl_id='singlefile-ext-crawl',
-                snapshot_id='singlefile-ext-snap',
-                test_url=TEST_URL,
-                navigate=True,
-                timeout=30,
-            ) as (_chrome_proc, _chrome_pid, snapshot_chrome_dir, env):
-                singlefile_output_dir = tmpdir / 'snapshot' / 'singlefile'
-                singlefile_output_dir.mkdir(parents=True, exist_ok=True)
-
-                # Ensure ../chrome points to snapshot chrome session (contains target_id.txt)
-                chrome_dir = singlefile_output_dir.parent / 'chrome'
-                if not chrome_dir.exists():
-                    chrome_dir.symlink_to(snapshot_chrome_dir)
-
-                env['SINGLEFILE_ENABLED'] = 'true'
-                env['SINGLEFILE_BINARY'] = '/nonexistent/single-file'  # force extension path
-                env['CHROME_EXTENSIONS_DIR'] = str(extensions_dir)
-                env['CHROME_DOWNLOADS_DIR'] = str(downloads_dir)
-                env['CHROME_HEADLESS'] = 'false'
-
-                # Track downloads dir state before run to ensure file is created then moved out
-                downloads_before = set(downloads_dir.glob('*.html'))
-                downloads_mtime_before = downloads_dir.stat().st_mtime_ns
-
-                result = subprocess.run(
-                    [sys.executable, str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=singlefile-ext-snap'],
-                    cwd=str(singlefile_output_dir),
-                    capture_output=True,
-                    text=True,
-                    env=env,
-                    timeout=120
-                )
-
-                assert result.returncode == 0, f"SingleFile extension run failed: {result.stderr}"
-
-                output_file = singlefile_output_dir / 'singlefile.html'
-                assert output_file.exists(), f"singlefile.html not created. stdout: {result.stdout}, stderr: {result.stderr}"
-                html_content = output_file.read_text(errors='ignore')
-                assert 'Example Domain' in html_content, "Output should contain example.com content"
-
-                # Verify download moved out of downloads dir
-                downloads_after = set(downloads_dir.glob('*.html'))
-                new_downloads = downloads_after - downloads_before
-                downloads_mtime_after = downloads_dir.stat().st_mtime_ns
-                assert downloads_mtime_after != downloads_mtime_before, "Downloads dir should be modified during extension save"
-                assert not new_downloads, f"SingleFile download should be moved out of downloads dir, found: {new_downloads}"
-        finally:
-            os.environ.clear()
-            os.environ.update(old_env)
-
-
-def test_singlefile_disabled_skips():
-    """Test that SINGLEFILE_ENABLED=False exits without JSONL."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        env = get_test_env()
-        env['SINGLEFILE_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            [sys.executable, str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-disabled'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Should exit 0 when disabled: {result.stderr}"
-
-        # Should NOT emit JSONL when disabled
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when disabled, but got: {jsonl_lines}"
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/ssl/config.json b/archivebox/plugins/ssl/config.json
deleted file mode 100644
index d83dbfd3..00000000
--- a/archivebox/plugins/ssl/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "SSL_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_SSL", "USE_SSL"],
-      "description": "Enable SSL certificate capture"
-    },
-    "SSL_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for SSL capture in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/ssl/on_Snapshot__23_ssl.bg.js b/archivebox/plugins/ssl/on_Snapshot__23_ssl.bg.js
deleted file mode 100755
index 6559d9fd..00000000
--- a/archivebox/plugins/ssl/on_Snapshot__23_ssl.bg.js
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/usr/bin/env node
-/**
- * Extract SSL/TLS certificate details from a URL.
- *
- * This hook sets up CDP listeners BEFORE chrome_navigate loads the page,
- * then waits for navigation to complete. The listener captures SSL details
- * during the navigation request.
- *
- * Usage: on_Snapshot__23_ssl.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes ssl.jsonl
- */
-
-const fs = require('fs');
-const path = require('path');
-
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const puppeteer = require('puppeteer-core');
-
-// Import shared utilities from chrome_utils.js
-const {
-    getEnvBool,
-    getEnvInt,
-    parseArgs,
-    connectToPage,
-    waitForPageLoaded,
-} = require('../chrome/chrome_utils.js');
-
-const PLUGIN_NAME = 'ssl';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'ssl.jsonl';
-const CHROME_SESSION_DIR = '../chrome';
-
-let browser = null;
-let page = null;
-let client = null;
-let sslCaptured = false;
-let shuttingDown = false;
-
-async function setupListener(url) {
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-    const timeout = getEnvInt('SSL_TIMEOUT', 30) * 1000;
-    let targetHost = null;
-
-    // Only extract SSL for HTTPS URLs
-    if (!url.startsWith('https://')) {
-        throw new Error('URL is not HTTPS');
-    }
-
-    try {
-        targetHost = new URL(url).host;
-    } catch (e) {
-        targetHost = null;
-    }
-
-    // Connect to Chrome page using shared utility
-    const { browser, page } = await connectToPage({
-        chromeSessionDir: CHROME_SESSION_DIR,
-        timeoutMs: timeout,
-        puppeteer,
-    });
-
-    client = await page.target().createCDPSession();
-    await client.send('Network.enable');
-
-    client.on('Network.responseReceived', (params) => {
-        try {
-            if (sslCaptured) return;
-            if (params.type && params.type !== 'Document') return;
-            const response = params.response || {};
-            const responseUrl = response.url || '';
-            if (!responseUrl.startsWith('http')) return;
-
-            if (targetHost) {
-                try {
-                    const responseHost = new URL(responseUrl).host;
-                    if (responseHost !== targetHost) return;
-                } catch (e) {
-                    // Ignore URL parse errors, fall through
-                }
-            }
-
-            const securityDetails = response.securityDetails || null;
-            let sslInfo = { url: responseUrl };
-
-            if (securityDetails) {
-                sslInfo.protocol = securityDetails.protocol;
-                sslInfo.subjectName = securityDetails.subjectName;
-                sslInfo.issuer = securityDetails.issuer;
-                sslInfo.validFrom = securityDetails.validFrom;
-                sslInfo.validTo = securityDetails.validTo;
-                sslInfo.certificateId = securityDetails.subjectName;
-                sslInfo.securityState = response.securityState || 'secure';
-                sslInfo.schemeIsCryptographic = true;
-
-                const sanList = securityDetails.sanList;
-                if (sanList && sanList.length > 0) {
-                    sslInfo.subjectAlternativeNames = sanList;
-                }
-            } else if (responseUrl.startsWith('https://')) {
-                sslInfo.securityState = response.securityState || 'unknown';
-                sslInfo.schemeIsCryptographic = true;
-                sslInfo.error = 'No security details available';
-            } else {
-                sslInfo.securityState = 'insecure';
-                sslInfo.schemeIsCryptographic = false;
-            }
-
-            fs.writeFileSync(outputPath, JSON.stringify(sslInfo, null, 2));
-            sslCaptured = true;
-        } catch (e) {
-            // Ignore errors
-        }
-    });
-
-    return { browser, page };
-}
-
-function emitResult(status = 'succeeded') {
-    if (shuttingDown) return;
-    shuttingDown = true;
-
-    const outputStr = sslCaptured ? OUTPUT_FILE : OUTPUT_FILE;
-    console.log(JSON.stringify({
-        type: 'ArchiveResult',
-        status,
-        output_str: outputStr,
-    }));
-}
-
-async function handleShutdown(signal) {
-    console.error(`\nReceived ${signal}, emitting final results...`);
-    emitResult('succeeded');
-    if (browser) {
-        try {
-            browser.disconnect();
-        } catch (e) {}
-    }
-    process.exit(0);
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__23_ssl.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    if (!getEnvBool('SSL_ENABLED', true)) {
-        console.error('Skipping (SSL_ENABLED=False)');
-        console.log(JSON.stringify({type: 'ArchiveResult', status: 'skipped', output_str: 'SSL_ENABLED=False'}));
-        process.exit(0);
-    }
-
-    try {
-        // Set up listener BEFORE navigation
-        const connection = await setupListener(url);
-        browser = connection.browser;
-        page = connection.page;
-
-        // Register signal handlers for graceful shutdown
-        process.on('SIGTERM', () => handleShutdown('SIGTERM'));
-        process.on('SIGINT', () => handleShutdown('SIGINT'));
-
-        // Wait for chrome_navigate to complete (non-fatal)
-        try {
-            const timeout = getEnvInt('SSL_TIMEOUT', 30) * 1000;
-            await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4);
-        } catch (e) {
-            console.error(`WARN: ${e.message}`);
-        }
-
-        // console.error('SSL listener active, waiting for cleanup signal...');
-        await new Promise(() => {}); // Keep alive until SIGTERM
-        return;
-
-    } catch (e) {
-        const error = `${e.name}: ${e.message}`;
-        console.error(`ERROR: ${error}`);
-
-        console.log(JSON.stringify({
-            type: 'ArchiveResult',
-            status: 'failed',
-            output_str: error,
-        }));
-        process.exit(1);
-    }
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/ssl/templates/icon.html b/archivebox/plugins/ssl/templates/icon.html
deleted file mode 100644
index 1707e8b9..00000000
--- a/archivebox/plugins/ssl/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--ssl" title="SSL"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="5" y="11" width="14" height="9" rx="2"/><path d="M8 11V8a4 4 0 0 1 8 0v3"/></svg></span>
diff --git a/archivebox/plugins/ssl/tests/test_ssl.py b/archivebox/plugins/ssl/tests/test_ssl.py
deleted file mode 100644
index 6f8375c1..00000000
--- a/archivebox/plugins/ssl/tests/test_ssl.py
+++ /dev/null
@@ -1,147 +0,0 @@
-"""
-Tests for the SSL plugin.
-
-Tests the real SSL hook with an actual HTTPS URL to verify
-certificate information extraction.
-"""
-
-import json
-import shutil
-import subprocess
-import sys
-import tempfile
-import time
-from pathlib import Path
-
-from django.test import TestCase
-
-# Import chrome test helpers
-sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
-from chrome_test_helpers import (
-    chrome_session,
-    CHROME_NAVIGATE_HOOK,
-    get_plugin_dir,
-    get_hook_script,
-)
-
-
-# Get the path to the SSL hook
-PLUGIN_DIR = get_plugin_dir(__file__)
-SSL_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_ssl.*')
-
-
-class TestSSLPlugin(TestCase):
-    """Test the SSL plugin with real HTTPS URLs."""
-
-    def test_ssl_hook_exists(self):
-        """SSL hook script should exist."""
-        self.assertIsNotNone(SSL_HOOK, "SSL hook not found in plugin directory")
-        self.assertTrue(SSL_HOOK.exists(), f"Hook not found: {SSL_HOOK}")
-
-
-class TestSSLWithChrome(TestCase):
-    """Integration tests for SSL plugin with Chrome."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = Path(tempfile.mkdtemp())
-
-    def tearDown(self):
-        """Clean up."""
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_ssl_extracts_certificate_from_https_url(self):
-        """SSL hook should extract certificate info from a real HTTPS URL."""
-        test_url = 'https://example.com'
-        snapshot_id = 'test-ssl-snapshot'
-
-        with chrome_session(
-            self.temp_dir,
-            crawl_id='test-ssl-crawl',
-            snapshot_id=snapshot_id,
-            test_url=test_url,
-            navigate=False,
-            timeout=30,
-        ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-            ssl_dir = snapshot_chrome_dir.parent / 'ssl'
-            ssl_dir.mkdir(exist_ok=True)
-
-            # Run SSL hook with the active Chrome session (background hook)
-            result = subprocess.Popen(
-                ['node', str(SSL_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                cwd=str(ssl_dir),
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                text=True,
-                env=env
-            )
-
-            nav_result = subprocess.run(
-                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                cwd=str(snapshot_chrome_dir),
-                capture_output=True,
-                text=True,
-                timeout=120,
-                env=env
-            )
-            self.assertEqual(nav_result.returncode, 0, f"Navigation failed: {nav_result.stderr}")
-
-            # Check for output file
-            ssl_output = ssl_dir / 'ssl.jsonl'
-            for _ in range(30):
-                if ssl_output.exists() and ssl_output.stat().st_size > 0:
-                    break
-                time.sleep(1)
-
-            if result.poll() is None:
-                result.terminate()
-                try:
-                    stdout, stderr = result.communicate(timeout=5)
-                except subprocess.TimeoutExpired:
-                    result.kill()
-                    stdout, stderr = result.communicate()
-            else:
-                stdout, stderr = result.communicate()
-
-            ssl_data = None
-
-            # Try parsing from file first
-            if ssl_output.exists():
-                with open(ssl_output) as f:
-                    content = f.read().strip()
-                    if content.startswith('{'):
-                        try:
-                            ssl_data = json.loads(content)
-                        except json.JSONDecodeError:
-                            pass
-
-            # Try parsing from stdout if not in file
-            if not ssl_data:
-                for line in stdout.split('\n'):
-                    line = line.strip()
-                    if line.startswith('{'):
-                        try:
-                            record = json.loads(line)
-                            if 'protocol' in record or 'issuer' in record or record.get('type') == 'SSL':
-                                ssl_data = record
-                                break
-                        except json.JSONDecodeError:
-                            continue
-
-            # Verify hook ran successfully
-            self.assertNotIn('Traceback', stderr)
-            self.assertNotIn('Error:', stderr)
-
-            # example.com uses HTTPS, so we MUST get SSL certificate data
-            self.assertIsNotNone(ssl_data, "No SSL data extracted from HTTPS URL")
-
-            # Verify we got certificate info
-            self.assertIn('protocol', ssl_data, f"SSL data missing protocol: {ssl_data}")
-            self.assertTrue(
-                ssl_data['protocol'].startswith('TLS') or ssl_data['protocol'].startswith('SSL'),
-                f"Unexpected protocol: {ssl_data['protocol']}"
-            )
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/staticfile/config.json b/archivebox/plugins/staticfile/config.json
deleted file mode 100644
index 7e6df43c..00000000
--- a/archivebox/plugins/staticfile/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "STATICFILE_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_STATICFILE", "USE_STATICFILE"],
-      "description": "Enable static file detection"
-    },
-    "STATICFILE_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for static file detection in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/staticfile/on_Snapshot__26_staticfile.bg.js b/archivebox/plugins/staticfile/on_Snapshot__26_staticfile.bg.js
deleted file mode 100644
index 984e15c7..00000000
--- a/archivebox/plugins/staticfile/on_Snapshot__26_staticfile.bg.js
+++ /dev/null
@@ -1,366 +0,0 @@
-#!/usr/bin/env node
-/**
- * Detect and download static files using CDP during initial request.
- *
- * This hook sets up CDP listeners BEFORE chrome_navigate to capture the
- * Content-Type from the initial response. If it's a static file (PDF, image, etc.),
- * it downloads the content directly using CDP.
- *
- * Usage: on_Snapshot__26_staticfile.bg.js --url=<url> --snapshot-id=<uuid>
- * Output: Downloads static file
- */
-
-const fs = require('fs');
-const path = require('path');
-
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-
-const puppeteer = require('puppeteer-core');
-
-// Import shared utilities from chrome_utils.js
-const {
-    getEnvBool,
-    getEnvInt,
-    parseArgs,
-    connectToPage,
-    waitForPageLoaded,
-} = require('../chrome/chrome_utils.js');
-
-const PLUGIN_NAME = 'staticfile';
-const OUTPUT_DIR = '.';
-const CHROME_SESSION_DIR = '../chrome';
-
-// Content-Types that indicate static files
-const STATIC_CONTENT_TYPES = new Set([
-    // Documents
-    'application/pdf',
-    'application/msword',
-    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
-    'application/vnd.ms-excel',
-    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
-    'application/vnd.ms-powerpoint',
-    'application/vnd.openxmlformats-officedocument.presentationml.presentation',
-    'application/rtf',
-    'application/epub+zip',
-    // Images
-    'image/png',
-    'image/jpeg',
-    'image/gif',
-    'image/webp',
-    'image/svg+xml',
-    'image/x-icon',
-    'image/bmp',
-    'image/tiff',
-    'image/avif',
-    'image/heic',
-    'image/heif',
-    // Audio
-    'audio/mpeg',
-    'audio/mp3',
-    'audio/wav',
-    'audio/flac',
-    'audio/aac',
-    'audio/ogg',
-    'audio/webm',
-    'audio/m4a',
-    'audio/opus',
-    // Video
-    'video/mp4',
-    'video/webm',
-    'video/x-matroska',
-    'video/avi',
-    'video/quicktime',
-    'video/x-ms-wmv',
-    'video/x-flv',
-    // Archives
-    'application/zip',
-    'application/x-tar',
-    'application/gzip',
-    'application/x-bzip2',
-    'application/x-xz',
-    'application/x-7z-compressed',
-    'application/x-rar-compressed',
-    'application/vnd.rar',
-    // Data
-    'application/json',
-    'application/xml',
-    'text/csv',
-    'text/xml',
-    'application/x-yaml',
-    // Executables/Binaries
-    'application/octet-stream',
-    'application/x-executable',
-    'application/x-msdos-program',
-    'application/x-apple-diskimage',
-    'application/vnd.debian.binary-package',
-    'application/x-rpm',
-    // Other
-    'application/x-bittorrent',
-    'application/wasm',
-]);
-
-const STATIC_CONTENT_TYPE_PREFIXES = [
-    'image/',
-    'audio/',
-    'video/',
-    'application/zip',
-    'application/x-',
-];
-
-// Global state
-let originalUrl = '';
-let detectedContentType = null;
-let isStaticFile = false;
-let downloadedFilePath = null;
-let downloadError = null;
-let page = null;
-let browser = null;
-
-function isStaticContentType(contentType) {
-    if (!contentType) return false;
-
-    const ct = contentType.split(';')[0].trim().toLowerCase();
-
-    // Check exact match
-    if (STATIC_CONTENT_TYPES.has(ct)) return true;
-
-    // Check prefixes
-    for (const prefix of STATIC_CONTENT_TYPE_PREFIXES) {
-        if (ct.startsWith(prefix)) return true;
-    }
-
-    return false;
-}
-
-function sanitizeFilename(str, maxLen = 200) {
-    return str
-        .replace(/[^a-zA-Z0-9._-]/g, '_')
-        .slice(0, maxLen);
-}
-
-function getFilenameFromUrl(url) {
-    try {
-        const pathname = new URL(url).pathname;
-        const filename = path.basename(pathname) || 'downloaded_file';
-        return sanitizeFilename(filename);
-    } catch (e) {
-        return 'downloaded_file';
-    }
-}
-
-function normalizeUrl(url) {
-    try {
-        const parsed = new URL(url);
-        let path = parsed.pathname || '';
-        if (path === '/') path = '';
-        return `${parsed.origin}${path}`;
-    } catch (e) {
-        return url;
-    }
-}
-
-async function setupStaticFileListener() {
-    const timeout = getEnvInt('STATICFILE_TIMEOUT', 30) * 1000;
-
-    // Connect to Chrome page using shared utility
-    const connection = await connectToPage({
-        chromeSessionDir: CHROME_SESSION_DIR,
-        timeoutMs: timeout,
-        puppeteer,
-    });
-    browser = connection.browser;
-    page = connection.page;
-
-    // Track the first response to check Content-Type
-    let firstResponseHandled = false;
-
-    page.on('response', async (response) => {
-        if (firstResponseHandled) return;
-
-        try {
-            const url = response.url();
-            const headers = response.headers();
-            const contentType = headers['content-type'] || '';
-            const status = response.status();
-
-            // Only process the main document response
-            if (normalizeUrl(url) !== normalizeUrl(originalUrl)) return;
-            if (status < 200 || status >= 300) return;
-
-            firstResponseHandled = true;
-            detectedContentType = contentType.split(';')[0].trim();
-
-            console.error(`Detected Content-Type: ${detectedContentType}`);
-
-            // Check if it's a static file
-            if (!isStaticContentType(detectedContentType)) {
-                console.error('Not a static file, skipping download');
-                return;
-            }
-
-            isStaticFile = true;
-            console.error('Static file detected, downloading...');
-
-            // Download the file
-            const maxSize = getEnvInt('STATICFILE_MAX_SIZE', 1024 * 1024 * 1024); // 1GB default
-            const buffer = await response.buffer();
-
-            if (buffer.length > maxSize) {
-                downloadError = `File too large: ${buffer.length} bytes > ${maxSize} max`;
-                return;
-            }
-
-            // Determine filename
-            let filename = getFilenameFromUrl(url);
-
-            // Check content-disposition header for better filename
-            const contentDisp = headers['content-disposition'] || '';
-            if (contentDisp.includes('filename=')) {
-                const match = contentDisp.match(/filename[*]?=["']?([^"';\n]+)/);
-                if (match) {
-                    filename = sanitizeFilename(match[1].trim());
-                }
-            }
-
-            const outputPath = path.join(OUTPUT_DIR, filename);
-            fs.writeFileSync(outputPath, buffer);
-
-            downloadedFilePath = filename;
-            console.error(`Static file downloaded (${buffer.length} bytes): ${filename}`);
-
-        } catch (e) {
-            downloadError = `${e.name}: ${e.message}`;
-            console.error(`Error downloading static file: ${downloadError}`);
-        }
-    });
-
-    return { browser, page };
-}
-
-function handleShutdown(signal) {
-    console.error(`\nReceived ${signal}, emitting final results...`);
-
-    let result;
-
-    if (!detectedContentType) {
-        // No Content-Type detected (shouldn't happen, but handle it)
-        result = {
-            type: 'ArchiveResult',
-            status: 'skipped',
-            output_str: 'No Content-Type detected',
-            plugin: PLUGIN_NAME,
-        };
-    } else if (!isStaticFile) {
-        // Not a static file (normal case for HTML pages)
-        result = {
-            type: 'ArchiveResult',
-            status: 'skipped',
-            output_str: `Not a static file (Content-Type: ${detectedContentType})`,
-            plugin: PLUGIN_NAME,
-            content_type: detectedContentType,
-        };
-    } else if (downloadError) {
-        // Static file but download failed
-        result = {
-            type: 'ArchiveResult',
-            status: 'failed',
-            output_str: downloadError,
-            plugin: PLUGIN_NAME,
-            content_type: detectedContentType,
-        };
-    } else if (downloadedFilePath) {
-        // Static file downloaded successfully
-        result = {
-            type: 'ArchiveResult',
-            status: 'succeeded',
-            output_str: downloadedFilePath,
-            plugin: PLUGIN_NAME,
-            content_type: detectedContentType,
-        };
-    } else {
-        // Static file detected but no download happened (unexpected)
-        result = {
-            type: 'ArchiveResult',
-            status: 'failed',
-            output_str: 'Static file detected but download did not complete',
-            plugin: PLUGIN_NAME,
-            content_type: detectedContentType,
-        };
-    }
-
-    console.log(JSON.stringify(result));
-    process.exit(0);
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__26_staticfile.bg.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    originalUrl = url;
-
-    if (!getEnvBool('STATICFILE_ENABLED', true)) {
-        console.error('Skipping (STATICFILE_ENABLED=False)');
-        console.log(JSON.stringify({type: 'ArchiveResult', status: 'skipped', output_str: 'STATICFILE_ENABLED=False'}));
-        process.exit(0);
-    }
-
-    const timeout = getEnvInt('STATICFILE_TIMEOUT', 30) * 1000;
-
-    // Register signal handlers for graceful shutdown
-    process.on('SIGTERM', () => handleShutdown('SIGTERM'));
-    process.on('SIGINT', () => handleShutdown('SIGINT'));
-
-    try {
-        // Set up static file listener BEFORE navigation
-        await setupStaticFileListener();
-
-        // Wait for chrome_navigate to complete (non-fatal)
-        try {
-            await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 500);
-            if (!detectedContentType && page) {
-                try {
-                    const inferred = await page.evaluate(() => document.contentType || '');
-                    if (inferred) {
-                        detectedContentType = inferred.split(';')[0].trim();
-                        if (isStaticContentType(detectedContentType)) {
-                            isStaticFile = true;
-                        }
-                    }
-                } catch (e) {
-                    // Best-effort only
-                }
-            }
-        } catch (e) {
-            console.error(`WARN: ${e.message}`);
-        }
-
-        // Keep process alive until killed by cleanup
-        // console.error('Static file detection complete, waiting for cleanup signal...');
-
-        // Keep the process alive indefinitely
-        await new Promise(() => {}); // Never resolves
-
-    } catch (e) {
-        const error = `${e.name}: ${e.message}`;
-        console.error(`ERROR: ${error}`);
-
-        console.log(JSON.stringify({
-            type: 'ArchiveResult',
-            status: 'failed',
-            output_str: error,
-        }));
-        process.exit(1);
-    }
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/staticfile/templates/card.html b/archivebox/plugins/staticfile/templates/card.html
deleted file mode 100644
index 6d16cbfa..00000000
--- a/archivebox/plugins/staticfile/templates/card.html
+++ /dev/null
@@ -1,24 +0,0 @@
-<!-- Staticfile thumbnail - preview of the static file -->
-<div class="extractor-thumbnail staticfile-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #f5f5f5;">
-    {% if output_path %}
-        {% if output_path|lower|slice:"-4:" == ".pdf" or "application/pdf" in output_path %}
-            <embed src="{{ output_path }}#toolbar=0&navpanes=0&scrollbar=0&page=1&view=FitH"
-                   type="application/pdf"
-                   style="width: 100%; height: 200px; margin-top: -20px; pointer-events: none;">
-        {% elif output_path|lower|slice:"-4:" in ".jpg.png.gif.svg.bmp.webp.avif.heic" or output_path|lower|slice:"-5:" == ".jpeg" %}
-            <img src="{{ output_path }}"
-                 style="width: 100%; height: 100%; object-fit: cover;"
-                 loading="lazy">
-        {% elif output_path|lower|slice:"-4:" in ".mp4.webm.mov.avi.mkv" or output_path|lower|slice:"-5:" == ".mpeg" %}
-            <video src="{{ output_path }}"
-                   style="width: 100%; height: 100%; object-fit: cover;"
-                   preload="metadata"
-                   muted></video>
-        {% else %}
-            <iframe src="{{ output_path }}"
-                    style="width: 100%; height: 100%; border: none; pointer-events: none;"
-                    loading="lazy"
-                    sandbox="allow-same-origin"></iframe>
-        {% endif %}
-    {% endif %}
-</div>
diff --git a/archivebox/plugins/staticfile/templates/icon.html b/archivebox/plugins/staticfile/templates/icon.html
deleted file mode 100644
index bc71e426..00000000
--- a/archivebox/plugins/staticfile/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--staticfile" title="Static File"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M6 3h8l4 4v14H6z"/><path d="M14 3v5h5"/><circle cx="9" cy="16" r="1" fill="currentColor" stroke="none"/><circle cx="13" cy="16" r="1" fill="currentColor" stroke="none"/><circle cx="17" cy="16" r="1" fill="currentColor" stroke="none"/></svg></span>
diff --git a/archivebox/plugins/staticfile/tests/test_staticfile.py b/archivebox/plugins/staticfile/tests/test_staticfile.py
deleted file mode 100644
index f40b0677..00000000
--- a/archivebox/plugins/staticfile/tests/test_staticfile.py
+++ /dev/null
@@ -1,123 +0,0 @@
-"""
-Tests for the staticfile plugin.
-
-Tests the real staticfile hook with actual URLs to verify
-static file detection and download.
-"""
-
-import json
-import shutil
-import subprocess
-import sys
-import tempfile
-import time
-from pathlib import Path
-
-import pytest
-from django.test import TestCase
-
-# Import chrome test helpers
-sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
-from chrome_test_helpers import (
-    chrome_session,
-    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
-)
-
-
-def chrome_available() -> bool:
-    """Check if Chrome/Chromium is available."""
-    for name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']:
-        if shutil.which(name):
-            return True
-    return False
-
-
-# Get the path to the staticfile hook
-PLUGIN_DIR = get_plugin_dir(__file__)
-STATICFILE_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_staticfile.*')
-
-
-class TestStaticfilePlugin(TestCase):
-    """Test the staticfile plugin."""
-
-    def test_staticfile_hook_exists(self):
-        """Staticfile hook script should exist."""
-        self.assertIsNotNone(STATICFILE_HOOK, "Staticfile hook not found in plugin directory")
-        self.assertTrue(STATICFILE_HOOK.exists(), f"Hook not found: {STATICFILE_HOOK}")
-
-
-class TestStaticfileWithChrome(TestCase):
-    """Integration tests for staticfile plugin with Chrome."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.temp_dir = Path(tempfile.mkdtemp())
-
-    def tearDown(self):
-        """Clean up."""
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_staticfile_skips_html_pages(self):
-        """Staticfile hook should skip HTML pages (not static files)."""
-        test_url = 'https://example.com'  # HTML page, not a static file
-        snapshot_id = 'test-staticfile-snapshot'
-
-        try:
-            with chrome_session(
-                self.temp_dir,
-                crawl_id='test-staticfile-crawl',
-                snapshot_id=snapshot_id,
-                test_url=test_url,
-                navigate=True,
-                timeout=30,
-            ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
-                # Use the environment from chrome_session (already has CHROME_HEADLESS=true)
-
-
-                # Run staticfile hook with the active Chrome session (background hook)
-                result = subprocess.Popen(
-                    ['node', str(STATICFILE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-                    cwd=str(snapshot_chrome_dir),
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    text=True,
-                    env=env
-                )
-
-                # Allow it to run briefly, then terminate (background hook)
-                time.sleep(3)
-                if result.poll() is None:
-                    result.terminate()
-                    try:
-                        stdout, stderr = result.communicate(timeout=5)
-                    except subprocess.TimeoutExpired:
-                        result.kill()
-                        stdout, stderr = result.communicate()
-                else:
-                    stdout, stderr = result.communicate()
-
-                # Verify hook ran without crash
-                self.assertNotIn('Traceback', stderr)
-
-                # Parse JSONL output to verify it recognized HTML as non-static
-                for line in stdout.split('\n'):
-                    line = line.strip()
-                    if line.startswith('{'):
-                        try:
-                            record = json.loads(line)
-                            if record.get('type') == 'ArchiveResult':
-                                # HTML pages should be skipped
-                                if record.get('status') == 'skipped':
-                                    self.assertIn('Not a static file', record.get('output_str', ''))
-                                break
-                        except json.JSONDecodeError:
-                            continue
-
-        except RuntimeError:
-            raise
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/title/config.json b/archivebox/plugins/title/config.json
deleted file mode 100644
index 550c6de2..00000000
--- a/archivebox/plugins/title/config.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "TITLE_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_TITLE", "USE_TITLE"],
-      "description": "Enable title extraction"
-    },
-    "TITLE_TIMEOUT": {
-      "type": "integer",
-      "default": 30,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for title extraction in seconds"
-    }
-  }
-}
diff --git a/archivebox/plugins/title/on_Snapshot__54_title.js b/archivebox/plugins/title/on_Snapshot__54_title.js
deleted file mode 100644
index af89e779..00000000
--- a/archivebox/plugins/title/on_Snapshot__54_title.js
+++ /dev/null
@@ -1,139 +0,0 @@
-#!/usr/bin/env node
-/**
- * Extract the title of a URL.
- *
- * Requires a Chrome session (from chrome plugin) and connects to it via CDP
- * to get the page title (which includes JS-rendered content).
- *
- * Usage: on_Snapshot__10_title.js --url=<url> --snapshot-id=<uuid>
- * Output: Writes title/title.txt
- *
- * Environment variables:
- *     TITLE_TIMEOUT: Timeout in seconds (default: 30)
- */
-
-const fs = require('fs');
-const path = require('path');
-const puppeteer = require('puppeteer-core');
-
-// Import shared utilities from chrome_utils.js
-const {
-    getEnvInt,
-    parseArgs,
-    connectToPage,
-    waitForPageLoaded,
-} = require('../chrome/chrome_utils.js');
-
-// Extractor metadata
-const PLUGIN_NAME = 'title';
-const OUTPUT_DIR = '.';
-const OUTPUT_FILE = 'title.txt';
-const CHROME_SESSION_DIR = '../chrome';
-
-async function extractTitle(url) {
-    // Output directory is current directory (hook already runs in output dir)
-    const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
-    const timeoutMs = getEnvInt('TITLE_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
-    let browser = null;
-
-    try {
-        const connection = await connectToPage({
-            chromeSessionDir: CHROME_SESSION_DIR,
-            timeoutMs,
-            puppeteer,
-        });
-        browser = connection.browser;
-        const page = connection.page;
-
-        await waitForPageLoaded(CHROME_SESSION_DIR, timeoutMs * 4, 200);
-
-        // Get title from page
-        let title = await page.title();
-
-        if (!title) {
-            // Try getting from DOM directly
-            title = await page.evaluate(() => {
-                return document.title ||
-                       document.querySelector('meta[property="og:title"]')?.content ||
-                       document.querySelector('meta[name="twitter:title"]')?.content ||
-                       document.querySelector('h1')?.textContent?.trim();
-            });
-        }
-
-        if (title) {
-            fs.writeFileSync(outputPath, title, 'utf8');
-            return { success: true, output: outputPath, title, method: 'cdp' };
-        }
-        return { success: false, error: 'No title found in Chrome session' };
-    } catch (e) {
-        return { success: false, error: e.message };
-    } finally {
-        if (browser) {
-            browser.disconnect();
-        }
-    }
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Snapshot__10_title.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    const startTs = new Date();
-    let status = 'failed';
-    let output = null;
-    let error = '';
-    let extractedTitle = null;
-
-    try {
-        const result = await extractTitle(url);
-
-        if (result.success) {
-            status = 'succeeded';
-            output = result.output;
-            extractedTitle = result.title;
-            console.error(`Title extracted (${result.method}): ${result.title}`);
-        } else {
-            status = 'failed';
-            error = result.error;
-        }
-    } catch (e) {
-        error = `${e.name}: ${e.message}`;
-        status = 'failed';
-    }
-
-    const endTs = new Date();
-
-    if (error) {
-        console.error(`ERROR: ${error}`);
-    }
-
-    // Update snapshot title via JSONL
-    if (status === 'succeeded' && extractedTitle) {
-        console.log(JSON.stringify({
-            type: 'Snapshot',
-            id: snapshotId,
-            title: extractedTitle
-        }));
-    }
-
-    // Output ArchiveResult JSONL
-    const archiveResult = {
-        type: 'ArchiveResult',
-        status,
-        output_str: output || error || '',
-    };
-    console.log(JSON.stringify(archiveResult));
-
-    process.exit(status === 'succeeded' ? 0 : 1);
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/title/templates/icon.html b/archivebox/plugins/title/templates/icon.html
deleted file mode 100644
index 0cc05a17..00000000
--- a/archivebox/plugins/title/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--title" title="Title"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M3 11l8-8h9v9l-8 8-9-9z"/><circle cx="16" cy="7" r="1.5" fill="currentColor" stroke="none"/></svg></span>
diff --git a/archivebox/plugins/title/tests/test_title.py b/archivebox/plugins/title/tests/test_title.py
deleted file mode 100644
index 78b2ffbd..00000000
--- a/archivebox/plugins/title/tests/test_title.py
+++ /dev/null
@@ -1,277 +0,0 @@
-"""
-Integration tests for title plugin
-
-Tests verify:
-1. Plugin script exists
-2. Node.js is available
-3. Title extraction works for real example.com
-4. Output file contains actual page title
-5. Handles various title sources (<title>, og:title, twitter:title)
-6. Config options work (TITLE_TIMEOUT)
-"""
-
-import json
-import shutil
-import subprocess
-import tempfile
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_plugin_dir,
-    get_hook_script,
-    parse_jsonl_output,
-    get_test_env,
-    chrome_session,
-    CHROME_NAVIGATE_HOOK,
-)
-
-
-PLUGIN_DIR = get_plugin_dir(__file__)
-TITLE_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_title.*')
-TEST_URL = 'https://example.com'
-
-def run_title_capture(title_dir, snapshot_chrome_dir, env, url, snapshot_id):
-    nav_result = subprocess.run(
-        ['node', str(CHROME_NAVIGATE_HOOK), f'--url={url}', f'--snapshot-id={snapshot_id}'],
-        cwd=str(snapshot_chrome_dir),
-        capture_output=True,
-        text=True,
-        timeout=120,
-        env=env,
-    )
-    result = subprocess.run(
-        ['node', str(TITLE_HOOK), f'--url={url}', f'--snapshot-id={snapshot_id}'],
-        cwd=title_dir,
-        capture_output=True,
-        text=True,
-        timeout=60,
-        env=env,
-    )
-    return nav_result, result
-
-
-def test_hook_script_exists():
-    """Verify hook script exists."""
-    assert TITLE_HOOK.exists(), f"Hook script not found: {TITLE_HOOK}"
-
-
-def test_extracts_title_from_example_com():
-    """Test full workflow: extract title from real example.com."""
-
-    # Check node is available
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            title_dir = snapshot_chrome_dir.parent / 'title'
-            title_dir.mkdir(exist_ok=True)
-
-            nav_result, result = run_title_capture(
-                title_dir,
-                snapshot_chrome_dir,
-                env,
-                TEST_URL,
-                'test789',
-            )
-            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-
-        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Verify output file exists (hook writes to current directory)
-        title_file = title_dir / 'title.txt'
-        assert title_file.exists(), "title.txt not created"
-
-        # Verify title contains REAL example.com title
-        title_text = title_file.read_text().strip()
-        assert len(title_text) > 0, "Title should not be empty"
-        assert 'example' in title_text.lower(), "Title should contain 'example'"
-
-        # example.com has title "Example Domain"
-        assert 'example domain' in title_text.lower(), f"Expected 'Example Domain', got: {title_text}"
-
-
-def test_fails_without_chrome_session():
-    """Test that title plugin fails when chrome session is missing."""
-
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        title_dir = tmpdir / 'snapshot' / 'title'
-        title_dir.mkdir(parents=True, exist_ok=True)
-
-        # Run title extraction
-        result = subprocess.run(
-            ['node', str(TITLE_HOOK), f'--url={TEST_URL}', '--snapshot-id=testhttp'],
-            cwd=title_dir,
-            capture_output=True,
-            text=True,
-            timeout=60,
-            env=get_test_env(),
-        )
-
-        assert result.returncode != 0, f"Should fail without chrome session: {result.stderr}"
-        assert 'No Chrome session found (chrome plugin must run first)' in (result.stdout + result.stderr)
-
-
-def test_config_timeout_honored():
-    """Test that TITLE_TIMEOUT config is respected."""
-
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set very short timeout (but example.com should still succeed)
-        import os
-        env_override = os.environ.copy()
-        env_override['TITLE_TIMEOUT'] = '5'
-
-        with chrome_session(tmpdir, test_url=TEST_URL, navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            title_dir = snapshot_chrome_dir.parent / 'title'
-            title_dir.mkdir(exist_ok=True)
-            env.update(env_override)
-
-            nav_result, result = run_title_capture(
-                title_dir,
-                snapshot_chrome_dir,
-                env,
-                TEST_URL,
-                'testtimeout',
-            )
-            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-
-        # Should complete (success or fail, but not hang)
-        assert result.returncode in (0, 1), "Should complete without hanging"
-
-
-def test_handles_https_urls():
-    """Test that HTTPS URLs work correctly."""
-
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        with chrome_session(tmpdir, test_url='https://example.org', navigate=False) as (_process, _pid, snapshot_chrome_dir, env):
-            title_dir = snapshot_chrome_dir.parent / 'title'
-            title_dir.mkdir(exist_ok=True)
-
-            nav_result, result = run_title_capture(
-                title_dir,
-                snapshot_chrome_dir,
-                env,
-                'https://example.org',
-                'testhttps',
-            )
-            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-
-        if result.returncode == 0:
-            # Hook writes to current directory
-            output_title_file = title_dir / 'title.txt'
-            if output_title_file.exists():
-                title_text = output_title_file.read_text().strip()
-                assert len(title_text) > 0, "Title should not be empty"
-                assert 'example' in title_text.lower()
-
-
-def test_handles_404_gracefully():
-    """Test that title plugin handles 404 pages.
-
-    Note: example.com returns valid HTML even for 404 pages, so extraction may succeed
-    with the generic "Example Domain" title.
-    """
-
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        with chrome_session(tmpdir, test_url='https://example.com/nonexistent-page-404', navigate=False) as (
-            _process,
-            _pid,
-            snapshot_chrome_dir,
-            env,
-        ):
-            title_dir = snapshot_chrome_dir.parent / 'title'
-            title_dir.mkdir(exist_ok=True)
-
-            nav_result, result = run_title_capture(
-                title_dir,
-                snapshot_chrome_dir,
-                env,
-                'https://example.com/nonexistent-page-404',
-                'test404',
-            )
-            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-
-        # May succeed or fail depending on server behavior
-        # example.com returns "Example Domain" even for 404s
-        assert result.returncode in (0, 1), "Should complete (may succeed or fail)"
-
-
-def test_handles_redirects():
-    """Test that title plugin handles redirects correctly."""
-
-    if not shutil.which('node'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        with chrome_session(tmpdir, test_url='http://example.com', navigate=False) as (
-            _process,
-            _pid,
-            snapshot_chrome_dir,
-            env,
-        ):
-            title_dir = snapshot_chrome_dir.parent / 'title'
-            title_dir.mkdir(exist_ok=True)
-
-            # http://example.com redirects to https://example.com
-            nav_result, result = run_title_capture(
-                title_dir,
-                snapshot_chrome_dir,
-                env,
-                'http://example.com',
-                'testredirect',
-            )
-            assert nav_result.returncode == 0, f"Navigation failed: {nav_result.stderr}"
-
-        # Should succeed and follow redirect
-        if result.returncode == 0:
-            # Hook writes to current directory
-            output_title_file = title_dir / 'title.txt'
-            if output_title_file.exists():
-                title_text = output_title_file.read_text().strip()
-                assert 'example' in title_text.lower()
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/twocaptcha/config.json b/archivebox/plugins/twocaptcha/config.json
deleted file mode 100644
index d6c08ecf..00000000
--- a/archivebox/plugins/twocaptcha/config.json
+++ /dev/null
@@ -1,50 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "TWOCAPTCHA_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["CAPTCHA2_ENABLED", "USE_CAPTCHA2", "USE_TWOCAPTCHA"],
-      "description": "Enable 2captcha browser extension for automatic CAPTCHA solving"
-    },
-    "TWOCAPTCHA_API_KEY": {
-      "type": "string",
-      "default": "",
-      "x-aliases": ["API_KEY_2CAPTCHA", "CAPTCHA2_API_KEY"],
-      "x-sensitive": true,
-      "description": "2captcha API key for CAPTCHA solving service (get from https://2captcha.com)"
-    },
-    "TWOCAPTCHA_RETRY_COUNT": {
-      "type": "integer",
-      "default": 3,
-      "minimum": 0,
-      "maximum": 10,
-      "x-aliases": ["CAPTCHA2_RETRY_COUNT"],
-      "description": "Number of times to retry CAPTCHA solving on error"
-    },
-    "TWOCAPTCHA_RETRY_DELAY": {
-      "type": "integer",
-      "default": 5,
-      "minimum": 0,
-      "maximum": 60,
-      "x-aliases": ["CAPTCHA2_RETRY_DELAY"],
-      "description": "Delay in seconds between CAPTCHA solving retries"
-    },
-    "TWOCAPTCHA_TIMEOUT": {
-      "type": "integer",
-      "default": 60,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "x-aliases": ["CAPTCHA2_TIMEOUT"],
-      "description": "Timeout for CAPTCHA solving in seconds"
-    },
-    "TWOCAPTCHA_AUTO_SUBMIT": {
-      "type": "boolean",
-      "default": false,
-      "description": "Automatically submit forms after CAPTCHA is solved"
-    }
-  }
-}
diff --git a/archivebox/plugins/twocaptcha/on_Crawl__83_twocaptcha_install.js b/archivebox/plugins/twocaptcha/on_Crawl__83_twocaptcha_install.js
deleted file mode 100755
index 23a1b3f2..00000000
--- a/archivebox/plugins/twocaptcha/on_Crawl__83_twocaptcha_install.js
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/env node
-/**
- * 2Captcha Extension Plugin
- *
- * Installs and configures the 2captcha Chrome extension for automatic
- * CAPTCHA solving during page archiving.
- *
- * Extension: https://chromewebstore.google.com/detail/ifibfemgeogfhoebkmokieepdoobkbpo
- * Documentation: https://2captcha.com/blog/how-to-use-2captcha-solver-extension-in-puppeteer
- *
- * Priority: 83 - Must install before Chrome session starts at Crawl level
- * Hook: on_Crawl (runs once per crawl, not per snapshot)
- *
- * Requirements:
- * - TWOCAPTCHA_API_KEY environment variable must be set
- * - Extension will automatically solve reCAPTCHA, hCaptcha, Cloudflare Turnstile, etc.
- */
-
-// Import extension utilities
-const { installExtensionWithCache } = require('../chrome/chrome_utils.js');
-
-// Extension metadata
-const EXTENSION = {
-    webstore_id: 'ifibfemgeogfhoebkmokieepdoobkbpo',
-    name: 'twocaptcha',
-};
-
-/**
- * Main entry point - install extension before archiving
- *
- * Note: 2captcha configuration is handled by on_Crawl__95_twocaptcha_config.js
- * during first-time browser setup to avoid repeated configuration on every snapshot.
- * The API key is injected via chrome.storage API once per browser session.
- */
-async function main() {
-    const extension = await installExtensionWithCache(EXTENSION);
-
-    if (extension) {
-        // Check if API key is configured
-        const apiKey = process.env.TWOCAPTCHA_API_KEY || process.env.API_KEY_2CAPTCHA;
-        if (!apiKey || apiKey === 'YOUR_API_KEY_HERE') {
-            console.warn('[⚠️] 2captcha extension installed but TWOCAPTCHA_API_KEY not configured');
-            console.warn('[⚠️] Set TWOCAPTCHA_API_KEY environment variable to enable automatic CAPTCHA solving');
-        } else {
-            console.log('[+] 2captcha extension installed and API key configured');
-        }
-    }
-
-    return extension;
-}
-
-// Export functions for use by other plugins
-module.exports = {
-    EXTENSION,
-};
-
-// Run if executed directly
-if (require.main === module) {
-    main().then(() => {
-        console.log('[✓] 2captcha extension setup complete');
-        process.exit(0);
-    }).catch(err => {
-        console.error('[❌] 2captcha extension setup failed:', err);
-        process.exit(1);
-    });
-}
diff --git a/archivebox/plugins/twocaptcha/on_Crawl__95_twocaptcha_config.js b/archivebox/plugins/twocaptcha/on_Crawl__95_twocaptcha_config.js
deleted file mode 100755
index 2dd2002f..00000000
--- a/archivebox/plugins/twocaptcha/on_Crawl__95_twocaptcha_config.js
+++ /dev/null
@@ -1,389 +0,0 @@
-#!/usr/bin/env node
-/**
- * 2Captcha Extension Configuration
- *
- * Configures the 2captcha extension with API key and settings after Crawl-level Chrome session starts.
- * Runs once per crawl to inject configuration into extension storage.
- *
- * Priority: 95 (after chrome_launch at 90, before snapshots start)
- * Hook: on_Crawl (runs once per crawl, not per snapshot)
- *
- * Config Options (from config.json / environment):
- * - TWOCAPTCHA_API_KEY: API key for 2captcha service
- * - TWOCAPTCHA_ENABLED: Enable/disable the extension
- * - TWOCAPTCHA_RETRY_COUNT: Number of retries on error
- * - TWOCAPTCHA_RETRY_DELAY: Delay between retries (seconds)
- * - TWOCAPTCHA_AUTO_SUBMIT: Auto-submit forms after solving
- *
- * Requirements:
- * - TWOCAPTCHA_API_KEY environment variable must be set
- * - chrome plugin must have loaded extensions (extensions.json must exist)
- */
-
-const path = require('path');
-const fs = require('fs');
-// Add NODE_MODULES_DIR to module resolution paths if set
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer-core');
-
-// Get crawl's chrome directory from environment variable set by hooks.py
-function getCrawlChromeSessionDir() {
-    const crawlOutputDir = process.env.CRAWL_OUTPUT_DIR || '';
-    if (!crawlOutputDir) {
-        return null;
-    }
-    return path.join(crawlOutputDir, 'chrome');
-}
-
-const CHROME_SESSION_DIR = getCrawlChromeSessionDir() || '../chrome';
-const CONFIG_MARKER = path.join(CHROME_SESSION_DIR, '.twocaptcha_configured');
-
-// Get environment variable with default
-function getEnv(name, defaultValue = '') {
-    return (process.env[name] || defaultValue).trim();
-}
-
-// Get boolean environment variable
-function getEnvBool(name, defaultValue = false) {
-    const val = getEnv(name, '').toLowerCase();
-    if (['true', '1', 'yes', 'on'].includes(val)) return true;
-    if (['false', '0', 'no', 'off'].includes(val)) return false;
-    return defaultValue;
-}
-
-// Get integer environment variable
-function getEnvInt(name, defaultValue = 0) {
-    const val = parseInt(getEnv(name, String(defaultValue)), 10);
-    return isNaN(val) ? defaultValue : val;
-}
-
-// Parse command line arguments
-function parseArgs() {
-    const args = {};
-    process.argv.slice(2).forEach(arg => {
-        if (arg.startsWith('--')) {
-            const [key, ...valueParts] = arg.slice(2).split('=');
-            args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
-        }
-    });
-    return args;
-}
-
-/**
- * Get 2captcha configuration from environment variables.
- * Supports both TWOCAPTCHA_* and legacy API_KEY_2CAPTCHA naming.
- */
-function getTwoCaptchaConfig() {
-    const apiKey = getEnv('TWOCAPTCHA_API_KEY') || getEnv('API_KEY_2CAPTCHA') || getEnv('CAPTCHA2_API_KEY');
-    const isEnabled = getEnvBool('TWOCAPTCHA_ENABLED', true);
-    const retryCount = getEnvInt('TWOCAPTCHA_RETRY_COUNT', 3);
-    const retryDelay = getEnvInt('TWOCAPTCHA_RETRY_DELAY', 5);
-    const autoSubmit = getEnvBool('TWOCAPTCHA_AUTO_SUBMIT', false);
-
-    // Build the full config object matching the extension's storage structure
-    // Structure: chrome.storage.local.set({config: {...}})
-    return {
-        // API key - both variants for compatibility
-        apiKey: apiKey,
-        api_key: apiKey,
-
-        // Plugin enabled state
-        isPluginEnabled: isEnabled,
-
-        // Retry settings
-        repeatOnErrorTimes: retryCount,
-        repeatOnErrorDelay: retryDelay,
-
-        // Auto-submit setting
-        autoSubmitForms: autoSubmit,
-        submitFormsDelay: 0,
-
-        // Enable all CAPTCHA types
-        enabledForNormal: true,
-        enabledForRecaptchaV2: true,
-        enabledForInvisibleRecaptchaV2: true,
-        enabledForRecaptchaV3: true,
-        enabledForRecaptchaAudio: false,
-        enabledForGeetest: true,
-        enabledForGeetest_v4: true,
-        enabledForKeycaptcha: true,
-        enabledForArkoselabs: true,
-        enabledForLemin: true,
-        enabledForYandex: true,
-        enabledForCapyPuzzle: true,
-        enabledForTurnstile: true,
-        enabledForAmazonWaf: true,
-        enabledForMTCaptcha: true,
-
-        // Auto-solve all CAPTCHA types
-        autoSolveNormal: true,
-        autoSolveRecaptchaV2: true,
-        autoSolveInvisibleRecaptchaV2: true,
-        autoSolveRecaptchaV3: true,
-        autoSolveRecaptchaAudio: false,
-        autoSolveGeetest: true,
-        autoSolveGeetest_v4: true,
-        autoSolveKeycaptcha: true,
-        autoSolveArkoselabs: true,
-        autoSolveLemin: true,
-        autoSolveYandex: true,
-        autoSolveCapyPuzzle: true,
-        autoSolveTurnstile: true,
-        autoSolveAmazonWaf: true,
-        autoSolveMTCaptcha: true,
-
-        // Other settings with sensible defaults
-        recaptchaV2Type: 'token',
-        recaptchaV3MinScore: 0.3,
-        buttonPosition: 'inner',
-        useProxy: false,
-        proxy: '',
-        proxytype: 'HTTP',
-        blackListDomain: '',
-        autoSubmitRules: [],
-        normalSources: [],
-    };
-}
-
-async function configure2Captcha() {
-    // Check if already configured in this session
-    if (fs.existsSync(CONFIG_MARKER)) {
-        console.error('[*] 2captcha already configured in this browser session');
-        return { success: true, skipped: true };
-    }
-
-    // Get configuration
-    const config = getTwoCaptchaConfig();
-
-    // Check if API key is set
-    if (!config.apiKey || config.apiKey === 'YOUR_API_KEY_HERE') {
-        console.warn('[!] 2captcha extension loaded but TWOCAPTCHA_API_KEY not configured');
-        console.warn('[!] Set TWOCAPTCHA_API_KEY environment variable to enable automatic CAPTCHA solving');
-        return { success: false, error: 'TWOCAPTCHA_API_KEY not configured' };
-    }
-
-    console.error('[*] Configuring 2captcha extension...');
-    console.error(`[*]   API Key: ${config.apiKey.slice(0, 8)}...${config.apiKey.slice(-4)}`);
-    console.error(`[*]   Enabled: ${config.isPluginEnabled}`);
-    console.error(`[*]   Retry Count: ${config.repeatOnErrorTimes}`);
-    console.error(`[*]   Retry Delay: ${config.repeatOnErrorDelay}s`);
-    console.error(`[*]   Auto Submit: ${config.autoSubmitForms}`);
-    console.error(`[*]   Auto Solve: all CAPTCHA types enabled`);
-
-    try {
-        // Connect to the existing Chrome session via CDP
-        const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
-        if (!fs.existsSync(cdpFile)) {
-            return { success: false, error: 'No Chrome session found (chrome plugin must run first)' };
-        }
-
-        const cdpUrl = fs.readFileSync(cdpFile, 'utf-8').trim();
-        const browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });
-
-        try {
-            // First, navigate to a page to trigger extension content scripts and wake up service worker
-            console.error('[*] Waking up extension by visiting a page...');
-            const triggerPage = await browser.newPage();
-            try {
-                await triggerPage.goto('https://www.google.com', { waitUntil: 'domcontentloaded', timeout: 10000 });
-                await new Promise(r => setTimeout(r, 3000)); // Give extension time to initialize
-            } catch (e) {
-                console.warn(`[!] Trigger page failed: ${e.message}`);
-            }
-            try { await triggerPage.close(); } catch (e) {}
-
-            // Get 2captcha extension info from extensions.json
-            const extensionsFile = path.join(CHROME_SESSION_DIR, 'extensions.json');
-            if (!fs.existsSync(extensionsFile)) {
-                return { success: false, error: 'extensions.json not found - chrome plugin must run first' };
-            }
-
-            const extensions = JSON.parse(fs.readFileSync(extensionsFile, 'utf-8'));
-            const captchaExt = extensions.find(ext => ext.name === 'twocaptcha');
-
-            if (!captchaExt) {
-                console.error('[*] 2captcha extension not installed, skipping configuration');
-                return { success: true, skipped: true };
-            }
-
-            if (!captchaExt.id) {
-                return { success: false, error: '2captcha extension ID not found in extensions.json' };
-            }
-
-            const extensionId = captchaExt.id;
-            console.error(`[*] 2captcha Extension ID: ${extensionId}`);
-
-            // Configure via options page
-            console.error('[*] Configuring via options page...');
-            const optionsUrl = `chrome-extension://${extensionId}/options/options.html`;
-
-            let configPage = await browser.newPage();
-
-            try {
-                // Navigate to options page - catch error but continue since page may still load
-                try {
-                    await configPage.goto(optionsUrl, { waitUntil: 'networkidle0', timeout: 10000 });
-                } catch (navError) {
-                    // Navigation may throw ERR_BLOCKED_BY_CLIENT but page still loads
-                    console.error(`[*] Navigation threw error (may still work): ${navError.message}`);
-                }
-
-                // Wait a moment for page to settle
-                await new Promise(r => setTimeout(r, 3000));
-
-                // Check all pages for the extension page (Chrome may open it in a different tab)
-                const pages = await browser.pages();
-                for (const page of pages) {
-                    const url = page.url();
-                    if (url.startsWith(`chrome-extension://${extensionId}`)) {
-                        configPage = page;
-                        break;
-                    }
-                }
-
-                const currentUrl = configPage.url();
-                console.error(`[*] Current URL: ${currentUrl}`);
-
-                if (!currentUrl.startsWith(`chrome-extension://${extensionId}`)) {
-                    return { success: false, error: `Failed to navigate to options page, got: ${currentUrl}` };
-                }
-
-                // Wait for Config object to be available
-                console.error('[*] Waiting for Config object...');
-                await configPage.waitForFunction(() => typeof Config !== 'undefined', { timeout: 10000 });
-
-                // Use chrome.storage.local.set with the config wrapper
-                const result = await configPage.evaluate((cfg) => {
-                    return new Promise((resolve) => {
-                        if (typeof chrome !== 'undefined' && chrome.storage) {
-                            chrome.storage.local.set({ config: cfg }, () => {
-                                if (chrome.runtime.lastError) {
-                                    resolve({ success: false, error: chrome.runtime.lastError.message });
-                                } else {
-                                    resolve({ success: true, method: 'options_page' });
-                                }
-                            });
-                        } else {
-                            resolve({ success: false, error: 'chrome.storage not available' });
-                        }
-                    });
-                }, config);
-
-                if (result.success) {
-                    console.error(`[+] 2captcha configured via ${result.method}`);
-
-                    // Verify config was applied by reloading options page and checking form values
-                    console.error('[*] Verifying config by reloading options page...');
-                    try {
-                        await configPage.reload({ waitUntil: 'networkidle0', timeout: 10000 });
-                    } catch (e) {
-                        console.error(`[*] Reload threw error (may still work): ${e.message}`);
-                    }
-
-                    await new Promise(r => setTimeout(r, 2000));
-
-                    // Wait for Config object again
-                    await configPage.waitForFunction(() => typeof Config !== 'undefined', { timeout: 10000 });
-
-                    // Read back the config using Config.getAll()
-                    const verifyConfig = await configPage.evaluate(async () => {
-                        if (typeof Config !== 'undefined' && typeof Config.getAll === 'function') {
-                            return await Config.getAll();
-                        }
-                        return null;
-                    });
-
-                    if (!verifyConfig) {
-                        return { success: false, error: 'Could not verify config - Config.getAll() not available' };
-                    }
-
-                    // Check that API key was actually set
-                    const actualApiKey = verifyConfig.apiKey || verifyConfig.api_key;
-                    if (!actualApiKey || actualApiKey !== config.apiKey) {
-                        console.error(`[!] Config verification FAILED - API key mismatch`);
-                        console.error(`[!]   Expected: ${config.apiKey.slice(0, 8)}...${config.apiKey.slice(-4)}`);
-                        console.error(`[!]   Got: ${actualApiKey ? actualApiKey.slice(0, 8) + '...' + actualApiKey.slice(-4) : 'null'}`);
-                        return { success: false, error: 'Config verification failed - API key not set correctly' };
-                    }
-
-                    console.error('[+] Config verified successfully!');
-                    console.error(`[+]   API Key: ${actualApiKey.slice(0, 8)}...${actualApiKey.slice(-4)}`);
-                    console.error(`[+]   Plugin Enabled: ${verifyConfig.isPluginEnabled}`);
-                    console.error(`[+]   Auto Solve Turnstile: ${verifyConfig.autoSolveTurnstile}`);
-
-                    fs.writeFileSync(CONFIG_MARKER, JSON.stringify({
-                        timestamp: new Date().toISOString(),
-                        method: result.method,
-                        extensionId: extensionId,
-                        verified: true,
-                        config: {
-                            apiKeySet: !!config.apiKey,
-                            isPluginEnabled: config.isPluginEnabled,
-                            repeatOnErrorTimes: config.repeatOnErrorTimes,
-                            repeatOnErrorDelay: config.repeatOnErrorDelay,
-                            autoSubmitForms: config.autoSubmitForms,
-                            autoSolveEnabled: true,
-                        }
-                    }, null, 2));
-                    return { success: true, method: result.method, verified: true };
-                }
-
-                return { success: false, error: result.error || 'Config failed' };
-            } finally {
-                try { await configPage.close(); } catch (e) {}
-            }
-        } finally {
-            browser.disconnect();
-        }
-    } catch (e) {
-        return { success: false, error: `${e.name}: ${e.message}` };
-    }
-}
-
-async function main() {
-    const args = parseArgs();
-    const url = args.url;
-    const snapshotId = args.snapshot_id;
-
-    if (!url || !snapshotId) {
-        console.error('Usage: on_Crawl__95_twocaptcha_config.js --url=<url> --snapshot-id=<uuid>');
-        process.exit(1);
-    }
-
-    const startTs = new Date();
-    let status = 'failed';
-    let error = '';
-
-    try {
-        const result = await configure2Captcha();
-
-        if (result.skipped) {
-            status = 'skipped';
-        } else if (result.success) {
-            status = 'succeeded';
-        } else {
-            status = 'failed';
-            error = result.error || 'Configuration failed';
-        }
-    } catch (e) {
-        error = `${e.name}: ${e.message}`;
-        status = 'failed';
-    }
-
-    const endTs = new Date();
-    const duration = (endTs - startTs) / 1000;
-
-    if (error) {
-        console.error(`ERROR: ${error}`);
-    }
-
-    // Config hooks don't emit JSONL - they're utility hooks for setup
-    // Exit code indicates success/failure
-
-    process.exit(status === 'succeeded' || status === 'skipped' ? 0 : 1);
-}
-
-main().catch(e => {
-    console.error(`Fatal error: ${e.message}`);
-    process.exit(1);
-});
diff --git a/archivebox/plugins/twocaptcha/templates/icon.html b/archivebox/plugins/twocaptcha/templates/icon.html
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/twocaptcha/tests/test_twocaptcha.py b/archivebox/plugins/twocaptcha/tests/test_twocaptcha.py
deleted file mode 100644
index 4569cb49..00000000
--- a/archivebox/plugins/twocaptcha/tests/test_twocaptcha.py
+++ /dev/null
@@ -1,338 +0,0 @@
-"""
-Integration tests for twocaptcha plugin
-
-Run with: TWOCAPTCHA_API_KEY=your_key pytest archivebox/plugins/twocaptcha/tests/ -xvs
-
-NOTE: Chrome 137+ removed --load-extension support, so these tests MUST use Chromium.
-"""
-
-import json
-import os
-import signal
-import subprocess
-import tempfile
-import time
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    setup_test_env,
-    launch_chromium_session,
-    kill_chromium_session,
-    CHROME_LAUNCH_HOOK,
-    PLUGINS_ROOT,
-)
-
-
-PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_SCRIPT = PLUGIN_DIR / 'on_Crawl__83_twocaptcha_install.js'
-CONFIG_SCRIPT = PLUGIN_DIR / 'on_Crawl__95_twocaptcha_config.js'
-
-TEST_URL = 'https://2captcha.com/demo/cloudflare-turnstile'
-
-
-# Alias for backward compatibility with existing test names
-launch_chrome = launch_chromium_session
-kill_chrome = kill_chromium_session
-
-
-class TestTwoCaptcha:
-    """Integration tests requiring TWOCAPTCHA_API_KEY."""
-
-    @pytest.fixture(autouse=True)
-    def setup(self):
-        self.api_key = os.environ.get('TWOCAPTCHA_API_KEY') or os.environ.get('API_KEY_2CAPTCHA')
-        if not self.api_key:
-            pytest.fail("TWOCAPTCHA_API_KEY required")
-
-    def test_install_and_load(self):
-        """Extension installs and loads in Chromium."""
-        with tempfile.TemporaryDirectory() as tmpdir:
-            tmpdir = Path(tmpdir)
-            env = setup_test_env(tmpdir)
-            env['TWOCAPTCHA_API_KEY'] = self.api_key
-
-            # Install
-            result = subprocess.run(['node', str(INSTALL_SCRIPT)], env=env, timeout=120, capture_output=True, text=True)
-            assert result.returncode == 0, f"Install failed: {result.stderr}"
-
-            cache = Path(env['CHROME_EXTENSIONS_DIR']) / 'twocaptcha.extension.json'
-            assert cache.exists()
-            data = json.loads(cache.read_text())
-            assert data['webstore_id'] == 'ifibfemgeogfhoebkmokieepdoobkbpo'
-
-            # Launch Chromium in crawls directory
-            crawl_id = 'test'
-            crawl_dir = Path(env['CRAWLS_DIR']) / crawl_id
-            chrome_dir = crawl_dir / 'chrome'
-            env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
-            process, cdp_url = launch_chrome(env, chrome_dir, crawl_id)
-
-            try:
-                # Wait for extensions.json to be written
-                extensions_file = chrome_dir / 'extensions.json'
-                for i in range(20):
-                    if extensions_file.exists():
-                        break
-                    time.sleep(0.5)
-
-                assert extensions_file.exists(), f"extensions.json not created. Chrome dir files: {list(chrome_dir.iterdir())}"
-
-                exts = json.loads(extensions_file.read_text())
-                assert any(e['name'] == 'twocaptcha' for e in exts), f"twocaptcha not loaded: {exts}"
-                print(f"[+] Extension loaded: id={next(e['id'] for e in exts if e['name']=='twocaptcha')}")
-            finally:
-                kill_chrome(process, chrome_dir)
-
-    def test_config_applied(self):
-        """Configuration is applied to extension and verified via Config.getAll()."""
-        with tempfile.TemporaryDirectory() as tmpdir:
-            tmpdir = Path(tmpdir)
-            env = setup_test_env(tmpdir)
-            env['TWOCAPTCHA_API_KEY'] = self.api_key
-            env['TWOCAPTCHA_RETRY_COUNT'] = '5'
-            env['TWOCAPTCHA_RETRY_DELAY'] = '10'
-
-            subprocess.run(['node', str(INSTALL_SCRIPT)], env=env, timeout=120, capture_output=True)
-
-            # Launch Chromium in crawls directory
-            crawl_id = 'cfg'
-            crawl_dir = Path(env['CRAWLS_DIR']) / crawl_id
-            chrome_dir = crawl_dir / 'chrome'
-            env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
-            process, cdp_url = launch_chrome(env, chrome_dir, crawl_id)
-
-            try:
-                # Wait for extensions.json to be written
-                extensions_file = chrome_dir / 'extensions.json'
-                for i in range(20):
-                    if extensions_file.exists():
-                        break
-                    time.sleep(0.5)
-                assert extensions_file.exists(), f"extensions.json not created"
-
-                result = subprocess.run(
-                    ['node', str(CONFIG_SCRIPT), '--url=https://example.com', '--snapshot-id=test'],
-                    env=env, timeout=30, capture_output=True, text=True
-                )
-                assert result.returncode == 0, f"Config failed: {result.stderr}"
-                assert (chrome_dir / '.twocaptcha_configured').exists()
-
-                # Verify config via options.html and Config.getAll()
-                # Get the actual extension ID from the config marker (Chrome computes IDs differently)
-                config_marker = json.loads((chrome_dir / '.twocaptcha_configured').read_text())
-                ext_id = config_marker['extensionId']
-                script = f'''
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer-core');
-(async () => {{
-    const browser = await puppeteer.connect({{ browserWSEndpoint: '{cdp_url}' }});
-
-    // Load options.html and use Config.getAll() to verify
-    const optionsUrl = 'chrome-extension://{ext_id}/options/options.html';
-    const page = await browser.newPage();
-    console.error('[*] Loading options page:', optionsUrl);
-
-    // Navigate - catch error but continue since page may still load
-    try {{
-        await page.goto(optionsUrl, {{ waitUntil: 'networkidle0', timeout: 10000 }});
-    }} catch (e) {{
-        console.error('[*] Navigation threw error (may still work):', e.message);
-    }}
-
-    // Wait for page to settle
-    await new Promise(r => setTimeout(r, 2000));
-    console.error('[*] Current URL:', page.url());
-
-    // Wait for Config object to be available
-    await page.waitForFunction(() => typeof Config !== 'undefined', {{ timeout: 5000 }});
-
-    // Call Config.getAll() - the extension's own API (returns a Promise)
-    const cfg = await page.evaluate(async () => await Config.getAll());
-    console.error('[*] Config.getAll() returned:', JSON.stringify(cfg));
-
-    await page.close();
-    browser.disconnect();
-    console.log(JSON.stringify(cfg));
-}})();
-'''
-                (tmpdir / 'v.js').write_text(script)
-                r = subprocess.run(['node', str(tmpdir / 'v.js')], env=env, timeout=30, capture_output=True, text=True)
-                print(r.stderr)
-                assert r.returncode == 0, f"Verify failed: {r.stderr}"
-
-                cfg = json.loads(r.stdout.strip().split('\n')[-1])
-                print(f"[*] Config from extension: {json.dumps(cfg, indent=2)}")
-
-                # Verify all the fields we care about
-                assert cfg.get('apiKey') == self.api_key or cfg.get('api_key') == self.api_key, f"API key not set: {cfg}"
-                assert cfg.get('isPluginEnabled') == True, f"Plugin not enabled: {cfg}"
-                assert cfg.get('repeatOnErrorTimes') == 5, f"Retry count wrong: {cfg}"
-                assert cfg.get('repeatOnErrorDelay') == 10, f"Retry delay wrong: {cfg}"
-                assert cfg.get('autoSolveRecaptchaV2') == True, f"autoSolveRecaptchaV2 not enabled: {cfg}"
-                assert cfg.get('autoSolveRecaptchaV3') == True, f"autoSolveRecaptchaV3 not enabled: {cfg}"
-                assert cfg.get('autoSolveTurnstile') == True, f"autoSolveTurnstile not enabled: {cfg}"
-                assert cfg.get('enabledForRecaptchaV2') == True, f"enabledForRecaptchaV2 not enabled: {cfg}"
-
-                print(f"[+] Config verified via Config.getAll()!")
-            finally:
-                kill_chrome(process, chrome_dir)
-
-    def test_solves_recaptcha(self):
-        """Extension attempts to solve CAPTCHA on demo page.
-
-        CRITICAL: DO NOT SKIP OR DISABLE THIS TEST EVEN IF IT'S FLAKY!
-
-        This test is INTENTIONALLY left enabled to expose the REAL, ACTUAL flakiness
-        of the 2captcha service and demo page. The test failures you see here are NOT
-        test bugs - they are ACCURATE representations of the real-world reliability
-        of this CAPTCHA solving service.
-
-        If this test is flaky, that's because 2captcha IS FLAKY in production.
-        If this test fails intermittently, that's because 2captcha FAILS INTERMITTENTLY in production.
-
-        NEVER EVER hide real flakiness by disabling tests or adding @pytest.mark.skip.
-        Users NEED to see this failure rate to understand what they're getting into.
-
-        When this test DOES pass, it confirms:
-        - Extension loads and configures correctly
-        - 2captcha API key is accepted
-        - Extension can successfully auto-solve CAPTCHAs
-        - The entire flow works end-to-end
-
-        When it fails (as it often does):
-        - Demo page has JavaScript errors (representing real-world broken sites)
-        - Turnstile tokens expire before solving (representing real-world timing issues)
-        - 2captcha service may be slow/down (representing real-world service issues)
-
-        This is VALUABLE INFORMATION about the service. DO NOT HIDE IT.
-        """
-        with tempfile.TemporaryDirectory() as tmpdir:
-            tmpdir = Path(tmpdir)
-            env = setup_test_env(tmpdir)
-            env['TWOCAPTCHA_API_KEY'] = self.api_key
-
-            subprocess.run(['node', str(INSTALL_SCRIPT)], env=env, timeout=120, capture_output=True)
-
-            # Launch Chromium in crawls directory
-            crawl_id = 'solve'
-            crawl_dir = Path(env['CRAWLS_DIR']) / crawl_id
-            chrome_dir = crawl_dir / 'chrome'
-            env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
-            process, cdp_url = launch_chrome(env, chrome_dir, crawl_id)
-
-            try:
-                # Wait for extensions.json to be written
-                extensions_file = chrome_dir / 'extensions.json'
-                for i in range(20):
-                    if extensions_file.exists():
-                        break
-                    time.sleep(0.5)
-                assert extensions_file.exists(), f"extensions.json not created"
-
-                subprocess.run(['node', str(CONFIG_SCRIPT), '--url=x', '--snapshot-id=x'], env=env, timeout=30, capture_output=True)
-
-                script = f'''
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer-core');
-(async () => {{
-    const browser = await puppeteer.connect({{ browserWSEndpoint: '{cdp_url}' }});
-    const page = await browser.newPage();
-
-    // Capture console messages from the page (including extension messages)
-    page.on('console', msg => {{
-        const text = msg.text();
-        if (text.includes('2captcha') || text.includes('turnstile') || text.includes('captcha')) {{
-            console.error('[CONSOLE]', text);
-        }}
-    }});
-
-    await page.setViewport({{ width: 1440, height: 900 }});
-    console.error('[*] Loading {TEST_URL}...');
-    await page.goto('{TEST_URL}', {{ waitUntil: 'networkidle2', timeout: 30000 }});
-
-    // Wait for CAPTCHA iframe (minimal wait to avoid token expiration)
-    console.error('[*] Waiting for CAPTCHA iframe...');
-    await page.waitForSelector('iframe', {{ timeout: 30000 }});
-    console.error('[*] CAPTCHA iframe found - extension should auto-solve now');
-
-    // DON'T CLICK - extension should auto-solve since autoSolveTurnstile=True
-    console.error('[*] Waiting for auto-solve (extension configured with autoSolveTurnstile=True)...');
-
-    // Poll for data-state changes with debug output
-    console.error('[*] Waiting for CAPTCHA to be solved (up to 150s)...');
-    const start = Date.now();
-    let solved = false;
-    let lastState = null;
-
-    while (!solved && (Date.now() - start) < 150000) {{
-        const state = await page.evaluate(() => {{
-            const solver = document.querySelector('.captcha-solver');
-            return {{
-                state: solver?.getAttribute('data-state'),
-                text: solver?.textContent?.trim(),
-                classList: solver?.className
-            }};
-        }});
-
-        if (state.state !== lastState) {{
-            const elapsed = Math.round((Date.now() - start) / 1000);
-            console.error(`[*] State change at ${{elapsed}}s: "${{lastState}}" -> "${{state.state}}" (text: "${{state.text?.slice(0, 50)}}")`);
-            lastState = state.state;
-        }}
-
-        if (state.state === 'solved') {{
-            solved = true;
-            const elapsed = Math.round((Date.now() - start) / 1000);
-            console.error('[+] SOLVED in ' + elapsed + 's!');
-            break;
-        }}
-
-        // Check every 2 seconds
-        await new Promise(r => setTimeout(r, 2000));
-    }}
-
-    if (!solved) {{
-        const elapsed = Math.round((Date.now() - start) / 1000);
-        const finalState = await page.evaluate(() => {{
-            const solver = document.querySelector('.captcha-solver');
-            return {{
-                state: solver?.getAttribute('data-state'),
-                text: solver?.textContent?.trim(),
-                html: solver?.outerHTML?.slice(0, 200)
-            }};
-        }});
-        console.error(`[!] TIMEOUT after ${{elapsed}}s. Final state: ${{JSON.stringify(finalState)}}`);
-        browser.disconnect();
-        process.exit(1);
-    }}
-
-    const final = await page.evaluate(() => {{
-        const solver = document.querySelector('.captcha-solver');
-        return {{
-            solved: true,
-            state: solver?.getAttribute('data-state'),
-            text: solver?.textContent?.trim()
-        }};
-    }});
-    browser.disconnect();
-    console.log(JSON.stringify(final));
-}})();
-'''
-                (tmpdir / 's.js').write_text(script)
-                print("\n[*] Solving CAPTCHA (this can take up to 150s for 2captcha API)...")
-                r = subprocess.run(['node', str(tmpdir / 's.js')], env=env, timeout=200, capture_output=True, text=True)
-                print(r.stderr)
-                assert r.returncode == 0, f"Failed: {r.stderr}"
-
-                final = json.loads([l for l in r.stdout.strip().split('\n') if l.startswith('{')][-1])
-                assert final.get('solved'), f"Not solved: {final}"
-                assert final.get('state') == 'solved', f"State not 'solved': {final}"
-                print(f"[+] SUCCESS! CAPTCHA solved: {final.get('text','')[:50]}")
-            finally:
-                kill_chrome(process, chrome_dir)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-xvs'])
diff --git a/archivebox/plugins/ublock/config.json b/archivebox/plugins/ublock/config.json
deleted file mode 100644
index f7f47aef..00000000
--- a/archivebox/plugins/ublock/config.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "required_plugins": ["chrome"],
-  "properties": {
-    "UBLOCK_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["USE_UBLOCK"],
-      "description": "Enable uBlock Origin browser extension for ad blocking"
-    }
-  }
-}
diff --git a/archivebox/plugins/ublock/on_Crawl__80_install_ublock_extension.js b/archivebox/plugins/ublock/on_Crawl__80_install_ublock_extension.js
deleted file mode 100755
index ea5fd474..00000000
--- a/archivebox/plugins/ublock/on_Crawl__80_install_ublock_extension.js
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env node
-/**
- * uBlock Origin Extension Plugin
- *
- * Installs and configures the uBlock Origin Chrome extension for ad blocking
- * and privacy protection during page archiving.
- *
- * Extension: https://chromewebstore.google.com/detail/cjpalhdlnbpafiamejdnhcphjbkeiagm
- *
- * Priority: 80 - Must install before Chrome session starts at Crawl level
- * Hook: on_Crawl (runs once per crawl, not per snapshot)
- *
- * This extension automatically:
- * - Blocks ads, trackers, and malware domains
- * - Reduces page load time and bandwidth usage
- * - Improves privacy during archiving
- * - Removes clutter from archived pages
- * - Uses efficient blocking with filter lists
- */
-
-// Import extension utilities
-const { installExtensionWithCache } = require('../chrome/chrome_utils.js');
-
-// Extension metadata
-const EXTENSION = {
-    webstore_id: 'cjpalhdlnbpafiamejdnhcphjbkeiagm',
-    name: 'ublock',
-};
-
-/**
- * Main entry point - install extension before archiving
- *
- * Note: uBlock Origin works automatically with default filter lists.
- * No configuration needed - blocks ads, trackers, and malware domains out of the box.
- */
-async function main() {
-    const extension = await installExtensionWithCache(EXTENSION);
-
-    if (extension) {
-        console.log('[+] Ads and trackers will be blocked during archiving');
-    }
-
-    return extension;
-}
-
-// Export functions for use by other plugins
-module.exports = {
-    EXTENSION,
-};
-
-// Run if executed directly
-if (require.main === module) {
-    main().then(() => {
-        console.log('[✓] uBlock Origin extension setup complete');
-        process.exit(0);
-    }).catch(err => {
-        console.error('[❌] uBlock Origin extension setup failed:', err);
-        process.exit(1);
-    });
-}
diff --git a/archivebox/plugins/ublock/templates/icon.html b/archivebox/plugins/ublock/templates/icon.html
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins/ublock/tests/test_ublock.py b/archivebox/plugins/ublock/tests/test_ublock.py
deleted file mode 100644
index a3ab08a8..00000000
--- a/archivebox/plugins/ublock/tests/test_ublock.py
+++ /dev/null
@@ -1,725 +0,0 @@
-"""
-Unit tests for ublock plugin
-
-Tests invoke the plugin hook as an external process and verify outputs/side effects.
-"""
-
-import json
-import os
-import subprocess
-import tempfile
-from pathlib import Path
-
-import pytest
-
-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    setup_test_env,
-    get_test_env,
-    launch_chromium_session,
-    kill_chromium_session,
-    CHROME_LAUNCH_HOOK,
-    PLUGINS_ROOT,
-)
-
-
-PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_SCRIPT = next(PLUGIN_DIR.glob('on_Crawl__*_install_ublock_extension.*'), None)
-
-
-def test_install_script_exists():
-    """Verify install script exists"""
-    assert INSTALL_SCRIPT.exists(), f"Install script not found: {INSTALL_SCRIPT}"
-
-
-def test_extension_metadata():
-    """Test that uBlock Origin extension has correct metadata"""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        env["CHROME_EXTENSIONS_DIR"] = str(Path(tmpdir) / "chrome_extensions")
-
-        result = subprocess.run(
-            ["node", "-e", f"const ext = require('{INSTALL_SCRIPT}'); console.log(JSON.stringify(ext.EXTENSION))"],
-            capture_output=True,
-            text=True,
-            env=env
-        )
-
-        assert result.returncode == 0, f"Failed to load extension metadata: {result.stderr}"
-
-        metadata = json.loads(result.stdout)
-        assert metadata["webstore_id"] == "cjpalhdlnbpafiamejdnhcphjbkeiagm"
-        assert metadata["name"] == "ublock"
-
-
-def test_install_creates_cache():
-    """Test that install creates extension cache"""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        ext_dir = Path(tmpdir) / "chrome_extensions"
-        ext_dir.mkdir(parents=True)
-
-        env = os.environ.copy()
-        env["CHROME_EXTENSIONS_DIR"] = str(ext_dir)
-
-        result = subprocess.run(
-            ["node", str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=120  # uBlock is large, may take longer to download
-        )
-
-        # Check output mentions installation
-        assert "uBlock" in result.stdout or "ublock" in result.stdout
-
-        # Check cache file was created
-        cache_file = ext_dir / "ublock.extension.json"
-        assert cache_file.exists(), "Cache file should be created"
-
-        # Verify cache content
-        cache_data = json.loads(cache_file.read_text())
-        assert cache_data["webstore_id"] == "cjpalhdlnbpafiamejdnhcphjbkeiagm"
-        assert cache_data["name"] == "ublock"
-
-
-def test_install_twice_uses_cache():
-    """Test that running install twice uses existing cache on second run"""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        ext_dir = Path(tmpdir) / "chrome_extensions"
-        ext_dir.mkdir(parents=True)
-
-        env = os.environ.copy()
-        env["CHROME_EXTENSIONS_DIR"] = str(ext_dir)
-
-        # First install - downloads the extension
-        result1 = subprocess.run(
-            ["node", str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=120  # uBlock is large
-        )
-        assert result1.returncode == 0, f"First install failed: {result1.stderr}"
-
-        # Verify cache was created
-        cache_file = ext_dir / "ublock.extension.json"
-        assert cache_file.exists(), "Cache file should exist after first install"
-
-        # Second install - should use cache and be faster
-        result2 = subprocess.run(
-            ["node", str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-        assert result2.returncode == 0, f"Second install failed: {result2.stderr}"
-
-        # Second run should mention cache reuse
-        assert "already installed" in result2.stdout or "cache" in result2.stdout.lower() or result2.returncode == 0
-
-
-def test_no_configuration_required():
-    """Test that uBlock Origin works without configuration"""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        ext_dir = Path(tmpdir) / "chrome_extensions"
-        ext_dir.mkdir(parents=True)
-
-        env = os.environ.copy()
-        env["CHROME_EXTENSIONS_DIR"] = str(ext_dir)
-        # No API keys needed - works with default filter lists
-
-        result = subprocess.run(
-            ["node", str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=120
-        )
-
-        # Should not require any API keys
-        combined_output = result.stdout + result.stderr
-        assert "API" not in combined_output or result.returncode == 0
-
-
-def test_large_extension_size():
-    """Test that uBlock Origin is downloaded successfully despite large size"""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        ext_dir = Path(tmpdir) / "chrome_extensions"
-        ext_dir.mkdir(parents=True)
-
-        env = os.environ.copy()
-        env["CHROME_EXTENSIONS_DIR"] = str(ext_dir)
-
-        result = subprocess.run(
-            ["node", str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=120
-        )
-
-        # If extension was downloaded, verify it's substantial size
-        crx_file = ext_dir / "cjpalhdlnbpafiamejdnhcphjbkeiagm__ublock.crx"
-        if crx_file.exists():
-            # uBlock Origin with filter lists is typically 2-5 MB
-            size_bytes = crx_file.stat().st_size
-            assert size_bytes > 1_000_000, f"uBlock Origin should be > 1MB, got {size_bytes} bytes"
-
-
-def check_ad_blocking(cdp_url: str, test_url: str, env: dict, script_dir: Path) -> dict:
-    """Check ad blocking effectiveness by counting ad elements on page.
-
-    Returns dict with:
-        - adElementsFound: int - number of ad-related elements found
-        - adElementsVisible: int - number of visible ad elements
-        - blockedRequests: int - number of blocked network requests (ads/trackers)
-        - totalRequests: int - total network requests made
-        - percentBlocked: int - percentage of ad elements hidden (0-100)
-    """
-    test_script = f'''
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer-core');
-
-(async () => {{
-    const browser = await puppeteer.connect({{ browserWSEndpoint: '{cdp_url}' }});
-
-    const page = await browser.newPage();
-    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
-    await page.setViewport({{ width: 1440, height: 900 }});
-
-    // Track network requests
-    let blockedRequests = 0;
-    let totalRequests = 0;
-    const adDomains = ['doubleclick', 'googlesyndication', 'googleadservices', 'facebook.com/tr',
-                       'analytics', 'adservice', 'advertising', 'taboola', 'outbrain', 'criteo',
-                       'amazon-adsystem', 'ads.yahoo', 'gemini.yahoo', 'yimg.com/cv/', 'beap.gemini'];
-
-    page.on('request', request => {{
-        totalRequests++;
-        const url = request.url().toLowerCase();
-        if (adDomains.some(d => url.includes(d))) {{
-            // This is an ad request
-        }}
-    }});
-
-    page.on('requestfailed', request => {{
-        const url = request.url().toLowerCase();
-        if (adDomains.some(d => url.includes(d))) {{
-            blockedRequests++;
-        }}
-    }});
-
-    console.error('Navigating to {test_url}...');
-    await page.goto('{test_url}', {{ waitUntil: 'domcontentloaded', timeout: 60000 }});
-
-    // Wait for page to fully render and ads to load
-    await new Promise(r => setTimeout(r, 5000));
-
-    // Check for ad elements in the DOM
-    const result = await page.evaluate(() => {{
-        // Common ad-related selectors
-        const adSelectors = [
-            // Generic ad containers
-            '[class*="ad-"]', '[class*="ad_"]', '[class*="-ad"]', '[class*="_ad"]',
-            '[id*="ad-"]', '[id*="ad_"]', '[id*="-ad"]', '[id*="_ad"]',
-            '[class*="advertisement"]', '[id*="advertisement"]',
-            '[class*="sponsored"]', '[id*="sponsored"]',
-            // Google ads
-            'ins.adsbygoogle', '[data-ad-client]', '[data-ad-slot]',
-            // Yahoo specific
-            '[class*="gemini"]', '[data-beacon]', '[class*="native-ad"]',
-            '[class*="stream-ad"]', '[class*="LDRB"]', '[class*="ntv-ad"]',
-            // iframes (often ads)
-            'iframe[src*="ad"]', 'iframe[src*="doubleclick"]', 'iframe[src*="googlesyndication"]',
-            // Common ad sizes
-            '[style*="300px"][style*="250px"]', '[style*="728px"][style*="90px"]',
-            '[style*="160px"][style*="600px"]', '[style*="320px"][style*="50px"]',
-        ];
-
-        let adElementsFound = 0;
-        let adElementsVisible = 0;
-
-        for (const selector of adSelectors) {{
-            try {{
-                const elements = document.querySelectorAll(selector);
-                for (const el of elements) {{
-                    adElementsFound++;
-                    const style = window.getComputedStyle(el);
-                    const rect = el.getBoundingClientRect();
-                    const isVisible = style.display !== 'none' &&
-                                     style.visibility !== 'hidden' &&
-                                     style.opacity !== '0' &&
-                                     rect.width > 0 && rect.height > 0;
-                    if (isVisible) {{
-                        adElementsVisible++;
-                    }}
-                }}
-            }} catch (e) {{
-                // Invalid selector, skip
-            }}
-        }}
-
-        return {{
-            adElementsFound,
-            adElementsVisible,
-            pageTitle: document.title
-        }};
-    }});
-
-    result.blockedRequests = blockedRequests;
-    result.totalRequests = totalRequests;
-    // Calculate how many ad elements were hidden (found but not visible)
-    const hiddenAds = result.adElementsFound - result.adElementsVisible;
-    result.percentBlocked = result.adElementsFound > 0
-        ? Math.round((hiddenAds / result.adElementsFound) * 100)
-        : 0;
-
-    console.error('Ad blocking result:', JSON.stringify(result));
-    browser.disconnect();
-    console.log(JSON.stringify(result));
-}})();
-'''
-    script_path = script_dir / 'check_ads.js'
-    script_path.write_text(test_script)
-
-    result = subprocess.run(
-        ['node', str(script_path)],
-        cwd=str(script_dir),
-        capture_output=True,
-        text=True,
-        env=env,
-        timeout=90
-    )
-
-    if result.returncode != 0:
-        raise RuntimeError(f"Ad check script failed: {result.stderr}")
-
-    output_lines = [l for l in result.stdout.strip().split('\n') if l.startswith('{')]
-    if not output_lines:
-        raise RuntimeError(f"No JSON output from ad check: {result.stdout}\nstderr: {result.stderr}")
-
-    return json.loads(output_lines[-1])
-
-
-# Test URL: Yahoo has many ads that uBlock should block (no mocks)
-TEST_URL = 'https://www.yahoo.com/'
-
-
-def test_extension_loads_in_chromium():
-    """Verify uBlock extension loads in Chromium by visiting its dashboard page.
-
-    Uses Chromium with --load-extension to load the extension, then navigates
-    to chrome-extension://<id>/dashboard.html and checks that "uBlock" appears
-    in the page content.
-    """
-    import signal
-    import time
-    print("[test] Starting test_extension_loads_in_chromium", flush=True)
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        print(f"[test] tmpdir={tmpdir}", flush=True)
-
-        # Set up isolated env with proper directory structure
-        env = setup_test_env(tmpdir)
-        env.setdefault('CHROME_HEADLESS', 'true')
-        print(f"[test] DATA_DIR={env.get('DATA_DIR')}", flush=True)
-        print(f"[test] CHROME_BINARY={env.get('CHROME_BINARY')}", flush=True)
-
-        ext_dir = Path(env['CHROME_EXTENSIONS_DIR'])
-
-        # Step 1: Install the uBlock extension
-        print("[test] Installing uBlock extension...", flush=True)
-        result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=5
-        )
-        print(f"[test] Extension install rc={result.returncode}", flush=True)
-        assert result.returncode == 0, f"Extension install failed: {result.stderr}"
-
-        # Verify extension cache was created
-        cache_file = ext_dir / 'ublock.extension.json'
-        assert cache_file.exists(), "Extension cache not created"
-        ext_data = json.loads(cache_file.read_text())
-        print(f"[test] Extension installed: {ext_data.get('name')} v{ext_data.get('version')}", flush=True)
-
-        # Step 2: Launch Chromium using the chrome hook (loads extensions automatically)
-        print(f"[test] NODE_MODULES_DIR={env.get('NODE_MODULES_DIR')}", flush=True)
-        print(f"[test] puppeteer-core exists: {(Path(env['NODE_MODULES_DIR']) / 'puppeteer-core').exists()}", flush=True)
-        print("[test] Launching Chromium...", flush=True)
-
-        # Launch Chromium in crawls directory
-        crawl_id = 'test-ublock'
-        crawl_dir = Path(env['CRAWLS_DIR']) / crawl_id
-        crawl_dir.mkdir(parents=True, exist_ok=True)
-        chrome_dir = crawl_dir / 'chrome'
-        chrome_dir.mkdir(parents=True, exist_ok=True)
-        env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
-
-        chrome_launch_process = subprocess.Popen(
-            ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
-            cwd=str(chrome_dir),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            env=env
-        )
-        print("[test] Chrome hook started, waiting for CDP...", flush=True)
-
-        # Wait for Chromium to launch and CDP URL to be available
-        cdp_url = None
-        import select
-        for i in range(20):
-            poll_result = chrome_launch_process.poll()
-            if poll_result is not None:
-                stdout, stderr = chrome_launch_process.communicate()
-                raise RuntimeError(f"Chromium launch failed (exit={poll_result}):\nStdout: {stdout}\nStderr: {stderr}")
-            cdp_file = chrome_dir / 'cdp_url.txt'
-            if cdp_file.exists():
-                cdp_url = cdp_file.read_text().strip()
-                print(f"[test] CDP URL found after {i+1} attempts", flush=True)
-                break
-            # Read any available stderr
-            while select.select([chrome_launch_process.stderr], [], [], 0)[0]:
-                line = chrome_launch_process.stderr.readline()
-                if not line:
-                    break
-                print(f"[hook] {line.strip()}", flush=True)
-            time.sleep(0.3)
-
-        assert cdp_url, "Chromium CDP URL not found after 20s"
-        print(f"[test] Chromium launched with CDP URL: {cdp_url}", flush=True)
-        print("[test] Reading hook stderr...", flush=True)
-
-        # Check what extensions were loaded by chrome hook
-        extensions_file = chrome_dir / 'extensions.json'
-        if extensions_file.exists():
-            loaded_exts = json.loads(extensions_file.read_text())
-            print(f"Extensions loaded by chrome hook: {[e.get('name') for e in loaded_exts]}")
-        else:
-            print("Warning: extensions.json not found")
-
-        # Get the unpacked extension ID - Chrome computes this from the path
-        unpacked_path = ext_data.get('unpacked_path', '')
-        print(f"[test] Extension unpacked path: {unpacked_path}", flush=True)
-        print("[test] Running puppeteer test script...", flush=True)
-
-        try:
-            # Step 3: Connect to Chromium and verify extension loads
-            # First use CDP to get all targets and find extension ID
-            test_script = f'''
-if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
-const puppeteer = require('puppeteer-core');
-
-(async () => {{
-    const browser = await puppeteer.connect({{ browserWSEndpoint: '{cdp_url}' }});
-
-    // Wait for extension to initialize
-    await new Promise(r => setTimeout(r, 500));
-
-    // Use CDP to get all targets including service workers
-    const pages = await browser.pages();
-    const page = pages[0] || await browser.newPage();
-    const client = await page.createCDPSession();
-
-    const {{ targetInfos }} = await client.send('Target.getTargets');
-    console.error('All CDP targets:');
-    targetInfos.forEach(t => console.error('  -', t.type, t.url.slice(0, 100)));
-
-    // Find any chrome-extension:// URLs
-    const extTargets = targetInfos.filter(t => t.url.startsWith('chrome-extension://'));
-    console.error('Extension targets:', extTargets.length);
-
-    // Filter out built-in extensions
-    const builtinIds = ['nkeimhogjdpnpccoofpliimaahmaaome', 'fignfifoniblkonapihmkfakmlgkbkcf',
-                       'ahfgeienlihckogmohjhadlkjgocpleb', 'mhjfbmdgcfjbbpaeojofohoefgiehjai'];
-    const customExts = extTargets.filter(t => {{
-        const extId = t.url.split('://')[1].split('/')[0];
-        return !builtinIds.includes(extId);
-    }});
-
-    if (customExts.length === 0) {{
-        console.log(JSON.stringify({{ loaded: false, error: 'No custom extension found via CDP' }}));
-        browser.disconnect();
-        return;
-    }}
-
-    // Get extension ID from first custom extension
-    const extId = customExts[0].url.split('://')[1].split('/')[0];
-    console.error('Found extension ID:', extId);
-
-    // Try to load dashboard.html
-    const newPage = await browser.newPage();
-    const dashboardUrl = 'chrome-extension://' + extId + '/dashboard.html';
-    console.error('Loading:', dashboardUrl);
-
-    try {{
-        await newPage.goto(dashboardUrl, {{ waitUntil: 'domcontentloaded', timeout: 15000 }});
-        const title = await newPage.title();
-        const content = await newPage.content();
-        const hasUblock = content.toLowerCase().includes('ublock') || title.toLowerCase().includes('ublock');
-
-        console.log(JSON.stringify({{
-            loaded: true,
-            extensionId: extId,
-            pageTitle: title,
-            hasExtensionName: hasUblock,
-            contentLength: content.length
-        }}));
-    }} catch (e) {{
-        console.error('Dashboard load failed:', e.message);
-        console.log(JSON.stringify({{ loaded: true, extensionId: extId, dashboardError: e.message }}));
-    }}
-
-    browser.disconnect();
-}})();
-'''
-            script_path = tmpdir / 'test_ublock.js'
-            script_path.write_text(test_script)
-
-            result = subprocess.run(
-                ['node', str(script_path)],
-                cwd=str(tmpdir),
-                capture_output=True,
-                text=True,
-                env=env,
-                timeout=10
-            )
-
-            print(f"stderr: {result.stderr}")
-            print(f"stdout: {result.stdout}")
-
-            assert result.returncode == 0, f"Test failed: {result.stderr}"
-
-            output_lines = [l for l in result.stdout.strip().split('\n') if l.startswith('{')]
-            assert output_lines, f"No JSON output: {result.stdout}"
-
-            test_result = json.loads(output_lines[-1])
-            assert test_result.get('loaded'), \
-                f"uBlock extension should be loaded in Chromium. Result: {test_result}"
-            print(f"Extension loaded successfully: {test_result}")
-
-        finally:
-            # Clean up Chromium
-            try:
-                chrome_launch_process.send_signal(signal.SIGTERM)
-                chrome_launch_process.wait(timeout=5)
-            except:
-                pass
-            chrome_pid_file = chrome_dir / 'chrome.pid'
-            if chrome_pid_file.exists():
-                try:
-                    chrome_pid = int(chrome_pid_file.read_text().strip())
-                    os.kill(chrome_pid, signal.SIGKILL)
-                except (OSError, ValueError):
-                    pass
-
-
-def test_blocks_ads_on_yahoo_com():
-    """Live test: verify uBlock Origin blocks ads on yahoo.com (real network).
-
-    This test runs TWO browser sessions:
-    1. WITHOUT extension - verifies ads are NOT blocked (baseline)
-    2. WITH extension - verifies ads ARE blocked
-
-    This ensures we're actually testing the extension's effect, not just
-    that a test page happens to show ads as blocked. No mocks are used.
-    """
-    import time
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set up isolated env with proper directory structure
-        env_base = setup_test_env(tmpdir)
-        env_base['CHROME_HEADLESS'] = 'true'
-
-        # ============================================================
-        # STEP 1: BASELINE - Run WITHOUT extension, verify ads are NOT blocked
-        # ============================================================
-        print("\n" + "="*60)
-        print("STEP 1: BASELINE TEST (no extension)")
-        print("="*60)
-
-        data_dir = Path(env_base['DATA_DIR'])
-
-        env_no_ext = env_base.copy()
-        env_no_ext['CHROME_EXTENSIONS_DIR'] = str(data_dir / 'personas' / 'Default' / 'empty_extensions')
-        (data_dir / 'personas' / 'Default' / 'empty_extensions').mkdir(parents=True, exist_ok=True)
-
-        # Launch baseline Chromium in crawls directory
-        baseline_crawl_id = 'baseline-no-ext'
-        baseline_crawl_dir = Path(env_base['CRAWLS_DIR']) / baseline_crawl_id
-        baseline_crawl_dir.mkdir(parents=True, exist_ok=True)
-        baseline_chrome_dir = baseline_crawl_dir / 'chrome'
-        env_no_ext['CRAWL_OUTPUT_DIR'] = str(baseline_crawl_dir)
-        baseline_process = None
-
-        try:
-            baseline_process, baseline_cdp_url = launch_chromium_session(
-                env_no_ext, baseline_chrome_dir, baseline_crawl_id
-            )
-            print(f"Baseline Chromium launched: {baseline_cdp_url}")
-
-            # Wait a moment for browser to be ready
-            time.sleep(2)
-
-            baseline_result = check_ad_blocking(
-                baseline_cdp_url, TEST_URL, env_no_ext, tmpdir
-            )
-
-            print(f"Baseline result: {baseline_result['adElementsVisible']} visible ads "
-                  f"(found {baseline_result['adElementsFound']} ad elements)")
-
-        finally:
-            if baseline_process:
-                kill_chromium_session(baseline_process, baseline_chrome_dir)
-
-        # Verify baseline shows ads ARE visible (not blocked)
-        if baseline_result['adElementsFound'] == 0:
-            pytest.fail(
-                f"Baseline must find ad elements on {TEST_URL}, but found none. "
-                f"This test requires a real ad-heavy page."
-            )
-
-        if baseline_result['adElementsVisible'] == 0:
-            pytest.fail(
-                f"Baseline must have visible ads on {TEST_URL}, but none were visible. "
-                f"This likely means another ad blocker is active or network-level blocking is in effect."
-            )
-
-        print(f"\n✓ Baseline confirmed: {baseline_result['adElementsVisible']} visible ads without extension")
-
-        # ============================================================
-        # STEP 2: Install the uBlock extension
-        # ============================================================
-        print("\n" + "="*60)
-        print("STEP 2: INSTALLING EXTENSION")
-        print("="*60)
-
-        ext_dir = Path(env_base['CHROME_EXTENSIONS_DIR'])
-
-        result = subprocess.run(
-            ['node', str(INSTALL_SCRIPT)],
-            capture_output=True,
-            text=True,
-            env=env_base,
-            timeout=60
-        )
-        assert result.returncode == 0, f"Extension install failed: {result.stderr}"
-
-        cache_file = ext_dir / 'ublock.extension.json'
-        assert cache_file.exists(), "Extension cache not created"
-        ext_data = json.loads(cache_file.read_text())
-        print(f"Extension installed: {ext_data.get('name')} v{ext_data.get('version')}")
-
-        # ============================================================
-        # STEP 3: Run WITH extension, verify ads ARE blocked
-        # ============================================================
-        print("\n" + "="*60)
-        print("STEP 3: TEST WITH EXTENSION")
-        print("="*60)
-
-        # Launch extension test Chromium in crawls directory
-        ext_crawl_id = 'test-with-ext'
-        ext_crawl_dir = Path(env_base['CRAWLS_DIR']) / ext_crawl_id
-        ext_crawl_dir.mkdir(parents=True, exist_ok=True)
-        ext_chrome_dir = ext_crawl_dir / 'chrome'
-        env_base['CRAWL_OUTPUT_DIR'] = str(ext_crawl_dir)
-        ext_process = None
-
-        try:
-            ext_process, ext_cdp_url = launch_chromium_session(
-                env_base, ext_chrome_dir, ext_crawl_id
-            )
-            print(f"Extension Chromium launched: {ext_cdp_url}")
-
-            # Check that extension was loaded
-            extensions_file = ext_chrome_dir / 'extensions.json'
-            if extensions_file.exists():
-                loaded_exts = json.loads(extensions_file.read_text())
-                print(f"Extensions loaded: {[e.get('name') for e in loaded_exts]}")
-
-                # Verify extension has ID and is initialized
-                if loaded_exts and loaded_exts[0].get('id'):
-                    ext_id = loaded_exts[0]['id']
-                    print(f"Extension ID: {ext_id}")
-
-                    # Visit the extension dashboard to ensure it's fully loaded
-                    print("Visiting extension dashboard to verify initialization...")
-                    dashboard_script = f'''
-const puppeteer = require('{env_base['NODE_MODULES_DIR']}/puppeteer-core');
-(async () => {{
-    const browser = await puppeteer.connect({{
-        browserWSEndpoint: '{ext_cdp_url}',
-        defaultViewport: null
-    }});
-    const page = await browser.newPage();
-    await page.goto('chrome-extension://{ext_id}/dashboard.html', {{ waitUntil: 'domcontentloaded', timeout: 10000 }});
-    const title = await page.title();
-    console.log('Dashboard title:', title);
-    await page.close();
-    browser.disconnect();
-}})();
-'''
-                    dash_script_path = tmpdir / 'check_dashboard.js'
-                    dash_script_path.write_text(dashboard_script)
-                    subprocess.run(['node', str(dash_script_path)], capture_output=True, timeout=15, env=env_base)
-
-            # Wait longer for extension to fully initialize filters
-            # On first run, uBlock needs to download filter lists which can take 10-15 seconds
-            print("Waiting for uBlock filter lists to download and initialize...")
-            time.sleep(15)
-
-            ext_result = check_ad_blocking(
-                ext_cdp_url, TEST_URL, env_base, tmpdir
-            )
-
-            print(f"Extension result: {ext_result['adElementsVisible']} visible ads "
-                  f"(found {ext_result['adElementsFound']} ad elements)")
-
-        finally:
-            if ext_process:
-                kill_chromium_session(ext_process, ext_chrome_dir)
-
-        # ============================================================
-        # STEP 4: Compare results
-        # ============================================================
-        print("\n" + "="*60)
-        print("STEP 4: COMPARISON")
-        print("="*60)
-        print(f"Baseline (no extension): {baseline_result['adElementsVisible']} visible ads")
-        print(f"With extension: {ext_result['adElementsVisible']} visible ads")
-
-        # Calculate reduction in visible ads
-        ads_blocked = baseline_result['adElementsVisible'] - ext_result['adElementsVisible']
-        reduction_percent = (ads_blocked / baseline_result['adElementsVisible'] * 100) if baseline_result['adElementsVisible'] > 0 else 0
-
-        print(f"Reduction: {ads_blocked} fewer visible ads ({reduction_percent:.0f}% reduction)")
-
-        # Extension should significantly reduce visible ads
-        assert ext_result['adElementsVisible'] < baseline_result['adElementsVisible'], \
-            f"uBlock should reduce visible ads.\n" \
-            f"Baseline: {baseline_result['adElementsVisible']} visible ads\n" \
-            f"With extension: {ext_result['adElementsVisible']} visible ads\n" \
-            f"Expected fewer ads with extension."
-
-        # Ensure uBlock actually blocks at least some ad/track requests
-        assert ext_result['blockedRequests'] > 0, \
-            "uBlock should block at least one ad/track request on yahoo.com"
-
-        # Extension should block at least 20% of ads (was consistently blocking 5-13% without proper init time)
-        assert reduction_percent >= 20, \
-            f"uBlock should block at least 20% of ads.\n" \
-            f"Baseline: {baseline_result['adElementsVisible']} visible ads\n" \
-            f"With extension: {ext_result['adElementsVisible']} visible ads\n" \
-            f"Reduction: only {reduction_percent:.0f}% (expected at least 20%)\n" \
-            f"Note: Filter lists must be downloaded on first run (takes ~15s)"
-
-        print(f"\n✓ SUCCESS: uBlock correctly blocks ads!")
-        print(f"  - Baseline: {baseline_result['adElementsVisible']} visible ads")
-        print(f"  - With extension: {ext_result['adElementsVisible']} visible ads")
-        print(f"  - Blocked: {ads_blocked} ads ({reduction_percent:.0f}% reduction)")
diff --git a/archivebox/plugins/wget/config.json b/archivebox/plugins/wget/config.json
deleted file mode 100644
index 70893612..00000000
--- a/archivebox/plugins/wget/config.json
+++ /dev/null
@@ -1,75 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "WGET_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_WGET", "USE_WGET"],
-      "description": "Enable wget archiving"
-    },
-    "WGET_WARC_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": ["SAVE_WARC", "WGET_SAVE_WARC"],
-      "description": "Save WARC archive file"
-    },
-    "WGET_BINARY": {
-      "type": "string",
-      "default": "wget",
-      "description": "Path to wget binary"
-    },
-    "WGET_TIMEOUT": {
-      "type": "integer",
-      "default": 60,
-      "minimum": 5,
-      "x-fallback": "TIMEOUT",
-      "description": "Timeout for wget in seconds"
-    },
-    "WGET_USER_AGENT": {
-      "type": "string",
-      "default": "",
-      "x-fallback": "USER_AGENT",
-      "description": "User agent string for wget"
-    },
-    "WGET_COOKIES_FILE": {
-      "type": "string",
-      "default": "",
-      "x-fallback": "COOKIES_FILE",
-      "description": "Path to cookies file"
-    },
-    "WGET_CHECK_SSL_VALIDITY": {
-      "type": "boolean",
-      "default": true,
-      "x-fallback": "CHECK_SSL_VALIDITY",
-      "description": "Whether to verify SSL certificates"
-    },
-    "WGET_ARGS": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [
-        "--no-verbose",
-        "--adjust-extension",
-        "--convert-links",
-        "--force-directories",
-        "--backup-converted",
-        "--span-hosts",
-        "--no-parent",
-        "--page-requisites",
-        "--restrict-file-names=windows",
-        "--tries=2",
-        "-e", "robots=off"
-      ],
-      "x-aliases": ["WGET_DEFAULT_ARGS"],
-      "description": "Default wget arguments"
-    },
-    "WGET_ARGS_EXTRA": {
-      "type": "array",
-      "items": {"type": "string"},
-      "default": [],
-      "x-aliases": ["WGET_EXTRA_ARGS"],
-      "description": "Extra arguments to append to wget command"
-    }
-  }
-}
diff --git a/archivebox/plugins/wget/on_Crawl__10_wget_install.py b/archivebox/plugins/wget/on_Crawl__10_wget_install.py
deleted file mode 100755
index 16d95332..00000000
--- a/archivebox/plugins/wget/on_Crawl__10_wget_install.py
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit wget Binary dependency for the crawl.
-"""
-
-import json
-import os
-import sys
-
-
-# Read config from environment (already validated by JSONSchema)
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def output_binary(name: str, binproviders: str):
-    """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
-    }
-    print(json.dumps(record))
-
-
-def output_machine_config(config: dict):
-    """Output Machine config JSONL patch."""
-    if not config:
-        return
-    record = {
-        'type': 'Machine',
-        'config': config,
-    }
-    print(json.dumps(record))
-
-
-def main():
-    warnings = []
-    errors = []
-
-    # Get config values
-    wget_enabled = get_env_bool('WGET_ENABLED', True)
-    wget_save_warc = get_env_bool('WGET_SAVE_WARC', True)
-    wget_timeout = get_env_int('WGET_TIMEOUT') or get_env_int('TIMEOUT', 60)
-    wget_binary = get_env('WGET_BINARY', 'wget')
-
-    # Compute derived values (USE_WGET for backward compatibility)
-    use_wget = wget_enabled
-
-    # Validate timeout with warning (not error)
-    if use_wget and wget_timeout < 20:
-        warnings.append(
-            f"WGET_TIMEOUT={wget_timeout} is very low. "
-            "wget may fail to archive sites if set to less than ~20 seconds. "
-            "Consider setting WGET_TIMEOUT=60 or higher."
-        )
-
-    if use_wget:
-        output_binary(name='wget', binproviders='apt,brew,pip,env')
-
-    # Output computed config patch as JSONL
-    output_machine_config({
-        'USE_WGET': use_wget,
-        'WGET_BINARY': wget_binary,
-    })
-
-    for warning in warnings:
-        print(f"WARNING:{warning}", file=sys.stderr)
-
-    for error in errors:
-        print(f"ERROR:{error}", file=sys.stderr)
-
-    # Exit with error if any hard errors
-    sys.exit(1 if errors else 0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/wget/on_Snapshot__06_wget.bg.py b/archivebox/plugins/wget/on_Snapshot__06_wget.bg.py
deleted file mode 100644
index f62b21b5..00000000
--- a/archivebox/plugins/wget/on_Snapshot__06_wget.bg.py
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/usr/bin/env python3
-"""
-Archive a URL using wget.
-
-Usage: on_Snapshot__06_wget.bg.py --url=<url> --snapshot-id=<uuid>
-Output: Downloads files to $PWD
-
-Environment variables:
-    WGET_ENABLED: Enable wget archiving (default: True)
-    WGET_WARC_ENABLED: Save WARC file (default: True)
-    WGET_BINARY: Path to wget binary (default: wget)
-    WGET_TIMEOUT: Timeout in seconds (x-fallback: TIMEOUT)
-    WGET_USER_AGENT: User agent string (x-fallback: USER_AGENT)
-    WGET_COOKIES_FILE: Path to cookies file (x-fallback: COOKIES_FILE)
-    WGET_CHECK_SSL_VALIDITY: Whether to check SSL certificates (x-fallback: CHECK_SSL_VALIDITY)
-    WGET_ARGS: Default wget arguments (JSON array)
-    WGET_ARGS_EXTRA: Extra arguments to append (JSON array)
-"""
-
-import json
-import os
-import re
-import subprocess
-import sys
-from datetime import datetime, timezone
-from pathlib import Path
-
-import rich_click as click
-
-
-# Extractor metadata
-PLUGIN_NAME = 'wget'
-BIN_NAME = 'wget'
-BIN_PROVIDERS = 'apt,brew,env'
-OUTPUT_DIR = '.'
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
-    """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
-    if not val:
-        return default if default is not None else []
-    try:
-        result = json.loads(val)
-        if isinstance(result, list):
-            return [str(item) for item in result]
-        return default if default is not None else []
-    except json.JSONDecodeError:
-        return default if default is not None else []
-
-
-STATICFILE_DIR = '../staticfile'
-
-def has_staticfile_output() -> bool:
-    """Check if staticfile extractor already downloaded this URL."""
-    staticfile_dir = Path(STATICFILE_DIR)
-    if not staticfile_dir.exists():
-        return False
-    stdout_log = staticfile_dir / 'stdout.log'
-    if not stdout_log.exists():
-        return False
-    for line in stdout_log.read_text(errors='ignore').splitlines():
-        line = line.strip()
-        if not line.startswith('{'):
-            continue
-        try:
-            record = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-        if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded':
-            return True
-    return False
-
-
-
-
-def save_wget(url: str, binary: str) -> tuple[bool, str | None, str]:
-    """
-    Archive URL using wget.
-
-    Returns: (success, output_path, error_message)
-    """
-    # Get config from env (with WGET_ prefix, x-fallback handled by config loader)
-    timeout = get_env_int('WGET_TIMEOUT') or get_env_int('TIMEOUT', 60)
-    user_agent = get_env('WGET_USER_AGENT') or get_env('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)')
-    check_ssl = get_env_bool('WGET_CHECK_SSL_VALIDITY', True) if get_env('WGET_CHECK_SSL_VALIDITY') else get_env_bool('CHECK_SSL_VALIDITY', True)
-    cookies_file = get_env('WGET_COOKIES_FILE') or get_env('COOKIES_FILE', '')
-    wget_args = get_env_array('WGET_ARGS', [])
-    wget_args_extra = get_env_array('WGET_ARGS_EXTRA', [])
-
-    # Feature toggles
-    warc_enabled = get_env_bool('WGET_WARC_ENABLED', True)
-
-    # Build wget command (later options take precedence)
-    cmd = [
-        binary,
-        *wget_args,
-        f'--timeout={timeout}',
-    ]
-
-    if user_agent:
-        cmd.append(f'--user-agent={user_agent}')
-
-    if warc_enabled:
-        warc_dir = Path('warc')
-        warc_dir.mkdir(exist_ok=True)
-        warc_path = warc_dir / str(int(datetime.now(timezone.utc).timestamp()))
-        cmd.append(f'--warc-file={warc_path}')
-    else:
-        cmd.append('--timestamping')
-
-    if cookies_file and Path(cookies_file).is_file():
-        cmd.extend(['--load-cookies', cookies_file])
-
-    if not check_ssl:
-        cmd.extend(['--no-check-certificate', '--no-hsts'])
-
-    if wget_args_extra:
-        cmd.extend(wget_args_extra)
-
-    cmd.append(url)
-
-    # Run wget
-    try:
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            timeout=timeout * 2,  # Allow extra time for large downloads
-        )
-
-        # Find downloaded files
-        downloaded_files = [
-            f for f in Path('.').rglob('*')
-            if f.is_file() and f.name != '.gitkeep' and not str(f).startswith('warc/')
-        ]
-
-        if not downloaded_files:
-            if result.returncode != 0:
-                return False, None, f'wget failed (exit={result.returncode})'
-            return False, None, 'No files downloaded'
-
-        # Find main HTML file
-        html_files = [
-            f for f in downloaded_files
-            if re.search(r'\.[Ss]?[Hh][Tt][Mm][Ll]?$', str(f))
-        ]
-        output_path = str(html_files[0]) if html_files else str(downloaded_files[0])
-
-        # Parse download stats from wget output
-        stderr_text = (result.stderr or '')
-        output_tail = stderr_text.strip().split('\n')[-3:] if stderr_text else []
-        files_count = len(downloaded_files)
-
-        return True, output_path, ''
-
-    except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout * 2} seconds'
-    except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
-
-
-@click.command()
-@click.option('--url', required=True, help='URL to archive')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Archive a URL using wget."""
-
-    output = None
-    status = 'failed'
-    error = ''
-
-    try:
-        # Check if wget is enabled
-        if not get_env_bool('WGET_ENABLED', True):
-            print('Skipping wget (WGET_ENABLED=False)', file=sys.stderr)
-            # Temporary failure (config disabled) - NO JSONL emission
-            sys.exit(0)
-
-        # Check if staticfile extractor already handled this (permanent skip)
-        if has_staticfile_output():
-            print('Skipping wget - staticfile extractor already downloaded this', file=sys.stderr)
-            print(json.dumps({'type': 'ArchiveResult', 'status': 'skipped', 'output_str': 'staticfile already exists'}))
-            sys.exit(0)
-
-        # Get binary from environment
-        binary = get_env('WGET_BINARY', 'wget')
-
-        # Run extraction
-        success, output, error = save_wget(url, binary)
-
-        if success:
-            # Success - emit ArchiveResult
-            result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
-            }
-            print(json.dumps(result))
-            sys.exit(0)
-        else:
-            # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
-            sys.exit(1)
-
-    except Exception as e:
-        # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/wget/templates/card.html b/archivebox/plugins/wget/templates/card.html
deleted file mode 100644
index 550db449..00000000
--- a/archivebox/plugins/wget/templates/card.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<!-- Wget thumbnail - scaled down iframe preview of mirrored site -->
-<div class="extractor-thumbnail wget-thumbnail" style="width: 100%; height: 100px; overflow: hidden; background: #fff;">
-    <iframe src="{{ output_path }}"
-            style="width: 400%; height: 400px; transform: scale(0.25); transform-origin: top left; pointer-events: none; border: none;"
-            loading="lazy"
-            sandbox="allow-same-origin">
-    </iframe>
-</div>
diff --git a/archivebox/plugins/wget/templates/icon.html b/archivebox/plugins/wget/templates/icon.html
deleted file mode 100644
index 430432cf..00000000
--- a/archivebox/plugins/wget/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--wget" title="Wget"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M12 4v10"/><path d="M8 10l4 4 4-4"/><path d="M4 20h16"/></svg></span>
diff --git a/archivebox/plugins/wget/tests/test_wget.py b/archivebox/plugins/wget/tests/test_wget.py
deleted file mode 100644
index 52c1fc55..00000000
--- a/archivebox/plugins/wget/tests/test_wget.py
+++ /dev/null
@@ -1,433 +0,0 @@
-"""
-Integration tests for wget plugin
-
-Tests verify:
-    pass
-1. Validate hook checks for wget binary
-2. Verify deps with abx-pkg
-3. Config options work (WGET_ENABLED, WGET_SAVE_WARC, etc.)
-4. Extraction works against real example.com
-5. Output files contain actual page content
-6. Skip cases work (WGET_ENABLED=False, staticfile present)
-7. Failure cases handled (404, network errors)
-"""
-
-import json
-import os
-import shutil
-import subprocess
-import sys
-import tempfile
-import uuid
-from pathlib import Path
-
-import pytest
-
-
-PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
-WGET_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_wget.*'))
-BREW_HOOK = PLUGINS_ROOT / 'brew' / 'on_Binary__install_using_brew_provider.py'
-APT_HOOK = PLUGINS_ROOT / 'apt' / 'on_Binary__install_using_apt_provider.py'
-TEST_URL = 'https://example.com'
-
-
-def test_hook_script_exists():
-    """Verify hook script exists."""
-    assert WGET_HOOK.exists(), f"Hook script not found: {WGET_HOOK}"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify wget is available via abx-pkg."""
-    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
-
-    wget_binary = Binary(name='wget', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
-    wget_loaded = wget_binary.load()
-
-    if wget_loaded and wget_loaded.abspath:
-        assert True, "wget is available"
-    else:
-        pass
-
-
-def test_reports_missing_dependency_when_not_installed():
-    """Test that script reports DEPENDENCY_NEEDED when wget is not found."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Run with empty PATH so binary won't be found
-        env = {'PATH': '/nonexistent', 'HOME': str(tmpdir)}
-
-        result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env
-        )
-
-        # Missing binary is a transient error - should exit 1 with no JSONL
-        assert result.returncode == 1, "Should exit 1 when dependency missing"
-
-        # Should NOT emit JSONL (transient error - will be retried)
-        jsonl_lines = [line for line in result.stdout.strip().split('\n')
-                      if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, "Should not emit JSONL for transient error (missing binary)"
-
-        # Should log error to stderr
-        assert 'wget' in result.stderr.lower() or 'error' in result.stderr.lower(), \
-            "Should report error in stderr"
-
-
-def test_can_install_wget_via_provider():
-    """Test that wget can be installed via brew/apt provider hooks."""
-
-    # Determine which provider to use
-    if shutil.which('brew'):
-        provider_hook = BREW_HOOK
-        provider_name = 'brew'
-    elif shutil.which('apt-get'):
-        provider_hook = APT_HOOK
-        provider_name = 'apt'
-    else:
-        pass
-
-    assert provider_hook.exists(), f"Provider hook not found: {provider_hook}"
-
-    # Test installation via provider hook
-    binary_id = str(uuid.uuid4())
-    machine_id = str(uuid.uuid4())
-
-    result = subprocess.run(
-        [
-            sys.executable,
-            str(provider_hook),
-            '--binary-id', binary_id,
-            '--machine-id', machine_id,
-            '--name', 'wget',
-            '--binproviders', 'apt,brew,env'
-        ],
-        capture_output=True,
-        text=True,
-        timeout=300  # Installation can take time
-    )
-
-    # Should succeed (wget installs successfully or is already installed)
-    assert result.returncode == 0, f"{provider_name} install failed: {result.stderr}"
-
-    # Should output Binary JSONL record
-    assert 'Binary' in result.stdout or 'wget' in result.stderr, \
-        f"Should output installation info: stdout={result.stdout}, stderr={result.stderr}"
-
-    # Parse JSONL if present
-    if result.stdout.strip():
-        pass
-        for line in result.stdout.strip().split('\n'):
-            pass
-            try:
-                record = json.loads(line)
-                if record.get('type') == 'Binary':
-                    assert record['name'] == 'wget'
-                    assert record['binprovider'] in ['brew', 'apt']
-                    assert record['abspath'], "Should have binary path"
-                    assert Path(record['abspath']).exists(), f"Binary should exist at {record['abspath']}"
-                    break
-            except json.JSONDecodeError:
-                continue
-
-    # Verify wget is now available
-    result = subprocess.run(['which', 'wget'], capture_output=True, text=True)
-    assert result.returncode == 0, "wget should be available after installation"
-
-
-def test_archives_example_com():
-    """Test full workflow: ensure wget installed then archive example.com."""
-
-    # First ensure wget is installed via provider
-    if shutil.which('brew'):
-        provider_hook = BREW_HOOK
-    elif shutil.which('apt-get'):
-        provider_hook = APT_HOOK
-    else:
-        pass
-
-    # Run installation (idempotent - will succeed if already installed)
-    install_result = subprocess.run(
-        [
-            sys.executable,
-            str(provider_hook),
-            '--dependency-id', str(uuid.uuid4()),
-            '--bin-name', 'wget',
-            '--bin-providers', 'apt,brew,env'
-        ],
-        capture_output=True,
-        text=True,
-        timeout=300
-    )
-
-    if install_result.returncode != 0:
-        pass
-
-    # Now test archiving
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Run wget extraction
-        result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=120
-        )
-
-        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Verify files were downloaded
-        downloaded_files = list(tmpdir.rglob('*.html')) + list(tmpdir.rglob('*.htm'))
-        assert len(downloaded_files) > 0, "No HTML files downloaded"
-
-        # Find main HTML file (should contain example.com)
-        main_html = None
-        for html_file in downloaded_files:
-            content = html_file.read_text(errors='ignore')
-            if 'example domain' in content.lower():
-                main_html = html_file
-                break
-
-        assert main_html is not None, "Could not find main HTML file with example.com content"
-
-        # Verify HTML content contains REAL example.com text
-        html_content = main_html.read_text(errors='ignore')
-        assert len(html_content) > 200, f"HTML content too short: {len(html_content)} bytes"
-        assert 'example domain' in html_content.lower(), "Missing 'Example Domain' in HTML"
-        assert ('this domain' in html_content.lower() or
-                'illustrative examples' in html_content.lower()), \
-            "Missing example.com description text"
-        assert ('iana' in html_content.lower() or
-                'more information' in html_content.lower()), \
-            "Missing IANA reference"
-
-
-def test_config_save_wget_false_skips():
-    """Test that WGET_ENABLED=False exits without emitting JSONL."""
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set WGET_ENABLED=False
-        env = os.environ.copy()
-        env['WGET_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        # Should exit 0 when feature disabled
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-
-        # Feature disabled - no JSONL emission, just logs to stderr
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
-
-
-def test_config_save_warc():
-    """Test that WGET_SAVE_WARC=True creates WARC files."""
-
-    # Ensure wget is available
-    if not shutil.which('wget'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set WGET_SAVE_WARC=True explicitly
-        env = os.environ.copy()
-        env['WGET_SAVE_WARC'] = 'True'
-
-        result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'testwarc'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=120
-        )
-
-        if result.returncode == 0:
-            # Look for WARC files in warc/ subdirectory
-            warc_dir = tmpdir / 'warc'
-            if warc_dir.exists():
-                warc_files = list(warc_dir.rglob('*'))
-                warc_files = [f for f in warc_files if f.is_file()]
-                assert len(warc_files) > 0, "WARC file not created when WGET_SAVE_WARC=True"
-
-
-def test_staticfile_present_skips():
-    """Test that wget skips when staticfile already downloaded."""
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Create directory structure like real ArchiveBox:
-        # tmpdir/
-        #   staticfile/  <- staticfile extractor output
-        #   wget/         <- wget extractor runs here, looks for ../staticfile
-        staticfile_dir = tmpdir / 'staticfile'
-        staticfile_dir.mkdir()
-        (staticfile_dir / 'stdout.log').write_text('{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n')
-
-        wget_dir = tmpdir / 'wget'
-        wget_dir.mkdir()
-
-        result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'teststatic'],
-            cwd=wget_dir,  # Run from wget subdirectory
-            capture_output=True,
-            text=True,
-            timeout=30
-        )
-
-        # Should skip with permanent skip JSONL
-        assert result.returncode == 0, "Should exit 0 when permanently skipping"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should emit ArchiveResult JSONL for permanent skip"
-        assert result_json['status'] == 'skipped', f"Should have status='skipped': {result_json}"
-        assert 'staticfile' in result_json.get('output_str', '').lower(), "Should mention staticfile in output_str"
-
-
-def test_handles_404_gracefully():
-    """Test that wget fails gracefully on 404."""
-
-    if not shutil.which('wget'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Try to download non-existent page
-        result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', 'https://example.com/nonexistent-page-404', '--snapshot-id', 'test404'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=60
-        )
-
-        # Should fail
-        assert result.returncode != 0, "Should fail on 404"
-        combined = result.stdout + result.stderr
-        assert '404' in combined or 'Not Found' in combined or 'No files downloaded' in combined, \
-            "Should report 404 or no files downloaded"
-
-
-def test_config_timeout_honored():
-    """Test that WGET_TIMEOUT config is respected."""
-
-    if not shutil.which('wget'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set very short timeout
-        env = os.environ.copy()
-        env['WGET_TIMEOUT'] = '5'
-
-        # This should still succeed for example.com (it's fast)
-        result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'testtimeout'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        # Verify it completed (success or fail, but didn't hang)
-        assert result.returncode in (0, 1), "Should complete (success or fail)"
-
-
-def test_config_user_agent():
-    """Test that WGET_USER_AGENT config is used."""
-
-    if not shutil.which('wget'):
-        pass
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Set custom user agent
-        env = os.environ.copy()
-        env['WGET_USER_AGENT'] = 'TestBot/1.0'
-
-        result = subprocess.run(
-            [sys.executable, str(WGET_HOOK), '--url', TEST_URL, '--snapshot-id', 'testua'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=120
-        )
-
-        # Should succeed (example.com doesn't block)
-        if result.returncode == 0:
-            # Parse clean JSONL output
-            result_json = None
-            for line in result.stdout.strip().split('\n'):
-                line = line.strip()
-                if line.startswith('{'):
-                    pass
-                    try:
-                        record = json.loads(line)
-                        if record.get('type') == 'ArchiveResult':
-                            result_json = record
-                            break
-                    except json.JSONDecodeError:
-                        pass
-
-            assert result_json, "Should have ArchiveResult JSONL output"
-            assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/plugins/ytdlp/config.json b/archivebox/plugins/ytdlp/config.json
deleted file mode 100644
index 2a98e24e..00000000
--- a/archivebox/plugins/ytdlp/config.json
+++ /dev/null
@@ -1,92 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "additionalProperties": false,
-  "properties": {
-    "YTDLP_ENABLED": {
-      "type": "boolean",
-      "default": true,
-      "x-aliases": [
-        "MEDIA_ENABLED",
-        "SAVE_MEDIA",
-        "USE_MEDIA",
-        "USE_YTDLP",
-        "FETCH_MEDIA",
-        "SAVE_YTDLP"
-      ],
-      "description": "Enable video/audio downloading with yt-dlp"
-    },
-    "YTDLP_BINARY": {
-      "type": "string",
-      "default": "yt-dlp",
-      "x-aliases": ["YOUTUBEDL_BINARY", "YOUTUBE_DL_BINARY"],
-      "description": "Path to yt-dlp binary"
-    },
-    "YTDLP_NODE_BINARY": {
-      "type": "string",
-      "default": "node",
-      "x-fallback": "NODE_BINARY",
-      "description": "Path to Node.js binary for yt-dlp JS runtime"
-    },
-    "YTDLP_TIMEOUT": {
-      "type": "integer",
-      "default": 3600,
-      "minimum": 30,
-      "x-fallback": "TIMEOUT",
-      "x-aliases": ["MEDIA_TIMEOUT"],
-      "description": "Timeout for yt-dlp downloads in seconds"
-    },
-    "YTDLP_COOKIES_FILE": {
-      "type": "string",
-      "default": "",
-      "x-fallback": "COOKIES_FILE",
-      "description": "Path to cookies file"
-    },
-    "YTDLP_MAX_SIZE": {
-      "type": "string",
-      "default": "750m",
-      "pattern": "^\\d+[kmgKMG]?$",
-      "x-aliases": ["MEDIA_MAX_SIZE"],
-      "description": "Maximum file size for yt-dlp downloads"
-    },
-    "YTDLP_CHECK_SSL_VALIDITY": {
-      "type": "boolean",
-      "default": true,
-      "x-fallback": "CHECK_SSL_VALIDITY",
-      "description": "Whether to verify SSL certificates"
-    },
-    "YTDLP_ARGS": {
-      "type": "array",
-      "items": { "type": "string" },
-      "default": [
-        "--restrict-filenames",
-        "--trim-filenames=128",
-        "--write-description",
-        "--write-info-json",
-        "--write-thumbnail",
-        "--write-sub",
-        "--write-auto-subs",
-        "--convert-subs=srt",
-        "--yes-playlist",
-        "--continue",
-        "--no-abort-on-error",
-        "--ignore-errors",
-        "--geo-bypass",
-        "--add-metadata",
-        "--no-progress",
-        "--remote-components=ejs:github",
-        "-o",
-        "%(title)s.%(ext)s"
-      ],
-      "x-aliases": ["YTDLP_DEFAULT_ARGS"],
-      "description": "Default yt-dlp arguments"
-    },
-    "YTDLP_ARGS_EXTRA": {
-      "type": "array",
-      "items": { "type": "string" },
-      "default": [],
-      "x-aliases": ["YTDLP_EXTRA_ARGS"],
-      "description": "Extra arguments to append to yt-dlp command"
-    }
-  }
-}
diff --git a/archivebox/plugins/ytdlp/on_Crawl__15_ytdlp_install.py b/archivebox/plugins/ytdlp/on_Crawl__15_ytdlp_install.py
deleted file mode 100755
index 7b81b5d9..00000000
--- a/archivebox/plugins/ytdlp/on_Crawl__15_ytdlp_install.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python3
-"""
-Emit yt-dlp (and related) Binary dependencies for the crawl.
-"""
-
-import json
-import os
-import sys
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def output_binary(name: str, binproviders: str, overrides: dict | None = None):
-    """Output Binary JSONL record for a dependency."""
-    machine_id = os.environ.get('MACHINE_ID', '')
-
-    record = {
-        'type': 'Binary',
-        'name': name,
-        'binproviders': binproviders,
-        'machine_id': machine_id,
-    }
-    if overrides:
-        record['overrides'] = overrides
-    print(json.dumps(record))
-
-
-def main():
-    ytdlp_enabled = get_env_bool('YTDLP_ENABLED', True)
-
-    if not ytdlp_enabled:
-        sys.exit(0)
-
-    output_binary(
-        name='yt-dlp',
-        binproviders='pip,brew,apt,env',
-        overrides={'pip': {'packages': ['yt-dlp[default]']}},
-    )
-
-    # Node.js (required by several JS-based extractors, declared here per legacy binaries.jsonl)
-    output_binary(
-        name='node',
-        binproviders='apt,brew,env',
-        overrides={'apt': {'packages': ['nodejs']}},
-    )
-
-    # ffmpeg (used by media extraction)
-    output_binary(name='ffmpeg', binproviders='apt,brew,env')
-
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/ytdlp/on_Snapshot__02_ytdlp.bg.py b/archivebox/plugins/ytdlp/on_Snapshot__02_ytdlp.bg.py
deleted file mode 100644
index fbf841ae..00000000
--- a/archivebox/plugins/ytdlp/on_Snapshot__02_ytdlp.bg.py
+++ /dev/null
@@ -1,258 +0,0 @@
-#!/usr/bin/env python3
-"""
-Download video/audio from a URL using yt-dlp.
-
-Usage: on_Snapshot__02_ytdlp.bg.py --url=<url> --snapshot-id=<uuid>
-Output: Downloads video/audio files to $PWD
-
-Environment variables:
-    YTDLP_ENABLED: Enable yt-dlp extraction (default: True)
-    YTDLP_BINARY: Path to yt-dlp binary (default: yt-dlp)
-    YTDLP_NODE_BINARY: Path to Node.js binary (x-fallback: NODE_BINARY)
-    YTDLP_TIMEOUT: Timeout in seconds (x-fallback: TIMEOUT)
-    YTDLP_COOKIES_FILE: Path to cookies file (x-fallback: COOKIES_FILE)
-    YTDLP_MAX_SIZE: Maximum file size (default: 750m)
-    YTDLP_CHECK_SSL_VALIDITY: Whether to verify SSL certs (x-fallback: CHECK_SSL_VALIDITY)
-    YTDLP_ARGS: Default yt-dlp arguments (JSON array)
-    YTDLP_ARGS_EXTRA: Extra arguments to append (JSON array)
-"""
-
-import json
-import os
-import subprocess
-import sys
-import threading
-from pathlib import Path
-
-import rich_click as click
-
-
-
-
-def get_env(name: str, default: str = '') -> str:
-    return os.environ.get(name, default).strip()
-
-
-def get_env_bool(name: str, default: bool = False) -> bool:
-    val = get_env(name, '').lower()
-    if val in ('true', '1', 'yes', 'on'):
-        return True
-    if val in ('false', '0', 'no', 'off'):
-        return False
-    return default
-
-
-def get_env_int(name: str, default: int = 0) -> int:
-    try:
-        return int(get_env(name, str(default)))
-    except ValueError:
-        return default
-
-
-def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
-    """Parse a JSON array from environment variable."""
-    val = get_env(name, '')
-    if not val:
-        return default if default is not None else []
-    try:
-        result = json.loads(val)
-        if isinstance(result, list):
-            return [str(item) for item in result]
-        return default if default is not None else []
-    except json.JSONDecodeError:
-        return default if default is not None else []
-
-
-STATICFILE_DIR = '../staticfile'
-
-def has_staticfile_output() -> bool:
-    """Check if staticfile extractor already downloaded this URL."""
-    staticfile_dir = Path(STATICFILE_DIR)
-    if not staticfile_dir.exists():
-        return False
-    stdout_log = staticfile_dir / 'stdout.log'
-    if not stdout_log.exists():
-        return False
-    for line in stdout_log.read_text(errors='ignore').splitlines():
-        line = line.strip()
-        if not line.startswith('{'):
-            continue
-        try:
-            record = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-        if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded':
-            return True
-    return False
-
-
-def save_ytdlp(url: str, binary: str) -> tuple[bool, str | None, str]:
-    """
-    Download video/audio using yt-dlp.
-
-    Returns: (success, output_path, error_message)
-    """
-    # Get config from env (with YTDLP_ prefix, x-fallback handled by config loader)
-    timeout = get_env_int('YTDLP_TIMEOUT') or get_env_int('TIMEOUT', 3600)
-    check_ssl = get_env_bool('YTDLP_CHECK_SSL_VALIDITY', True) if get_env('YTDLP_CHECK_SSL_VALIDITY') else get_env_bool('CHECK_SSL_VALIDITY', True)
-    cookies_file = get_env('YTDLP_COOKIES_FILE') or get_env('COOKIES_FILE', '')
-    max_size = get_env('YTDLP_MAX_SIZE', '750m')
-    node_binary = get_env('YTDLP_NODE_BINARY') or get_env('NODE_BINARY', 'node')
-    ytdlp_args = get_env_array('YTDLP_ARGS', [])
-    ytdlp_args_extra = get_env_array('YTDLP_ARGS_EXTRA', [])
-
-    # Output directory is current directory (hook already runs in output dir)
-    output_dir = Path('.')
-
-    # Build command (later options take precedence)
-    cmd = [
-        binary,
-        *ytdlp_args,
-        # Format with max_size limit (appended after YTDLP_ARGS so it can be overridden by YTDLP_ARGS_EXTRA)
-        f'--format=(bv*+ba/b)[filesize<={max_size}][filesize_approx<=?{max_size}]/(bv*+ba/b)',
-        f'--js-runtimes=node:{node_binary}',
-    ]
-
-    if not check_ssl:
-        cmd.append('--no-check-certificate')
-
-    if cookies_file and Path(cookies_file).is_file():
-        cmd.extend(['--cookies', cookies_file])
-
-    if ytdlp_args_extra:
-        cmd.extend(ytdlp_args_extra)
-
-    if '--newline' not in cmd:
-        cmd.append('--newline')
-
-    cmd.append(url)
-
-    try:
-        print(f'[ytdlp] Starting download (timeout={timeout}s)', file=sys.stderr)
-
-        output_lines: list[str] = []
-        process = subprocess.Popen(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            bufsize=1,
-        )
-
-        def _read_output() -> None:
-            if not process.stdout:
-                return
-            for line in process.stdout:
-                output_lines.append(line)
-                sys.stderr.write(line)
-
-        reader = threading.Thread(target=_read_output, daemon=True)
-        reader.start()
-
-        try:
-            process.wait(timeout=timeout)
-        except subprocess.TimeoutExpired:
-            process.kill()
-            reader.join(timeout=1)
-            return False, None, f'Timed out after {timeout} seconds'
-
-        reader.join(timeout=1)
-        combined_output = ''.join(output_lines)
-
-        # Check if any media files were downloaded
-        media_extensions = (
-            '.mp4', '.webm', '.mkv', '.avi', '.mov', '.flv', '.wmv', '.m4v',
-            '.mp3', '.m4a', '.ogg', '.wav', '.flac', '.aac', '.opus',
-            '.json', '.jpg', '.png', '.webp', '.jpeg',
-            '.vtt', '.srt', '.ass', '.lrc',
-            '.description',
-        )
-
-        downloaded_files = [
-            f for f in output_dir.glob('*')
-            if f.is_file() and f.suffix.lower() in media_extensions
-        ]
-
-        if downloaded_files:
-            # Return first video/audio file, or first file if no media
-            video_audio = [
-                f for f in downloaded_files
-                if f.suffix.lower() in ('.mp4', '.webm', '.mkv', '.avi', '.mov', '.mp3', '.m4a', '.ogg', '.wav', '.flac')
-            ]
-            output = str(video_audio[0]) if video_audio else str(downloaded_files[0])
-            return True, output, ''
-        else:
-            stderr = combined_output
-
-            # These are NOT errors - page simply has no downloadable media
-            # Return success with no output (legitimate "nothing to download")
-            if 'ERROR: Unsupported URL' in stderr:
-                return True, None, ''  # Not a media site - success, no output
-            if 'URL could be a direct video link' in stderr:
-                return True, None, ''  # Not a supported media URL - success, no output
-            if process.returncode == 0:
-                return True, None, ''  # yt-dlp exited cleanly, just no media - success
-
-            # These ARE errors - something went wrong
-            if 'HTTP Error 404' in stderr:
-                return False, None, '404 Not Found'
-            if 'HTTP Error 403' in stderr:
-                return False, None, '403 Forbidden'
-            if 'Unable to extract' in stderr:
-                return False, None, 'Unable to extract media info'
-
-            return False, None, f'yt-dlp error: {stderr}'
-
-    except subprocess.TimeoutExpired:
-        return False, None, f'Timed out after {timeout} seconds'
-    except Exception as e:
-        return False, None, f'{type(e).__name__}: {e}'
-
-
-@click.command()
-@click.option('--url', required=True, help='URL to download video/audio from')
-@click.option('--snapshot-id', required=True, help='Snapshot UUID')
-def main(url: str, snapshot_id: str):
-    """Download video/audio from a URL using yt-dlp."""
-
-    try:
-        # Check if yt-dlp downloading is enabled
-        if not get_env_bool('YTDLP_ENABLED', True):
-            print('Skipping ytdlp (YTDLP_ENABLED=False)', file=sys.stderr)
-            # Temporary failure (config disabled) - NO JSONL emission
-            sys.exit(0)
-
-        # Check if staticfile extractor already handled this (permanent skip)
-        if has_staticfile_output():
-            print('Skipping ytdlp - staticfile extractor already downloaded this', file=sys.stderr)
-            print(json.dumps({'type': 'ArchiveResult', 'status': 'skipped', 'output_str': 'staticfile already exists'}))
-            sys.exit(0)
-
-        # Get binary from environment
-        binary = get_env('YTDLP_BINARY', 'yt-dlp')
-
-        # Run extraction
-        success, output, error = save_ytdlp(url, binary)
-
-        if success:
-            # Success - emit ArchiveResult
-            result = {
-                'type': 'ArchiveResult',
-                'status': 'succeeded',
-                'output_str': output or ''
-            }
-            print(json.dumps(result))
-            sys.exit(0)
-        else:
-            # Transient error - emit NO JSONL
-            print(f'ERROR: {error}', file=sys.stderr)
-            sys.exit(1)
-
-    except Exception as e:
-        # Transient error - emit NO JSONL
-        print(f'ERROR: {type(e).__name__}: {e}', file=sys.stderr)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/archivebox/plugins/ytdlp/templates/card.html b/archivebox/plugins/ytdlp/templates/card.html
deleted file mode 100644
index 6fe32098..00000000
--- a/archivebox/plugins/ytdlp/templates/card.html
+++ /dev/null
@@ -1,17 +0,0 @@
-<!-- YT-DLP output list -->
-{% if media_files %}
-    <div class="loose-items" style="pointer-events: auto;">
-        {% for file in media_files %}
-            <a href="{{ file.url|default:file.path|urlencode }}" target="preview"
-               title="{{ file.name }}">
-                📄 {{ file.name }}
-            </a>
-        {% endfor %}
-    </div>
-{% else %}
-    <div class="thumbnail-compact" data-plugin="ytdlp" data-compact="1">
-        <span class="thumbnail-compact-icon">🎬</span>
-        <span class="thumbnail-compact-label">YT-DLP</span>
-        <span class="thumbnail-compact-meta">media</span>
-    </div>
-{% endif %}
diff --git a/archivebox/plugins/ytdlp/templates/full.html b/archivebox/plugins/ytdlp/templates/full.html
deleted file mode 100644
index 6a4b2b35..00000000
--- a/archivebox/plugins/ytdlp/templates/full.html
+++ /dev/null
@@ -1,10 +0,0 @@
-<!-- YT-DLP fullscreen - full video/audio player -->
-<div class="extractor-fullscreen ytdlp-fullscreen" style="width: 100%; height: 100vh; background: #000; display: flex; align-items: center; justify-content: center;">
-    <video src="{{ output_path }}"
-           style="max-width: 100%; max-height: 100%;"
-           controls
-           autoplay
-           preload="auto">
-        Your browser does not support the video tag.
-    </video>
-</div>
diff --git a/archivebox/plugins/ytdlp/templates/icon.html b/archivebox/plugins/ytdlp/templates/icon.html
deleted file mode 100644
index bf0e4ee4..00000000
--- a/archivebox/plugins/ytdlp/templates/icon.html
+++ /dev/null
@@ -1 +0,0 @@
-<span class="abx-output-icon abx-output-icon--ytdlp" title="Video"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="6" width="18" height="12" rx="2"/><path d="M10 9l5 3-5 3z"/></svg></span>
diff --git a/archivebox/plugins/ytdlp/tests/test_ytdlp.py b/archivebox/plugins/ytdlp/tests/test_ytdlp.py
deleted file mode 100644
index 561c4324..00000000
--- a/archivebox/plugins/ytdlp/tests/test_ytdlp.py
+++ /dev/null
@@ -1,202 +0,0 @@
-"""
-Integration tests for ytdlp plugin
-
-Tests verify:
-1. Hook script exists
-2. Verify deps with abx-pkg
-3. YT-DLP extraction works on video URLs
-4. JSONL output is correct
-5. Config options work (YTDLP_ENABLED, YTDLP_TIMEOUT)
-6. Handles non-video URLs gracefully
-"""
-
-import json
-import subprocess
-import sys
-import tempfile
-import time
-from pathlib import Path
-import pytest
-
-PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
-YTDLP_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_ytdlp.*'), None)
-TEST_URL = 'https://example.com/video.mp4'
-
-def test_hook_script_exists():
-    """Verify on_Snapshot hook exists."""
-    assert YTDLP_HOOK.exists(), f"Hook not found: {YTDLP_HOOK}"
-
-
-def test_verify_deps_with_abx_pkg():
-    """Verify yt-dlp, node, and ffmpeg are available via abx-pkg."""
-    from abx_pkg import Binary, PipProvider, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
-
-    missing_binaries = []
-
-    # Verify yt-dlp is available
-    ytdlp_binary = Binary(name='yt-dlp', binproviders=[PipProvider(), EnvProvider()])
-    ytdlp_loaded = ytdlp_binary.load()
-    if not (ytdlp_loaded and ytdlp_loaded.abspath):
-        missing_binaries.append('yt-dlp')
-
-    # Verify node is available (yt-dlp needs it for JS extraction)
-    node_binary = Binary(
-        name='node',
-        binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
-    )
-    node_loaded = node_binary.load()
-    if not (node_loaded and node_loaded.abspath):
-        missing_binaries.append('node')
-
-    # Verify ffmpeg is available (yt-dlp needs it for video conversion)
-    ffmpeg_binary = Binary(name='ffmpeg', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
-    ffmpeg_loaded = ffmpeg_binary.load()
-    if not (ffmpeg_loaded and ffmpeg_loaded.abspath):
-        missing_binaries.append('ffmpeg')
-
-    if missing_binaries:
-        pass
-
-def test_handles_non_video_url():
-    """Test that ytdlp extractor handles non-video URLs gracefully via hook."""
-    # Prerequisites checked by earlier test
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Run ytdlp extraction hook on non-video URL
-        result = subprocess.run(
-            [sys.executable, str(YTDLP_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            timeout=60
-        )
-
-        # Should exit 0 even for non-media URL
-        assert result.returncode == 0, f"Should handle non-media URL gracefully: {result.stderr}"
-
-        # Parse clean JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                pass
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, "Should have ArchiveResult JSONL output"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-
-def test_config_ytdlp_enabled_false_skips():
-    """Test that YTDLP_ENABLED=False exits without emitting JSONL."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        env['YTDLP_ENABLED'] = 'False'
-
-        result = subprocess.run(
-            [sys.executable, str(YTDLP_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=30
-        )
-
-        assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
-
-        # Feature disabled - temporary failure, should NOT emit JSONL
-        assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
-
-        # Should NOT emit any JSONL
-        jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
-        assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
-
-
-def test_config_timeout():
-    """Test that YTDLP_TIMEOUT config is respected (also via MEDIA_TIMEOUT alias)."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        env = os.environ.copy()
-        env['YTDLP_TIMEOUT'] = '5'
-
-        start_time = time.time()
-        result = subprocess.run(
-            [sys.executable, str(YTDLP_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=10  # Should complete in 5s, use 10s as safety margin
-        )
-        elapsed_time = time.time() - start_time
-
-        assert result.returncode == 0, f"Should complete without hanging: {result.stderr}"
-        # Allow 1 second overhead for subprocess startup and Python interpreter
-        assert elapsed_time <= 6.0, f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
-
-
-def test_real_youtube_url():
-    """Test that yt-dlp can extract video/audio from a real YouTube URL."""
-    import os
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-
-        # Use a short, stable YouTube video (YouTube's own about video)
-        youtube_url = 'https://www.youtube.com/watch?v=jNQXAC9IVRw'  # "Me at the zoo" - first YouTube video
-
-        env = os.environ.copy()
-        env['YTDLP_TIMEOUT'] = '120'  # Give it time to download
-
-        start_time = time.time()
-        result = subprocess.run(
-            [sys.executable, str(YTDLP_HOOK), '--url', youtube_url, '--snapshot-id', 'testyoutube'],
-            cwd=tmpdir,
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=180
-        )
-        elapsed_time = time.time() - start_time
-
-        # Should succeed
-        assert result.returncode == 0, f"Should extract video/audio successfully: {result.stderr}"
-
-        # Parse JSONL output
-        result_json = None
-        for line in result.stdout.strip().split('\n'):
-            line = line.strip()
-            if line.startswith('{'):
-                try:
-                    record = json.loads(line)
-                    if record.get('type') == 'ArchiveResult':
-                        result_json = record
-                        break
-                except json.JSONDecodeError:
-                    pass
-
-        assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
-        assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
-
-        # Check that some video/audio files were downloaded
-        output_files = list(tmpdir.glob('**/*'))
-        media_files = [f for f in output_files if f.is_file() and f.suffix.lower() in ('.mp4', '.webm', '.mkv', '.m4a', '.mp3', '.json', '.jpg', '.webp')]
-
-        assert len(media_files) > 0, f"Should have downloaded at least one video/audio file. Files: {output_files}"
-
-        print(f"Successfully extracted {len(media_files)} file(s) in {elapsed_time:.2f}s")
-
-
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py
index f4e670cb..b98f7f95 100644
--- a/archivebox/search/__init__.py
+++ b/archivebox/search/__init__.py
@@ -2,7 +2,7 @@
 Search module for ArchiveBox.
 
 Search indexing is handled by search backend hooks in plugins:
-    archivebox/plugins/search_backend_*/on_Snapshot__*_index_*.py
+    abx_plugins/plugins/search_backend_*/on_Snapshot__*_index_*.py
 
 This module provides the query interface that dynamically discovers
 search backend plugins using the hooks system.
diff --git a/archivebox/templates/admin/base.html b/archivebox/templates/admin/base.html
index 86bd85c8..b2b5bcc9 100644
--- a/archivebox/templates/admin/base.html
+++ b/archivebox/templates/admin/base.html
@@ -275,6 +275,21 @@
                 -moz-osx-font-smoothing: grayscale;
             }
 
+            /* Snapshot admin actions: hide label/colon and remove card border */
+            #content-main form .field-admin_actions > label,
+            #content form .field-admin_actions > label,
+            #content-main form .field-admin_actions label,
+            #content form .field-admin_actions label {
+                display: none !important;
+            }
+
+            #content-main form fieldset.actions-card,
+            #content form fieldset.actions-card {
+                border: none !important;
+                box-shadow: none !important;
+                background: transparent !important;
+            }
+
             /* Readonly fields styling */
             #content-main form fieldset .readonly,
             #content form fieldset .readonly {
diff --git a/archivebox/templates/admin/progress_monitor.html b/archivebox/templates/admin/progress_monitor.html
index 5fc449e6..733ad9eb 100644
--- a/archivebox/templates/admin/progress_monitor.html
+++ b/archivebox/templates/admin/progress_monitor.html
@@ -608,10 +608,6 @@
         </div>
     </div>
 
-    <div class="thumbnail-strip empty" id="thumbnail-strip">
-        <span class="thumbnail-label">Recent:</span>
-    </div>
-
     <div class="tree-container" id="tree-container">
         <div class="idle-message" id="idle-message">No active crawls</div>
         <div id="crawl-tree"></div>
@@ -625,7 +621,7 @@
     const treeContainer = document.getElementById('tree-container');
     const crawlTree = document.getElementById('crawl-tree');
     const idleMessage = document.getElementById('idle-message');
-    const thumbnailStrip = document.getElementById('thumbnail-strip');
+    const thumbnailStrip = null;
 
     let pollInterval = null;
     let pollDelayMs = 1000;
@@ -697,65 +693,8 @@
     }
 
 
-    function renderThumbnail(thumb, isNew) {
-        const ext = (thumb.embed_path || '').toLowerCase().split('.').pop();
-        const isImage = ['png', 'jpg', 'jpeg', 'gif', 'webp', 'svg', 'ico'].includes(ext);
-
-        const item = document.createElement('a');
-        item.className = 'thumbnail-item' + (isNew ? ' new' : '');
-        item.href = `/admin/core/snapshot/${thumb.snapshot_id}/change/`;
-        item.title = `${thumb.plugin}: ${thumb.snapshot_url}`;
-        item.dataset.id = thumb.id;
-
-        const archiveUrl = thumb.archive_url || thumb.archive_path;
-        if (isImage && archiveUrl) {
-            item.innerHTML = `
-                <img src="${archiveUrl}" alt="${thumb.plugin}" loading="lazy" onerror="this.parentElement.innerHTML='<div class=\\'thumbnail-fallback\\'>${getPluginIcon(thumb.plugin)}</div><span class=\\'thumbnail-plugin\\'>${thumb.plugin}</span>'">
-                <span class="thumbnail-plugin">${thumb.plugin}</span>
-            `;
-        } else {
-            item.innerHTML = `
-                <div class="thumbnail-fallback">${getPluginIcon(thumb.plugin)}</div>
-                <span class="thumbnail-plugin">${thumb.plugin}</span>
-            `;
-        }
-
-        return item;
-    }
-
-    function updateThumbnails(thumbnails) {
-        if (!thumbnails || thumbnails.length === 0) {
-            thumbnailStrip.classList.add('empty');
-            return;
-        }
-
-        thumbnailStrip.classList.remove('empty');
-
-        // Find new thumbnails (ones we haven't seen before)
-        const newThumbs = thumbnails.filter(t => !knownThumbnailIds.has(t.id));
-
-        // Add new thumbnails to the beginning (after the label)
-        const label = thumbnailStrip.querySelector('.thumbnail-label');
-        newThumbs.reverse().forEach(thumb => {
-            const item = renderThumbnail(thumb, true);
-            if (label.nextSibling) {
-                thumbnailStrip.insertBefore(item, label.nextSibling);
-            } else {
-                thumbnailStrip.appendChild(item);
-            }
-            knownThumbnailIds.add(thumb.id);
-        });
-
-        // Limit to 20 thumbnails (remove old ones)
-        const items = thumbnailStrip.querySelectorAll('.thumbnail-item');
-        if (items.length > 20) {
-            for (let i = 20; i < items.length; i++) {
-                const id = items[i].dataset.id;
-                knownThumbnailIds.delete(id);
-                items[i].remove();
-            }
-        }
-    }
+    function renderThumbnail(thumb, isNew) { return null; }
+    function updateThumbnails(thumbnails) {}
 
     function renderExtractor(extractor) {
         const icon = extractor.status === 'started' ? '&#8635;' :
@@ -1009,8 +948,7 @@
             crawlTree.innerHTML = '';
         }
 
-        // Update thumbnail strip with recently completed results
-        updateThumbnails(data.recent_thumbnails || []);
+        // Recent thumbnails removed
     }
 
     function fetchProgress() {
diff --git a/archivebox/templates/core/snapshot.html b/archivebox/templates/core/snapshot.html
index 6adbf7c4..0ad5a226 100644
--- a/archivebox/templates/core/snapshot.html
+++ b/archivebox/templates/core/snapshot.html
@@ -717,7 +717,7 @@
                                             <p class="card-text"><code>{{ result_info.path }}</code></p>
                                         </a>
                                     <a href="{{ display_url }}" target="preview">
-                                        <h4 class="card-title">{{ result_info.name|plugin_display_name|title }}</h4>
+                                        <h4 class="card-title">{{ result_info.name|title }}</h4>
                                     </a>
                                     {% if result_info.result %}
                                         {% with plugin_base=result_info.name|plugin_name %}
diff --git a/archivebox/tests/test_auth_ldap.py b/archivebox/tests/test_auth_ldap.py
index a56d29f7..10972acd 100644
--- a/archivebox/tests/test_auth_ldap.py
+++ b/archivebox/tests/test_auth_ldap.py
@@ -63,7 +63,7 @@ class TestLDAPConfig(unittest.TestCase):
 
         config = LDAPConfig(
             LDAP_ENABLED=True,
-            LDAP_SERVER_URI="ldap://localhost:389",
+            LDAP_SERVER_URI="ldap://ldap-test.localhost:389",
             LDAP_BIND_DN="cn=admin,dc=example,dc=com",
             LDAP_BIND_PASSWORD="password",
             LDAP_USER_BASE="ou=users,dc=example,dc=com",
@@ -172,7 +172,7 @@ class TestArchiveBoxWithLDAP(unittest.TestCase):
             env={
                 **os.environ,
                 'LDAP_ENABLED': 'False',
-                'LDAP_SERVER_URI': 'ldap://localhost:389',
+                'LDAP_SERVER_URI': 'ldap://ldap-test.localhost:389',
             }
         )
 
diff --git a/archivebox/tests/test_hooks.py b/archivebox/tests/test_hooks.py
index 308633ba..9d0afa0e 100755
--- a/archivebox/tests/test_hooks.py
+++ b/archivebox/tests/test_hooks.py
@@ -468,7 +468,7 @@ class TestPluginMetadata(unittest.TestCase):
     def test_plugin_name_added(self):
         """run_hook() should add plugin name to records."""
         # Simulate what run_hook() does
-        script = Path('/archivebox/plugins/wget/on_Snapshot__50_wget.py')
+        script = Path('/abx_plugins/plugins/wget/on_Snapshot__50_wget.py')
         plugin_name = script.parent.name
 
         record = {'type': 'ArchiveResult', 'status': 'succeeded'}
diff --git a/archivebox/workers/orchestrator.py b/archivebox/workers/orchestrator.py
index 6465ef88..c83d4a55 100644
--- a/archivebox/workers/orchestrator.py
+++ b/archivebox/workers/orchestrator.py
@@ -3,13 +3,16 @@ Orchestrator for managing worker processes.
 
 The Orchestrator polls the Crawl queue and spawns CrawlWorkers as needed.
 
-Architecture:
-    Orchestrator (polls Crawl queue)
-    └── CrawlWorker(s) (one per active Crawl)
-        └── SnapshotWorker(s) (one per Snapshot, up to limit)
-            └── Hook Processes (sequential, forked by SnapshotWorker)
+Orchestrator (takes list of specific crawls | polls for pending queued crawls forever) spawns:
+└── CrawlWorker(s) (one per active Crawl)
+    └── SnapshotWorker(s) (one per Snapshot, up to limit)
+        └── Hook Processes (sequential, forked by SnapshotWorker)
+            e.g. on_Snapshot__23_save_pdf.js
+                 on_Snapshot__24_save_screenshot.js
+                 ...
 
 Usage:
+
     # Default: runs forever (for use as subprocess of server)
     orchestrator = Orchestrator(exit_on_idle=False)
     orchestrator.runloop()
diff --git a/pyproject.toml b/pyproject.toml
index 65983d51..23f34ab7 100755
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,6 +84,7 @@ dependencies = [
     "yt-dlp>=2024.1.0",      # for: media extractor
     ### Binary/Package Management
     "abx-pkg>=0.1.0",        # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
+    "abx-plugins>=0.1.0",    # shared plugin package (sourced from uv workspace in local dev)
     "gallery-dl>=1.31.1",
     ### UUID7 backport for Python <3.14
     "uuid7>=0.1.0; python_version < '3.14'",  # for: uuid7 support on Python 3.13 (provides uuid_extensions module)
@@ -164,6 +165,9 @@ package = true
 python-version = "3.13"
 # compile-bytecode = true
 
+[tool.uv.sources]
+abx-plugins = { workspace = true }
+
 [build-system]
 requires = ["pdm-backend"]
 build-backend = "pdm.backend"