From c7b2217cd6cdb36eda6cddcbf86a6a32faae4025 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 19 Jan 2026 01:00:53 -0800 Subject: [PATCH] tons of fixes with codex --- archivebox/cli/archivebox_add.py | 61 +- archivebox/cli/archivebox_pluginmap.py | 13 +- archivebox/config/configset.py | 7 + archivebox/core/models.py | 171 +++-- archivebox/core/settings.py | 4 + archivebox/core/views.py | 10 +- archivebox/crawls/models.py | 64 +- archivebox/hooks.py | 156 ++--- archivebox/machine/detect.py | 66 +- archivebox/machine/models.py | 190 ++++-- .../machine/tests/test_machine_models.py | 36 +- archivebox/misc/checks.py | 4 +- archivebox/misc/progress_layout.py | 615 +++++++++++------- .../plugins/accessibility/templates/icon.html | 1 + .../plugins/accessibility/tests/__init__.py | 1 - .../plugins/apt/on_Binary__13_apt_install.py | 2 +- archivebox/plugins/apt/tests/__init__.py | 1 - .../plugins/apt/tests/test_apt_provider.py | 4 +- .../on_Snapshot__13_archivedotorg.py | 14 + .../plugins/archivedotorg/templates/icon.html | 2 +- archivebox/plugins/chrome/binaries.jsonl | 1 - archivebox/plugins/chrome/chrome_utils.js | 21 +- archivebox/plugins/chrome/config.json | 4 +- .../chrome/on_Crawl__01_chrome_install.py | 265 -------- .../chrome/on_Crawl__70_chrome_install.py | 34 + ...bg.js => on_Crawl__90_chrome_launch.bg.js} | 6 +- ...bg.js => on_Snapshot__10_chrome_tab.bg.js} | 83 ++- .../chrome/on_Snapshot__11_chrome_wait.js | 76 +++ .../chrome/on_Snapshot__30_chrome_navigate.js | 2 +- archivebox/plugins/chrome/templates/icon.html | 1 + archivebox/plugins/chrome/tests/__init__.py | 0 .../chrome/tests/chrome_test_helpers.py | 178 +++-- .../plugins/chrome/tests/test_chrome.py | 57 +- .../on_Snapshot__21_consolelog.bg.js | 61 +- .../plugins/consolelog/templates/icon.html | 1 + .../plugins/consolelog/tests/__init__.py | 1 - .../consolelog/tests/test_consolelog.py | 26 +- .../custom/on_Binary__14_custom_install.py | 13 +- archivebox/plugins/custom/tests/__init__.py | 1 - 
.../custom/tests/test_custom_provider.py | 4 +- .../plugins/dns/on_Snapshot__22_dns.bg.js | 63 +- archivebox/plugins/dns/templates/icon.html | 1 + archivebox/plugins/dom/on_Snapshot__53_dom.js | 16 +- archivebox/plugins/dom/templates/icon.html | 2 +- archivebox/plugins/dom/tests/test_dom.py | 2 +- .../plugins/env/on_Binary__15_env_install.py | 3 +- archivebox/plugins/env/tests/__init__.py | 1 - .../plugins/env/tests/test_env_provider.py | 4 +- .../favicon/on_Snapshot__11_favicon.py | 9 +- .../plugins/favicon/templates/icon.html | 2 +- archivebox/plugins/forumdl/binaries.jsonl | 1 - .../forumdl/on_Crawl__13_forumdl_install.py | 80 --- .../forumdl/on_Crawl__25_forumdl_install.py | 79 +++ ...dl.bg.py => on_Snapshot__04_forumdl.bg.py} | 37 +- .../plugins/forumdl/templates/icon.html | 2 +- archivebox/plugins/gallerydl/binaries.jsonl | 1 - .../on_Crawl__10_gallerydl_install.py | 80 --- .../on_Crawl__20_gallerydl_install.py | 48 ++ ....bg.py => on_Snapshot__03_gallerydl.bg.py} | 54 +- .../plugins/gallerydl/templates/icon.html | 2 +- archivebox/plugins/git/binaries.jsonl | 1 - .../plugins/git/on_Crawl__05_git_install.py | 48 ++ .../plugins/git/on_Crawl__09_git_install.py | 80 --- ...t__62_git.py => on_Snapshot__05_git.bg.py} | 2 +- archivebox/plugins/git/templates/icon.html | 2 +- .../plugins/headers/templates/icon.html | 2 +- .../htmltotext/on_Snapshot__58_htmltotext.py | 24 +- .../plugins/htmltotext/templates/icon.html | 2 +- .../plugins/infiniscroll/templates/icon.html | 1 + ...l_istilldontcareaboutcookies_extension.js} | 2 +- archivebox/plugins/mercury/binaries.jsonl | 1 - .../mercury/on_Crawl__12_mercury_install.py | 85 --- .../mercury/on_Crawl__40_mercury_install.py | 53 ++ .../plugins/mercury/templates/icon.html | 2 +- .../plugins/merkletree/templates/icon.html | 1 + .../plugins/merkletree/tests/__init__.py | 1 - .../on_Snapshot__15_modalcloser.bg.js | 2 +- .../plugins/modalcloser/templates/icon.html | 1 + .../plugins/npm/on_Binary__10_npm_install.py | 38 +- 
.../plugins/npm/on_Crawl__00_npm_install.py | 51 ++ archivebox/plugins/npm/tests/__init__.py | 1 - .../plugins/npm/tests/test_npm_provider.py | 4 +- archivebox/plugins/papersdl/binaries.jsonl | 1 - .../papersdl/on_Crawl__14_papersdl_install.py | 80 --- .../papersdl/on_Crawl__30_papersdl_install.py | 48 ++ .../papersdl/on_Snapshot__66_papersdl.bg.py | 37 +- .../plugins/papersdl/templates/icon.html | 2 +- .../on_Snapshot__75_parse_dom_outlinks.js | 5 + .../parse_dom_outlinks/templates/icon.html | 2 +- .../parse_dom_outlinks/tests/__init__.py | 1 - .../tests/test_parse_dom_outlinks.py | 3 +- .../on_Snapshot__70_parse_html_urls.py | 143 +++- .../parse_html_urls/templates/icon.html | 2 +- .../on_Snapshot__74_parse_jsonl_urls.py | 7 + .../parse_jsonl_urls/templates/icon.html | 2 +- .../on_Snapshot__73_parse_netscape_urls.py | 7 + .../parse_netscape_urls/templates/icon.html | 2 +- .../on_Snapshot__72_parse_rss_urls.py | 7 + .../parse_rss_urls/templates/icon.html | 2 +- .../on_Snapshot__71_parse_txt_urls.py | 7 + .../parse_txt_urls/templates/icon.html | 2 +- archivebox/plugins/pdf/on_Snapshot__52_pdf.js | 16 +- archivebox/plugins/pdf/templates/icon.html | 2 +- .../plugins/pip/on_Binary__11_pip_install.py | 43 +- archivebox/plugins/pip/tests/__init__.py | 1 - .../plugins/pip/tests/test_pip_provider.py | 30 +- archivebox/plugins/puppeteer/__init__.py | 1 + .../on_Binary__12_puppeteer_install.py | 170 +++++ .../on_Crawl__60_puppeteer_install.py | 31 + .../plugins/puppeteer/tests/test_puppeteer.py | 124 ++++ archivebox/plugins/readability/binaries.jsonl | 1 - .../on_Crawl__11_readability_install.py | 83 --- .../on_Crawl__35_readability_install.py | 53 ++ .../on_Snapshot__56_readability.py | 18 +- .../plugins/readability/templates/icon.html | 2 +- ....bg.js => on_Snapshot__25_redirects.bg.js} | 14 +- .../plugins/redirects/templates/icon.html | 1 + .../plugins/redirects/tests/__init__.py | 1 - .../plugins/redirects/tests/test_redirects.py | 38 +- 
.../responses/on_Snapshot__24_responses.bg.js | 60 +- .../plugins/responses/templates/icon.html | 1 + .../plugins/responses/tests/__init__.py | 1 - .../plugins/responses/tests/test_responses.py | 31 +- .../screenshot/on_Snapshot__51_screenshot.js | 16 +- .../plugins/screenshot/templates/icon.html | 2 +- .../screenshot/tests/test_screenshot.py | 2 +- .../search_backend_ripgrep/binaries.jsonl | 1 - .../on_Crawl__00_ripgrep_install.py | 92 --- .../on_Crawl__50_ripgrep_install.py | 32 + .../plugins/search_backend_ripgrep/search.py | 14 +- .../search_backend_ripgrep/tests/__init__.py | 0 .../tests/test_ripgrep_detection.py | 34 +- .../tests/test_ripgrep_search.py | 4 +- .../search_backend_sonic/templates/icon.html | 1 + .../plugins/search_backend_sqlite/search.py | 11 +- .../search_backend_sqlite/templates/icon.html | 1 + .../search_backend_sqlite/tests/__init__.py | 1 - archivebox/plugins/seo/templates/icon.html | 1 + archivebox/plugins/seo/tests/__init__.py | 1 - archivebox/plugins/seo/tests/test_seo.py | 3 +- archivebox/plugins/singlefile/binaries.jsonl | 1 - archivebox/plugins/singlefile/config.json | 2 +- .../on_Crawl__08_singlefile_install.py | 85 --- .../on_Crawl__45_singlefile_install.py | 54 ++ ....js => on_Crawl__82_singlefile_install.js} | 2 +- .../singlefile/on_Snapshot__50_singlefile.py | 83 ++- .../plugins/singlefile/templates/icon.html | 2 +- .../singlefile/tests/test_singlefile.py | 7 +- .../plugins/ssl/on_Snapshot__23_ssl.bg.js | 57 +- archivebox/plugins/ssl/templates/icon.html | 1 + archivebox/plugins/ssl/tests/__init__.py | 1 - archivebox/plugins/ssl/tests/test_ssl.py | 32 +- ...bg.js => on_Snapshot__26_staticfile.bg.js} | 14 +- .../plugins/staticfile/templates/icon.html | 2 +- .../plugins/staticfile/tests/__init__.py | 1 - .../staticfile/tests/test_staticfile.py | 25 +- archivebox/plugins/title/templates/icon.html | 2 +- ....js => on_Crawl__83_twocaptcha_install.js} | 4 +- ...g.js => on_Crawl__95_twocaptcha_config.js} | 4 +- 
.../twocaptcha/tests/test_twocaptcha.py | 4 +- ... on_Crawl__80_install_ublock_extension.js} | 2 +- archivebox/plugins/wget/binaries.jsonl | 1 - .../plugins/wget/on_Crawl__06_wget_install.py | 146 ----- .../plugins/wget/on_Crawl__10_wget_install.py | 95 +++ ..._61_wget.py => on_Snapshot__06_wget.bg.py} | 19 +- archivebox/plugins/wget/templates/icon.html | 2 +- archivebox/plugins/wget/tests/test_wget.py | 2 +- archivebox/plugins/ytdlp/binaries.jsonl | 3 - .../ytdlp/on_Crawl__07_ytdlp_install.py | 80 --- .../ytdlp/on_Crawl__15_ytdlp_install.py | 64 ++ ...tdlp.bg.py => on_Snapshot__02_ytdlp.bg.py} | 58 +- archivebox/plugins/ytdlp/templates/icon.html | 2 +- archivebox/templates/core/snapshot_live.html | 6 +- archivebox/templates/static/admin.css | 32 + archivebox/tests/conftest.py | 14 +- archivebox/tests/test_cli_add_interrupt.py | 133 ++++ archivebox/tests/test_hooks.py | 147 ++--- archivebox/tests/test_list.py | 12 +- archivebox/tests/test_real_world_add.py | 133 ++++ .../tests/test_settings_signal_webhooks.py | 8 + archivebox/tests/test_snapshot.py | 105 +-- archivebox/workers/orchestrator.py | 276 ++++++-- archivebox/workers/tests/test_orchestrator.py | 40 ++ archivebox/workers/worker.py | 154 +++-- 184 files changed, 3943 insertions(+), 2420 deletions(-) delete mode 100644 archivebox/plugins/accessibility/tests/__init__.py delete mode 100644 archivebox/plugins/apt/tests/__init__.py delete mode 100644 archivebox/plugins/chrome/binaries.jsonl delete mode 100755 archivebox/plugins/chrome/on_Crawl__01_chrome_install.py create mode 100755 archivebox/plugins/chrome/on_Crawl__70_chrome_install.py rename archivebox/plugins/chrome/{on_Crawl__20_chrome_launch.bg.js => on_Crawl__90_chrome_launch.bg.js} (98%) rename archivebox/plugins/chrome/{on_Snapshot__20_chrome_tab.bg.js => on_Snapshot__10_chrome_tab.bg.js} (86%) create mode 100644 archivebox/plugins/chrome/on_Snapshot__11_chrome_wait.js delete mode 100644 archivebox/plugins/chrome/tests/__init__.py delete mode 100644 
archivebox/plugins/consolelog/tests/__init__.py delete mode 100644 archivebox/plugins/custom/tests/__init__.py delete mode 100644 archivebox/plugins/env/tests/__init__.py delete mode 100644 archivebox/plugins/forumdl/binaries.jsonl delete mode 100755 archivebox/plugins/forumdl/on_Crawl__13_forumdl_install.py create mode 100755 archivebox/plugins/forumdl/on_Crawl__25_forumdl_install.py rename archivebox/plugins/forumdl/{on_Snapshot__65_forumdl.bg.py => on_Snapshot__04_forumdl.bg.py} (87%) delete mode 100644 archivebox/plugins/gallerydl/binaries.jsonl delete mode 100755 archivebox/plugins/gallerydl/on_Crawl__10_gallerydl_install.py create mode 100755 archivebox/plugins/gallerydl/on_Crawl__20_gallerydl_install.py rename archivebox/plugins/gallerydl/{on_Snapshot__64_gallerydl.bg.py => on_Snapshot__03_gallerydl.bg.py} (81%) delete mode 100644 archivebox/plugins/git/binaries.jsonl create mode 100755 archivebox/plugins/git/on_Crawl__05_git_install.py delete mode 100755 archivebox/plugins/git/on_Crawl__09_git_install.py rename archivebox/plugins/git/{on_Snapshot__62_git.py => on_Snapshot__05_git.bg.py} (98%) create mode 100644 archivebox/plugins/infiniscroll/templates/icon.html rename archivebox/plugins/istilldontcareaboutcookies/{on_Crawl__02_istilldontcareaboutcookies_install.js => on_Crawl__81_install_istilldontcareaboutcookies_extension.js} (97%) delete mode 100644 archivebox/plugins/mercury/binaries.jsonl delete mode 100755 archivebox/plugins/mercury/on_Crawl__12_mercury_install.py create mode 100755 archivebox/plugins/mercury/on_Crawl__40_mercury_install.py delete mode 100644 archivebox/plugins/merkletree/tests/__init__.py create mode 100644 archivebox/plugins/modalcloser/templates/icon.html create mode 100644 archivebox/plugins/npm/on_Crawl__00_npm_install.py delete mode 100644 archivebox/plugins/npm/tests/__init__.py delete mode 100644 archivebox/plugins/papersdl/binaries.jsonl delete mode 100755 archivebox/plugins/papersdl/on_Crawl__14_papersdl_install.py create 
mode 100755 archivebox/plugins/papersdl/on_Crawl__30_papersdl_install.py delete mode 100644 archivebox/plugins/parse_dom_outlinks/tests/__init__.py delete mode 100644 archivebox/plugins/pip/tests/__init__.py create mode 100644 archivebox/plugins/puppeteer/__init__.py create mode 100644 archivebox/plugins/puppeteer/on_Binary__12_puppeteer_install.py create mode 100644 archivebox/plugins/puppeteer/on_Crawl__60_puppeteer_install.py create mode 100644 archivebox/plugins/puppeteer/tests/test_puppeteer.py delete mode 100644 archivebox/plugins/readability/binaries.jsonl delete mode 100755 archivebox/plugins/readability/on_Crawl__11_readability_install.py create mode 100755 archivebox/plugins/readability/on_Crawl__35_readability_install.py rename archivebox/plugins/redirects/{on_Snapshot__31_redirects.bg.js => on_Snapshot__25_redirects.bg.js} (93%) delete mode 100644 archivebox/plugins/redirects/tests/__init__.py delete mode 100644 archivebox/plugins/responses/tests/__init__.py delete mode 100644 archivebox/plugins/search_backend_ripgrep/binaries.jsonl delete mode 100755 archivebox/plugins/search_backend_ripgrep/on_Crawl__00_ripgrep_install.py create mode 100755 archivebox/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.py delete mode 100644 archivebox/plugins/search_backend_ripgrep/tests/__init__.py delete mode 100644 archivebox/plugins/search_backend_sqlite/tests/__init__.py delete mode 100644 archivebox/plugins/seo/tests/__init__.py delete mode 100644 archivebox/plugins/singlefile/binaries.jsonl delete mode 100755 archivebox/plugins/singlefile/on_Crawl__08_singlefile_install.py create mode 100755 archivebox/plugins/singlefile/on_Crawl__45_singlefile_install.py rename archivebox/plugins/singlefile/{on_Crawl__04_singlefile_install.js => on_Crawl__82_singlefile_install.js} (99%) delete mode 100644 archivebox/plugins/ssl/tests/__init__.py rename archivebox/plugins/staticfile/{on_Snapshot__32_staticfile.bg.js => on_Snapshot__26_staticfile.bg.js} (95%) delete mode 
100644 archivebox/plugins/staticfile/tests/__init__.py rename archivebox/plugins/twocaptcha/{on_Crawl__05_twocaptcha_install.js => on_Crawl__83_twocaptcha_install.js} (93%) rename archivebox/plugins/twocaptcha/{on_Crawl__25_twocaptcha_config.js => on_Crawl__95_twocaptcha_config.js} (99%) rename archivebox/plugins/ublock/{on_Crawl__03_ublock_install.js => on_Crawl__80_install_ublock_extension.js} (95%) delete mode 100644 archivebox/plugins/wget/binaries.jsonl delete mode 100755 archivebox/plugins/wget/on_Crawl__06_wget_install.py create mode 100755 archivebox/plugins/wget/on_Crawl__10_wget_install.py rename archivebox/plugins/wget/{on_Snapshot__61_wget.py => on_Snapshot__06_wget.bg.py} (92%) delete mode 100644 archivebox/plugins/ytdlp/binaries.jsonl delete mode 100755 archivebox/plugins/ytdlp/on_Crawl__07_ytdlp_install.py create mode 100755 archivebox/plugins/ytdlp/on_Crawl__15_ytdlp_install.py rename archivebox/plugins/ytdlp/{on_Snapshot__63_ytdlp.bg.py => on_Snapshot__02_ytdlp.bg.py} (81%) create mode 100644 archivebox/tests/test_cli_add_interrupt.py create mode 100644 archivebox/tests/test_real_world_add.py create mode 100644 archivebox/tests/test_settings_signal_webhooks.py diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index 65a34c02..d21c11c6 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -4,6 +4,7 @@ __package__ = 'archivebox.cli' __command__ = 'archivebox add' import sys +from pathlib import Path from typing import TYPE_CHECKING @@ -14,7 +15,7 @@ from django.db.models import QuerySet from archivebox.misc.util import enforce_types, docstring from archivebox import CONSTANTS -from archivebox.config.common import ARCHIVING_CONFIG +from archivebox.config.common import ARCHIVING_CONFIG, SERVER_CONFIG from archivebox.config.permissions import USER, HOSTNAME @@ -57,8 +58,11 @@ def add(urls: str | list[str], from archivebox.crawls.models import Crawl from archivebox.base_models.models import 
get_or_create_system_user_pk from archivebox.workers.orchestrator import Orchestrator + from archivebox.misc.logging_util import printable_filesize + from archivebox.misc.system import get_dir_size created_by_id = created_by_id or get_or_create_system_user_pk() + started_at = timezone.now() # 1. Save the provided URLs to sources/2024-11-05__23-59-59__cli_add.txt sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__cli_add.txt' @@ -127,11 +131,56 @@ def add(urls: str | list[str], # Background mode: just queue work and return (orchestrator via server will pick it up) print('[yellow]\\[*] URLs queued. Orchestrator will process them (run `archivebox server` if not already running).[/yellow]') else: - # Foreground mode: run CrawlWorker inline until all work is done - print(f'[green]\\[*] Starting worker to process crawl...[/green]') - from archivebox.workers.worker import CrawlWorker - worker = CrawlWorker(crawl_id=str(crawl.id), worker_id=0) - worker.runloop() # Block until complete + # Foreground mode: run full orchestrator until all work is done + print(f'[green]\\[*] Starting orchestrator to process crawl...[/green]') + from archivebox.workers.orchestrator import Orchestrator + orchestrator = Orchestrator(exit_on_idle=True, crawl_id=str(crawl.id)) + orchestrator.runloop() # Block until complete + + # Print summary for foreground runs + try: + crawl.refresh_from_db() + snapshots_count = crawl.snapshot_set.count() + try: + total_bytes = sum(s.archive_size for s in crawl.snapshot_set.all()) + except Exception: + total_bytes, _, _ = get_dir_size(crawl.output_dir) + total_size = printable_filesize(total_bytes) + total_time = timezone.now() - started_at + total_seconds = int(total_time.total_seconds()) + mins, secs = divmod(total_seconds, 60) + hours, mins = divmod(mins, 60) + if hours: + duration_str = f"{hours}h {mins}m {secs}s" + elif mins: + duration_str = f"{mins}m {secs}s" + else: + duration_str = f"{secs}s" + + # Output dir 
relative to DATA_DIR + try: + rel_output = Path(crawl.output_dir).relative_to(CONSTANTS.DATA_DIR) + rel_output_str = f'./{rel_output}' + except Exception: + rel_output_str = str(crawl.output_dir) + + # Build admin URL from SERVER_CONFIG + bind_addr = SERVER_CONFIG.BIND_ADDR + if bind_addr.startswith('http://') or bind_addr.startswith('https://'): + base_url = bind_addr + else: + base_url = f'http://{bind_addr}' + admin_url = f'{base_url}/admin/crawls/crawl/{crawl.id}/change/' + + print('\n[bold]crawl output saved to:[/bold]') + print(f' {rel_output_str}') + print(f' {admin_url}') + print(f'\n[bold]total urls snapshotted:[/bold] {snapshots_count}') + print(f'[bold]total size:[/bold] {total_size}') + print(f'[bold]total time:[/bold] {duration_str}') + except Exception: + # Summary is best-effort; avoid failing the command if something goes wrong + pass # 6. Return the list of Snapshots in this crawl return crawl.snapshot_set.all() diff --git a/archivebox/cli/archivebox_pluginmap.py b/archivebox/cli/archivebox_pluginmap.py index 04a8cba6..fe280faa 100644 --- a/archivebox/cli/archivebox_pluginmap.py +++ b/archivebox/cli/archivebox_pluginmap.py @@ -205,7 +205,6 @@ def pluginmap( from archivebox.hooks import ( discover_hooks, - extract_step, is_background_hook, BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, @@ -277,16 +276,14 @@ def pluginmap( # Build hook info list hook_infos = [] for hook_path in hooks: - # Get plugin name from parent directory (e.g., 'wget' from 'plugins/wget/on_Snapshot__61_wget.py') + # Get plugin name from parent directory (e.g., 'wget' from 'plugins/wget/on_Snapshot__06_wget.bg.py') plugin_name = hook_path.parent.name - step = extract_step(hook_path.name) is_bg = is_background_hook(hook_path.name) hook_infos.append({ 'path': str(hook_path), 'name': hook_path.name, 'plugin': plugin_name, - 'step': step, 'is_background': is_bg, 'extension': hook_path.suffix, }) @@ -316,20 +313,18 @@ def pluginmap( show_header=True, header_style='bold magenta', ) - 
table.add_column('Step', justify='center', width=6) table.add_column('Plugin', style='cyan', width=20) table.add_column('Hook Name', style='green') table.add_column('BG', justify='center', width=4) table.add_column('Type', justify='center', width=5) - # Sort by step then by name - sorted_hooks = sorted(hook_infos, key=lambda h: (h['step'], h['name'])) + # Sort lexicographically by hook name + sorted_hooks = sorted(hook_infos, key=lambda h: h['name']) for hook in sorted_hooks: bg_marker = '[yellow]bg[/yellow]' if hook['is_background'] else '' ext = hook['extension'].lstrip('.') table.add_row( - str(hook['step']), hook['plugin'], hook['name'], bg_marker, @@ -347,7 +342,7 @@ def pluginmap( prnt(f'[bold]Total hooks discovered: {total_hooks}[/bold]') prnt() prnt('[dim]Hook naming convention: on_{Model}__{XX}_{description}[.bg].{ext}[/dim]') - prnt('[dim] - XX: Two-digit order (first digit = step 0-9)[/dim]') + prnt('[dim] - XX: Two-digit lexicographic order (00-99)[/dim]') prnt('[dim] - .bg: Background hook (non-blocking)[/dim]') prnt('[dim] - ext: py, sh, or js[/dim]') prnt() diff --git a/archivebox/config/configset.py b/archivebox/config/configset.py index d4a02141..19e2e2d2 100644 --- a/archivebox/config/configset.py +++ b/archivebox/config/configset.py @@ -258,11 +258,18 @@ def get_config( # Add CRAWL_OUTPUT_DIR for snapshot hooks to find shared Chrome session if crawl and hasattr(crawl, "output_dir"): config['CRAWL_OUTPUT_DIR'] = str(crawl.output_dir) + config['CRAWL_ID'] = str(getattr(crawl, "id", "")) if getattr(crawl, "id", None) else config.get('CRAWL_ID') # Apply snapshot config overrides (highest priority) if snapshot and hasattr(snapshot, "config") and snapshot.config: config.update(snapshot.config) + if snapshot: + config['SNAPSHOT_ID'] = str(getattr(snapshot, "id", "")) if getattr(snapshot, "id", None) else config.get('SNAPSHOT_ID') + config['SNAPSHOT_DEPTH'] = int(getattr(snapshot, "depth", 0) or 0) + if getattr(snapshot, "crawl_id", None): + 
config['CRAWL_ID'] = str(snapshot.crawl_id) + # Normalize all aliases to canonical names (after all sources merged) # This handles aliases that came from user/crawl/snapshot configs, not just env try: diff --git a/archivebox/core/models.py b/archivebox/core/models.py index f86ef048..bd943a29 100755 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -344,6 +344,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea @property def process_set(self): """Get all Process objects related to this snapshot's ArchiveResults.""" + import json + import json from archivebox.machine.models import Process return Process.objects.filter(archiveresult__snapshot_id=self.id) @@ -613,7 +615,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea ONLY used by: archivebox update (for orphan detection) """ - import json + from archivebox.machine.models import Process # Try index.jsonl first (new format), then index.json (legacy) jsonl_path = snapshot_dir / CONSTANTS.JSONL_INDEX_FILENAME @@ -622,15 +624,12 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea data = None if jsonl_path.exists(): try: - with open(jsonl_path) as f: - for line in f: - line = line.strip() - if line.startswith('{'): - record = json.loads(line) - if record.get('type') == 'Snapshot': - data = record - break - except (json.JSONDecodeError, OSError): + records = Process.parse_records_from_text(jsonl_path.read_text()) + for record in records: + if record.get('type') == 'Snapshot': + data = record + break + except OSError: pass elif json_path.exists(): try: @@ -689,7 +688,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea ONLY used by: archivebox update (for orphan import) """ - import json + from archivebox.machine.models import Process # Try index.jsonl first (new format), then index.json (legacy) jsonl_path = snapshot_dir / CONSTANTS.JSONL_INDEX_FILENAME @@ -698,15 +697,12 @@ 
class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea data = None if jsonl_path.exists(): try: - with open(jsonl_path) as f: - for line in f: - line = line.strip() - if line.startswith('{'): - record = json.loads(line) - if record.get('type') == 'Snapshot': - data = record - break - except (json.JSONDecodeError, OSError): + records = Process.parse_records_from_text(jsonl_path.read_text()) + for record in records: + if record.get('type') == 'Snapshot': + data = record + break + except OSError: pass elif json_path.exists(): try: @@ -1040,7 +1036,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea Returns dict with keys: 'snapshot', 'archive_results', 'binaries', 'processes' """ - import json + from archivebox.machine.models import Process from archivebox.misc.jsonl import ( TYPE_SNAPSHOT, TYPE_ARCHIVERESULT, TYPE_BINARY, TYPE_PROCESS, ) @@ -1056,24 +1052,17 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea if not index_path.exists(): return result - with open(index_path, 'r') as f: - for line in f: - line = line.strip() - if not line or not line.startswith('{'): - continue - try: - record = json.loads(line) - record_type = record.get('type') - if record_type == TYPE_SNAPSHOT: - result['snapshot'] = record - elif record_type == TYPE_ARCHIVERESULT: - result['archive_results'].append(record) - elif record_type == TYPE_BINARY: - result['binaries'].append(record) - elif record_type == TYPE_PROCESS: - result['processes'].append(record) - except json.JSONDecodeError: - continue + records = Process.parse_records_from_text(index_path.read_text()) + for record in records: + record_type = record.get('type') + if record_type == TYPE_SNAPSHOT: + result['snapshot'] = record + elif record_type == TYPE_ARCHIVERESULT: + result['archive_results'].append(record) + elif record_type == TYPE_BINARY: + result['binaries'].append(record) + elif record_type == TYPE_PROCESS: + 
result['processes'].append(record) return result @@ -1317,7 +1306,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea for plugin in all_plugins: result = archive_results.get(plugin) existing = result and result.status == 'succeeded' and (result.output_files or result.output_str) - icon = get_plugin_icon(plugin) + icon = mark_safe(get_plugin_icon(plugin)) # Skip plugins with empty icons that have no output # (e.g., staticfile only shows when there's actual output) @@ -1373,6 +1362,45 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea return str(current_path) + def ensure_crawl_symlink(self) -> None: + """Ensure snapshot is symlinked under its crawl output directory.""" + import os + from pathlib import Path + from django.utils import timezone + from archivebox import DATA_DIR + from archivebox.crawls.models import Crawl + + if not self.crawl_id: + return + crawl = Crawl.objects.filter(id=self.crawl_id).select_related('created_by').first() + if not crawl: + return + + date_base = crawl.created_at or self.created_at or timezone.now() + date_str = date_base.strftime('%Y%m%d') + domain = self.extract_domain_from_url(self.url) + username = crawl.created_by.username if crawl.created_by_id else 'system' + + crawl_dir = DATA_DIR / 'users' / username / 'crawls' / date_str / domain / str(crawl.id) + link_path = crawl_dir / 'snapshots' / domain / str(self.id) + link_parent = link_path.parent + link_parent.mkdir(parents=True, exist_ok=True) + + target = Path(self.output_dir) + if link_path.exists() or link_path.is_symlink(): + if link_path.is_symlink(): + if link_path.resolve() == target.resolve(): + return + link_path.unlink(missing_ok=True) + else: + return + + rel_target = os.path.relpath(target, link_parent) + try: + link_path.symlink_to(rel_target, target_is_directory=True) + except OSError: + return + @cached_property def archive_path(self): return f'{CONSTANTS.ARCHIVE_DIR_NAME}/{self.timestamp}' @@ 
-1636,6 +1664,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea if update_fields: snapshot.save(update_fields=update_fields + ['modified_at']) + snapshot.ensure_crawl_symlink() + return snapshot def create_pending_archiveresults(self) -> list['ArchiveResult']: @@ -1689,7 +1719,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea """ # Check if any ARs are still pending/started pending = self.archiveresult_set.exclude( - status__in=ArchiveResult.FINAL_OR_ACTIVE_STATES + status__in=ArchiveResult.FINAL_STATES ).exists() return not pending @@ -1754,7 +1784,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea - Plugins run in order (numeric prefix) - Each plugin checks its dependencies at runtime - Dependency handling (e.g., chrome_session → screenshot): + Dependency handling (e.g., chrome → screenshot): - Plugins check if required outputs exist before running - If dependency output missing → plugin returns 'skipped' - On retry, if dependency now succeeds → dependent can run @@ -2117,6 +2147,18 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea TITLE_LOADING_MSG = 'Not yet archived...' 
canonical = self.canonical_outputs() + preview_priority = [ + 'singlefile_path', + 'screenshot_path', + 'wget_path', + 'dom_path', + 'pdf_path', + 'readability_path', + ] + best_preview_path = next( + (canonical.get(key) for key in preview_priority if canonical.get(key)), + canonical.get('index_path', 'index.html'), + ) context = { **self.to_dict(extended=True), **{f'{k}_path': v for k, v in canonical.items()}, @@ -2132,6 +2174,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea 'oldest_archive_date': ts_to_date_str(self.oldest_archive_date), 'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG, 'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS, + 'best_preview_path': best_preview_path, } rendered_html = render_to_string('snapshot.html', context) atomic_write(str(Path(out_dir) / CONSTANTS.HTML_INDEX_FILENAME), rendered_html) @@ -2669,12 +2712,12 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi - end_ts, retry_at, cmd, cmd_version, binary FK - Processes side-effect records (Snapshot, Tag, etc.) 
via process_hook_records() """ - import json import mimetypes from collections import defaultdict from pathlib import Path from django.utils import timezone - from archivebox.hooks import process_hook_records + from archivebox.hooks import process_hook_records, extract_records_from_process + from archivebox.machine.models import Process plugin_dir = Path(self.pwd) if self.pwd else None if not plugin_dir or not plugin_dir.exists(): @@ -2687,15 +2730,13 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi # Read and parse JSONL output from stdout.log stdout_file = plugin_dir / 'stdout.log' - stdout = stdout_file.read_text() if stdout_file.exists() else '' - records = [] - for line in stdout.splitlines(): - if line.strip() and line.strip().startswith('{'): - try: - records.append(json.loads(line)) - except json.JSONDecodeError: - continue + if self.process_id and self.process: + records = extract_records_from_process(self.process) + + if not records: + stdout = stdout_file.read_text() if stdout_file.exists() else '' + records = Process.parse_records_from_text(stdout) # Find ArchiveResult record and update status/output from it ar_records = [r for r in records if r.get('type') == 'ArchiveResult'] @@ -2722,9 +2763,20 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi self._set_binary_from_cmd(hook_data['cmd']) # Note: cmd_version is derived from binary.version, not stored on Process else: - # No ArchiveResult record = failed - self.status = self.StatusChoices.FAILED - self.output_str = 'Hook did not output ArchiveResult record' + # No ArchiveResult record: treat background hooks or clean exits as skipped + is_background = False + try: + from archivebox.hooks import is_background_hook + is_background = bool(self.hook_name and is_background_hook(self.hook_name)) + except Exception: + pass + + if is_background or (self.process_id and self.process and self.process.exit_code == 0): + self.status = 
self.StatusChoices.SKIPPED + self.output_str = 'Hook did not output ArchiveResult record' + else: + self.status = self.StatusChoices.FAILED + self.output_str = 'Hook did not output ArchiveResult record' # Walk filesystem and populate output_files, output_size, output_mimetypes exclude_names = {'stdout.log', 'stderr.log', 'hook.pid', 'listener.pid'} @@ -2793,14 +2845,9 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi } process_hook_records(filtered_records, overrides=overrides) - # Cleanup PID files and empty logs + # Cleanup PID files (keep logs even if empty so they can be tailed) pid_file = plugin_dir / 'hook.pid' pid_file.unlink(missing_ok=True) - stderr_file = plugin_dir / 'stderr.log' - if stdout_file.exists() and stdout_file.stat().st_size == 0: - stdout_file.unlink() - if stderr_file.exists() and stderr_file.stat().st_size == 0: - stderr_file.unlink() def _set_binary_from_cmd(self, cmd: list) -> None: """ @@ -3186,4 +3233,4 @@ class ArchiveResultMachine(BaseStateMachine, strict_states=True): # Manually register state machines with python-statemachine registry # (normally auto-discovered from statemachines.py, but we define them here for clarity) registry.register(SnapshotMachine) -registry.register(ArchiveResultMachine) \ No newline at end of file +registry.register(ArchiveResultMachine) diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index aee8d19d..16b6df0c 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -436,6 +436,10 @@ SIGNAL_WEBHOOKS = { }, } +# Avoid background threads touching sqlite connections (especially during tests/migrations). 
+if DATABASES["default"]["ENGINE"].endswith("sqlite3"): + SIGNAL_WEBHOOKS["TASK_HANDLER"] = "signal_webhooks.handlers.sync_task_handler" + ################################################################################ ### Admin Data View Settings ################################################################################ diff --git a/archivebox/core/views.py b/archivebox/core/views.py index f0410846..eec08661 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -120,7 +120,15 @@ class SnapshotView(View): # Get available extractor plugins from hooks (sorted by numeric prefix for ordering) # Convert to base names for display ordering all_plugins = [get_plugin_name(e) for e in get_enabled_plugins()] - preferred_types = tuple(all_plugins) + preview_priority = [ + 'singlefile', + 'screenshot', + 'wget', + 'dom', + 'pdf', + 'readability', + ] + preferred_types = tuple(preview_priority + [p for p in all_plugins if p not in preview_priority]) all_types = preferred_types + tuple(result_type for result_type in archiveresults.keys() if result_type not in preferred_types) best_result = {'path': 'None', 'result': None} diff --git a/archivebox/crawls/models.py b/archivebox/crawls/models.py index 9083d9f5..969287cc 100755 --- a/archivebox/crawls/models.py +++ b/archivebox/crawls/models.py @@ -313,6 +313,12 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith if tags: snapshot.save_tags(tags.split(',')) + # Ensure crawl -> snapshot symlink exists for both new and existing snapshots + try: + snapshot.ensure_crawl_symlink() + except Exception: + pass + return created_snapshots def run(self) -> 'Snapshot | None': @@ -325,7 +331,6 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith The root Snapshot for this crawl, or None for system crawls that don't create snapshots """ import time - import json from pathlib import Path from archivebox.hooks import run_hook, discover_hooks, process_hook_records 
from archivebox.config.configset import get_config @@ -339,35 +344,6 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith # Get merged config with crawl context config = get_config(crawl=self) - # Load all binaries.jsonl files from plugins - # This replaces individual on_Crawl install hooks with declarative configuration - from archivebox.hooks import BUILTIN_PLUGINS_DIR - from archivebox.machine.models import Machine - - machine_id = str(Machine.current().id) - binaries_records = [] - - for binaries_file in BUILTIN_PLUGINS_DIR.glob('*/binaries.jsonl'): - try: - with open(binaries_file, 'r') as f: - for line in f: - line = line.strip() - if line and not line.startswith('#'): - try: - record = json.loads(line) - if record.get('type') == 'Binary': - record['machine_id'] = machine_id - binaries_records.append(record) - except json.JSONDecodeError: - pass - except Exception: - pass - - # Process binary declarations before running hooks - if binaries_records: - overrides = {'crawl': self} - process_hook_records(binaries_records, overrides=overrides) - # Discover and run on_Crawl hooks with open(debug_log, 'a') as f: f.write(f'Discovering Crawl hooks...\n') @@ -418,6 +394,34 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith if stats: print(f'[green]✓ Created: {stats}[/green]') + # Ensure any newly declared binaries are installed before creating snapshots + from archivebox.machine.models import Binary, Machine + from django.utils import timezone + + machine = Machine.current() + while True: + pending_binaries = Binary.objects.filter( + machine=machine, + status=Binary.StatusChoices.QUEUED, + retry_at__lte=timezone.now(), + ).order_by('retry_at') + if not pending_binaries.exists(): + break + + for binary in pending_binaries: + try: + binary.sm.tick() + except Exception: + continue + + # Exit if nothing else is immediately retryable + if not Binary.objects.filter( + machine=machine, + 
status=Binary.StatusChoices.QUEUED, + retry_at__lte=timezone.now(), + ).exists(): + break + # Create snapshots from all URLs in self.urls with open(debug_log, 'a') as f: f.write(f'Creating snapshots from URLs...\n') diff --git a/archivebox/hooks.py b/archivebox/hooks.py index 04bfa0ef..e5483e59 100644 --- a/archivebox/hooks.py +++ b/archivebox/hooks.py @@ -15,29 +15,29 @@ Hook contract: Exit: 0 = success, non-zero = failure Execution order: - - Hooks are numbered 00-99 with first digit determining step (0-9) - - All hooks in a step can run in parallel - - Steps execute sequentially (step 0 → step 1 → ... → step 9) - - Background hooks (.bg suffix) don't block step advancement + - Hooks are named with two-digit prefixes (00-99) and sorted lexicographically by filename + - Foreground hooks run sequentially in that order + - Background hooks (.bg suffix) run concurrently and do not block foreground progress + - After all foreground hooks complete, background hooks receive SIGTERM and must finalize - Failed extractors don't block subsequent extractors Hook Naming Convention: on_{ModelName}__{run_order}_{description}[.bg].{ext} Examples: - on_Snapshot__00_setup.py # Step 0, runs first - on_Snapshot__20_chrome_tab.bg.js # Step 2, background (doesn't block) - on_Snapshot__50_screenshot.js # Step 5, foreground (blocks step) - on_Snapshot__63_media.bg.py # Step 6, background (long-running) + on_Snapshot__00_setup.py # runs first + on_Snapshot__10_chrome_tab.bg.js # background (doesn't block) + on_Snapshot__50_screenshot.js # foreground (blocks) + on_Snapshot__63_media.bg.py # background (long-running) Dependency handling: Extractor plugins that depend on other plugins' output should check at runtime: ```python # Example: screenshot plugin depends on chrome plugin - chrome_session_dir = Path(os.environ.get('SNAPSHOT_DIR', '.')) / 'chrome_session' - if not (chrome_session_dir / 'session.json').exists(): - print('{"status": "skipped", "output": "chrome_session not 
available"}') + chrome_dir = Path(os.environ.get('SNAPSHOT_DIR', '.')) / 'chrome' + if not (chrome_dir / 'cdp_url.txt').exists(): + print('{"status": "skipped", "output": "chrome session not available"}') sys.exit(1) # Exit non-zero so it gets retried later ``` @@ -50,7 +50,7 @@ API (all hook logic lives here): discover_hooks(event) -> List[Path] Find hook scripts run_hook(script, ...) -> HookResult Execute a hook script run_hooks(event, ...) -> List[HookResult] Run all hooks for an event - extract_step(hook_name) -> int Get step number (0-9) from hook name + extract_step(hook_name) -> int Deprecated: get two-digit order prefix if present is_background_hook(name) -> bool Check if hook is background (.bg suffix) """ @@ -67,6 +67,7 @@ from typing import List, Dict, Any, Optional, TypedDict from django.conf import settings from django.utils import timezone +from django.utils.safestring import mark_safe # Plugin directories @@ -80,51 +81,33 @@ USER_PLUGINS_DIR = Path(getattr(settings, 'DATA_DIR', Path.cwd())) / 'plugins' def extract_step(hook_name: str) -> int: """ - Extract step number (0-9) from hook name. + Deprecated: return the two-digit order prefix as an integer (00-99) if present. - Hooks are numbered 00-99 with the first digit determining the step. - Pattern: on_{Model}__{XX}_{description}[.bg].{ext} - - Args: - hook_name: Hook filename (e.g., 'on_Snapshot__50_wget.py') - - Returns: - Step number 0-9, or 9 (default) for unnumbered hooks. - - Examples: - extract_step('on_Snapshot__05_chrome.py') -> 0 - extract_step('on_Snapshot__50_wget.py') -> 5 - extract_step('on_Snapshot__63_media.bg.py') -> 6 - extract_step('on_Snapshot__99_cleanup.sh') -> 9 - extract_step('on_Snapshot__unnumbered.py') -> 9 (default) + Hook execution is based on lexicographic ordering of filenames; callers should + not rely on parsed numeric steps for ordering decisions. 
""" - # Pattern matches __XX_ where XX is two digits match = re.search(r'__(\d{2})_', hook_name) if match: - two_digit = int(match.group(1)) - step = two_digit // 10 # First digit is the step (0-9) - return step - - # Log warning for unnumbered hooks and default to step 9 + return int(match.group(1)) import sys - print(f"Warning: Hook '{hook_name}' has no step number (expected __XX_), defaulting to step 9", file=sys.stderr) - return 9 + print(f"Warning: Hook '{hook_name}' has no order prefix (expected __XX_), defaulting to 99", file=sys.stderr) + return 99 def is_background_hook(hook_name: str) -> bool: """ - Check if a hook is a background hook (doesn't block step advancement). + Check if a hook is a background hook (doesn't block foreground progression). Background hooks have '.bg.' in their filename before the extension. Args: - hook_name: Hook filename (e.g., 'on_Snapshot__20_chrome_tab.bg.js') + hook_name: Hook filename (e.g., 'on_Snapshot__10_chrome_tab.bg.js') Returns: True if background hook, False if foreground. 
Examples: - is_background_hook('on_Snapshot__20_chrome_tab.bg.js') -> True + is_background_hook('on_Snapshot__10_chrome_tab.bg.js') -> True is_background_hook('on_Snapshot__50_wget.py') -> False is_background_hook('on_Snapshot__63_media.bg.py') -> True """ @@ -273,6 +256,7 @@ def run_hook( """ from archivebox.machine.models import Process, Machine import time + import sys start_time = time.time() # Auto-detect timeout from plugin config if not explicitly provided @@ -313,7 +297,7 @@ def run_hook( if ext == '.sh': cmd = ['bash', str(script)] elif ext == '.py': - cmd = ['python3', str(script)] + cmd = [sys.executable, str(script)] elif ext == '.js': cmd = ['node', str(script)] else: @@ -393,10 +377,10 @@ def run_hook( # Priority: config dict > Machine.config > derive from LIB_DIR node_path = config.get('NODE_PATH') if not node_path and lib_dir: - # Derive from LIB_DIR/npm/node_modules + # Derive from LIB_DIR/npm/node_modules (create if needed) node_modules_dir = Path(lib_dir) / 'npm' / 'node_modules' - if node_modules_dir.exists(): - node_path = str(node_modules_dir) + node_modules_dir.mkdir(parents=True, exist_ok=True) + node_path = str(node_modules_dir) if not node_path: try: # Fallback to Machine.config @@ -462,7 +446,7 @@ def run_hook( cmd=cmd, timeout=timeout, status=Process.StatusChoices.EXITED, - exit_code=-1, + exit_code=1, stderr=f'Failed to run hook: {type(e).__name__}: {e}', ) return process @@ -472,7 +456,6 @@ def extract_records_from_process(process: 'Process') -> List[Dict[str, Any]]: """ Extract JSONL records from a Process's stdout. - Uses the same parse_line() logic from misc/jsonl.py. Adds plugin metadata to each record. 
Args: @@ -481,32 +464,20 @@ def extract_records_from_process(process: 'Process') -> List[Dict[str, Any]]: Returns: List of parsed JSONL records with plugin metadata """ - from archivebox.misc.jsonl import parse_line - - records = [] - - # Read stdout from process - stdout = process.stdout - if not stdout and process.stdout_file and process.stdout_file.exists(): - stdout = process.stdout_file.read_text() - - if not stdout: - return records + records = process.get_records() + if not records: + return [] # Extract plugin metadata from process.pwd and process.cmd plugin_name = Path(process.pwd).name if process.pwd else 'unknown' hook_name = Path(process.cmd[1]).name if len(process.cmd) > 1 else 'unknown' plugin_hook = process.cmd[1] if len(process.cmd) > 1 else '' - # Parse each line as JSONL - for line in stdout.splitlines(): - record = parse_line(line) - if record and 'type' in record: - # Add plugin metadata to record - record.setdefault('plugin', plugin_name) - record.setdefault('hook_name', hook_name) - record.setdefault('plugin_hook', plugin_hook) - records.append(record) + for record in records: + # Add plugin metadata to record + record.setdefault('plugin', plugin_name) + record.setdefault('hook_name', hook_name) + record.setdefault('plugin_hook', plugin_hook) return records @@ -538,18 +509,13 @@ def collect_urls_from_plugins(snapshot_dir: Path) -> List[Dict[str, Any]]: continue try: - with open(urls_file, 'r') as f: - for line in f: - line = line.strip() - if line: - try: - entry = json.loads(line) - if entry.get('url'): - # Track which parser plugin found this URL - entry['plugin'] = subdir.name - urls.append(entry) - except json.JSONDecodeError: - continue + from archivebox.machine.models import Process + text = urls_file.read_text() + for entry in Process.parse_records_from_text(text): + if entry.get('url'): + # Track which parser plugin found this URL + entry['plugin'] = subdir.name + urls.append(entry) except Exception: pass @@ -610,8 +576,8 @@ def 
get_plugins() -> List[str]: The plugin name is the plugin directory name, not the hook script name. Example: - archivebox/plugins/chrome_session/on_Snapshot__20_chrome_tab.bg.js - -> plugin = 'chrome_session' + archivebox/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js + -> plugin = 'chrome' Sorted alphabetically (plugins control their hook order via numeric prefixes in hook names). """ @@ -817,7 +783,7 @@ def discover_plugin_configs() -> Dict[str, Dict[str, Any]]: Returns: Dict mapping plugin names to their parsed JSONSchema configs. - e.g., {'wget': {...schema...}, 'chrome_session': {...schema...}} + e.g., {'wget': {...schema...}, 'chrome': {...schema...}} Example config.json: { @@ -928,14 +894,10 @@ def get_plugin_special_config(plugin_name: str, config: Dict[str, Any]) -> Dict[ if plugins_whitelist: # PLUGINS whitelist is specified - only enable plugins in the list plugin_names = [p.strip().lower() for p in plugins_whitelist.split(',') if p.strip()] - import sys - print(f"DEBUG: PLUGINS whitelist='{plugins_whitelist}', checking plugin '{plugin_name}', plugin_names={plugin_names}", file=sys.stderr) if plugin_name.lower() not in plugin_names: # Plugin not in whitelist - explicitly disabled - print(f"DEBUG: Plugin '{plugin_name}' NOT in whitelist, disabling", file=sys.stderr) enabled = False else: - print(f"DEBUG: Plugin '{plugin_name}' IS in whitelist, enabling", file=sys.stderr) # Plugin is in whitelist - check if explicitly disabled by PLUGINNAME_ENABLED enabled_key = f'{plugin_upper}_ENABLED' enabled = config.get(enabled_key) @@ -945,10 +907,8 @@ def get_plugin_special_config(plugin_name: str, config: Dict[str, Any]) -> Dict[ enabled = enabled.lower() not in ('false', '0', 'no', '') else: # No PLUGINS whitelist - use PLUGINNAME_ENABLED (default True) - import sys enabled_key = f'{plugin_upper}_ENABLED' enabled = config.get(enabled_key) - print(f"DEBUG: NO PLUGINS whitelist in config, checking {enabled_key}={enabled}", file=sys.stderr) if enabled is None: 
enabled = True elif isinstance(enabled, str): @@ -1064,10 +1024,10 @@ def get_plugin_icon(plugin: str) -> str: # Try plugin-provided icon template icon_template = get_plugin_template(plugin, 'icon', fallback=False) if icon_template: - return icon_template.strip() + return mark_safe(icon_template.strip()) # Fall back to generic folder icon - return '📁' + return mark_safe('📁') def get_all_plugin_icons() -> Dict[str, str]: @@ -1204,18 +1164,14 @@ def create_model_record(record: Dict[str, Any]) -> Any: return obj elif record_type == 'Machine': - # Machine config update (special _method handling) - method = record.pop('_method', None) - if method == 'update': - key = record.get('key') - value = record.get('value') - if key and value: - machine = Machine.current() - if not machine.config: - machine.config = {} - machine.config[key] = value - machine.save(update_fields=['config']) - return machine + config_patch = record.get('config') + if isinstance(config_patch, dict) and config_patch: + machine = Machine.current() + if not machine.config: + machine.config = {} + machine.config.update(config_patch) + machine.save(update_fields=['config']) + return machine return None # Add more types as needed (Dependency, Snapshot, etc.) 
diff --git a/archivebox/machine/detect.py b/archivebox/machine/detect.py index 84595d77..9d44df0d 100644 --- a/archivebox/machine/detect.py +++ b/archivebox/machine/detect.py @@ -227,33 +227,45 @@ def get_os_info() -> Dict[str, Any]: } def get_host_stats() -> Dict[str, Any]: - with tempfile.TemporaryDirectory() as tmp_dir: - tmp_usage = psutil.disk_usage(str(tmp_dir)) - app_usage = psutil.disk_usage(str(PACKAGE_DIR)) - data_usage = psutil.disk_usage(str(DATA_DIR)) - mem_usage = psutil.virtual_memory() - swap_usage = psutil.swap_memory() - return { - "cpu_boot_time": datetime.fromtimestamp(psutil.boot_time()).isoformat(), - "cpu_count": psutil.cpu_count(logical=False), - "cpu_load": psutil.getloadavg(), - # "cpu_pct": psutil.cpu_percent(interval=1), - "mem_virt_used_pct": mem_usage.percent, - "mem_virt_used_gb": round(mem_usage.used / 1024 / 1024 / 1024, 3), - "mem_virt_free_gb": round(mem_usage.free / 1024 / 1024 / 1024, 3), - "mem_swap_used_pct": swap_usage.percent, - "mem_swap_used_gb": round(swap_usage.used / 1024 / 1024 / 1024, 3), - "mem_swap_free_gb": round(swap_usage.free / 1024 / 1024 / 1024, 3), - "disk_tmp_used_pct": tmp_usage.percent, - "disk_tmp_used_gb": round(tmp_usage.used / 1024 / 1024 / 1024, 3), - "disk_tmp_free_gb": round(tmp_usage.free / 1024 / 1024 / 1024, 3), # in GB - "disk_app_used_pct": app_usage.percent, - "disk_app_used_gb": round(app_usage.used / 1024 / 1024 / 1024, 3), - "disk_app_free_gb": round(app_usage.free / 1024 / 1024 / 1024, 3), - "disk_data_used_pct": data_usage.percent, - "disk_data_used_gb": round(data_usage.used / 1024 / 1024 / 1024, 3), - "disk_data_free_gb": round(data_usage.free / 1024 / 1024 / 1024, 3), - } + try: + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_usage = psutil.disk_usage(str(tmp_dir)) + app_usage = psutil.disk_usage(str(PACKAGE_DIR)) + data_usage = psutil.disk_usage(str(DATA_DIR)) + mem_usage = psutil.virtual_memory() + try: + swap_usage = psutil.swap_memory() + swap_used_pct = swap_usage.percent + 
swap_used_gb = round(swap_usage.used / 1024 / 1024 / 1024, 3) + swap_free_gb = round(swap_usage.free / 1024 / 1024 / 1024, 3) + except OSError: + # Some sandboxed environments deny access to swap stats + swap_used_pct = 0.0 + swap_used_gb = 0.0 + swap_free_gb = 0.0 + return { + "cpu_boot_time": datetime.fromtimestamp(psutil.boot_time()).isoformat(), + "cpu_count": psutil.cpu_count(logical=False), + "cpu_load": psutil.getloadavg(), + # "cpu_pct": psutil.cpu_percent(interval=1), + "mem_virt_used_pct": mem_usage.percent, + "mem_virt_used_gb": round(mem_usage.used / 1024 / 1024 / 1024, 3), + "mem_virt_free_gb": round(mem_usage.free / 1024 / 1024 / 1024, 3), + "mem_swap_used_pct": swap_used_pct, + "mem_swap_used_gb": swap_used_gb, + "mem_swap_free_gb": swap_free_gb, + "disk_tmp_used_pct": tmp_usage.percent, + "disk_tmp_used_gb": round(tmp_usage.used / 1024 / 1024 / 1024, 3), + "disk_tmp_free_gb": round(tmp_usage.free / 1024 / 1024 / 1024, 3), # in GB + "disk_app_used_pct": app_usage.percent, + "disk_app_used_gb": round(app_usage.used / 1024 / 1024 / 1024, 3), + "disk_app_free_gb": round(app_usage.free / 1024 / 1024 / 1024, 3), + "disk_data_used_pct": data_usage.percent, + "disk_data_used_gb": round(data_usage.used / 1024 / 1024 / 1024, 3), + "disk_data_free_gb": round(data_usage.free / 1024 / 1024 / 1024, 3), + } + except Exception: + return {} def get_host_immutable_info(host_info: Dict[str, Any]) -> Dict[str, Any]: return { diff --git a/archivebox/machine/models.py b/archivebox/machine/models.py index 07da29ec..210452f9 100755 --- a/archivebox/machine/models.py +++ b/archivebox/machine/models.py @@ -113,23 +113,20 @@ class Machine(ModelWithHealthStats): Update Machine config from JSON dict. Args: - record: JSON dict with '_method': 'update', 'key': '...', 'value': '...' 
+ record: JSON dict with 'config': {key: value} patch overrides: Not used Returns: Machine instance or None """ - method = record.get('_method') - if method == 'update': - key = record.get('key') - value = record.get('value') - if key and value: - machine = Machine.current() - if not machine.config: - machine.config = {} - machine.config[key] = value - machine.save(update_fields=['config']) - return machine + config_patch = record.get('config') + if isinstance(config_patch, dict) and config_patch: + machine = Machine.current() + if not machine.config: + machine.config = {} + machine.config.update(config_patch) + machine.save(update_fields=['config']) + return machine return None @@ -458,31 +455,31 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine): continue # Parse JSONL output to check for successful installation - stdout_file = plugin_output_dir / 'stdout.log' - if stdout_file.exists(): - stdout = stdout_file.read_text() - for line in stdout.splitlines(): - if line.strip() and line.strip().startswith('{'): - try: - record = json.loads(line) - if record.get('type') == 'Binary' and record.get('abspath'): - # Update self from successful installation - self.abspath = record['abspath'] - self.version = record.get('version', '') - self.sha256 = record.get('sha256', '') - self.binprovider = record.get('binprovider', 'env') - self.status = self.StatusChoices.INSTALLED - self.save() + from archivebox.hooks import extract_records_from_process, process_hook_records + records = extract_records_from_process(process) + if records: + process_hook_records(records, overrides={}) + binary_records = [ + record for record in records + if record.get('type') == 'Binary' and record.get('abspath') + ] + if binary_records: + record = binary_records[0] + # Update self from successful installation + self.abspath = record['abspath'] + self.version = record.get('version', '') + self.sha256 = record.get('sha256', '') + self.binprovider = record.get('binprovider', 'env') + 
self.status = self.StatusChoices.INSTALLED + self.save() - # Symlink binary into LIB_BIN_DIR if configured - from django.conf import settings - lib_bin_dir = getattr(settings, 'LIB_BIN_DIR', None) - if lib_bin_dir: - self.symlink_to_lib_bin(lib_bin_dir) + # Symlink binary into LIB_BIN_DIR if configured + from django.conf import settings + lib_bin_dir = getattr(settings, 'LIB_BIN_DIR', None) + if lib_bin_dir: + self.symlink_to_lib_bin(lib_bin_dir) - return - except json.JSONDecodeError: - continue + return # No hook succeeded - leave status as QUEUED (will retry later) # Don't set to FAILED since we don't have that status anymore @@ -861,6 +858,27 @@ class Process(models.Model): record['timeout'] = self.timeout return record + @classmethod + def parse_records_from_text(cls, text: str) -> list[dict]: + """Parse JSONL records from raw text using the shared JSONL parser.""" + from archivebox.misc.jsonl import parse_line + + records: list[dict] = [] + if not text: + return records + for line in text.splitlines(): + record = parse_line(line) + if record and record.get('type'): + records.append(record) + return records + + def get_records(self) -> list[dict]: + """Parse JSONL records from this process's stdout.""" + stdout = self.stdout + if not stdout and self.stdout_file and self.stdout_file.exists(): + stdout = self.stdout_file.read_text() + return self.parse_records_from_text(stdout or '') + @staticmethod def from_json(record: dict, overrides: dict = None): """ @@ -919,6 +937,7 @@ class Process(models.Model): if (_CURRENT_PROCESS.pid == current_pid and _CURRENT_PROCESS.machine_id == machine.id and timezone.now() < _CURRENT_PROCESS.modified_at + timedelta(seconds=PROCESS_RECHECK_INTERVAL)): + _CURRENT_PROCESS.ensure_log_files() return _CURRENT_PROCESS _CURRENT_PROCESS = None @@ -945,6 +964,7 @@ class Process(models.Model): db_start_time = existing.started_at.timestamp() if abs(db_start_time - os_start_time) < START_TIME_TOLERANCE: _CURRENT_PROCESS = existing + 
_CURRENT_PROCESS.ensure_log_files() return existing # No valid existing record - create new one @@ -977,6 +997,7 @@ class Process(models.Model): started_at=started_at, status=cls.StatusChoices.RUNNING, ) + _CURRENT_PROCESS.ensure_log_files() return _CURRENT_PROCESS @classmethod @@ -1089,7 +1110,7 @@ class Process(models.Model): if is_stale: proc.status = cls.StatusChoices.EXITED proc.ended_at = proc.ended_at or timezone.now() - proc.exit_code = proc.exit_code if proc.exit_code is not None else -1 + proc.exit_code = proc.exit_code if proc.exit_code is not None else 0 proc.save(update_fields=['status', 'ended_at', 'exit_code']) cleaned += 1 @@ -1209,7 +1230,15 @@ class Process(models.Model): the actual OS process exists and matches our record. """ proc = self.proc - return proc is not None and proc.is_running() + if proc is None: + return False + try: + # Treat zombies as not running (they should be reaped) + if proc.status() == psutil.STATUS_ZOMBIE: + return False + except Exception: + pass + return proc.is_running() def is_alive(self) -> bool: """ @@ -1421,6 +1450,22 @@ class Process(models.Model): except OSError: pass + def ensure_log_files(self) -> None: + """Ensure stdout/stderr log files exist for this process.""" + if not self.pwd: + return + try: + Path(self.pwd).mkdir(parents=True, exist_ok=True) + except OSError: + return + try: + if self.stdout_file: + self.stdout_file.touch(exist_ok=True) + if self.stderr_file: + self.stderr_file.touch(exist_ok=True) + except OSError: + return + def _build_env(self) -> dict: """Build environment dict for subprocess, merging stored env with system.""" import json @@ -1507,9 +1552,11 @@ class Process(models.Model): proc.wait(timeout=self.timeout) self.exit_code = proc.returncode except subprocess.TimeoutExpired: + import signal + proc.kill() proc.wait() - self.exit_code = -1 + self.exit_code = 128 + signal.SIGKILL self.ended_at = timezone.now() if stdout_path.exists(): @@ -1579,9 +1626,19 @@ class Process(models.Model): 
exit_code if exited, None if still running """ if self.status == self.StatusChoices.EXITED: + if self.exit_code == -1: + self.exit_code = 137 + self.save(update_fields=['exit_code']) return self.exit_code if not self.is_running: + # Reap child process if it's a zombie (best-effort) + proc = self.proc + if proc is not None: + try: + proc.wait(timeout=0) + except Exception: + pass # Process exited - read output and copy to DB if self.stdout_file and self.stdout_file.exists(): self.stdout = self.stdout_file.read_text() @@ -1603,7 +1660,9 @@ class Process(models.Model): # cmd_file.unlink(missing_ok=True) # Try to get exit code from proc or default to unknown - self.exit_code = self.exit_code if self.exit_code is not None else -1 + self.exit_code = self.exit_code if self.exit_code is not None else 0 + if self.exit_code == -1: + self.exit_code = 137 self.ended_at = timezone.now() self.status = self.StatusChoices.EXITED self.save() @@ -1723,6 +1782,7 @@ class Process(models.Model): import os killed_count = 0 + used_sigkill = False proc = self.proc if proc is None: # Already dead @@ -1772,11 +1832,15 @@ class Process(models.Model): try: os.kill(pid, signal.SIGKILL) killed_count += 1 + used_sigkill = True except (OSError, ProcessLookupError): pass # Update self status - self.exit_code = 128 + signal.SIGTERM if killed_count > 0 else 0 + if used_sigkill: + self.exit_code = 128 + signal.SIGKILL + else: + self.exit_code = 128 + signal.SIGTERM if killed_count > 0 else 0 self.status = self.StatusChoices.EXITED self.ended_at = timezone.now() self.save() @@ -1925,6 +1989,50 @@ class Process(models.Model): return 0 + @classmethod + def cleanup_orphaned_workers(cls) -> int: + """ + Kill orphaned worker/hook processes whose root process is no longer running. + + Orphaned if: + - Root (orchestrator/cli) is not running, or + - No orchestrator/cli ancestor exists. + + Standalone worker runs (archivebox run --snapshot-id) are allowed. 
+ """ + killed = 0 + + running_children = cls.objects.filter( + process_type__in=[cls.TypeChoices.WORKER, cls.TypeChoices.HOOK], + status=cls.StatusChoices.RUNNING, + ) + + for proc in running_children: + if not proc.is_running: + continue + + root = proc.root + # Standalone worker/hook process (run directly) + if root.id == proc.id and root.process_type in (cls.TypeChoices.WORKER, cls.TypeChoices.HOOK): + continue + + # If root is an active orchestrator/cli, keep it + if root.process_type in (cls.TypeChoices.ORCHESTRATOR, cls.TypeChoices.CLI) and root.is_running: + continue + + try: + if proc.process_type == cls.TypeChoices.HOOK: + proc.kill_tree(graceful_timeout=1.0) + else: + proc.terminate(graceful_timeout=1.0) + killed += 1 + except Exception: + continue + + if killed: + print(f'[yellow]🧹 Cleaned up {killed} orphaned worker/hook process(es)[/yellow]') + return killed + # ============================================================================= # Binary State Machine @@ -2126,5 +2234,3 @@ class ProcessMachine(BaseStateMachine, strict_states=True): # Manually register state machines with python-statemachine registry registry.register(BinaryMachine) registry.register(ProcessMachine) - - diff --git a/archivebox/machine/tests/test_machine_models.py b/archivebox/machine/tests/test_machine_models.py index 83875057..b36fd7a2 100644 --- a/archivebox/machine/tests/test_machine_models.py +++ b/archivebox/machine/tests/test_machine_models.py @@ -79,9 +79,9 @@ class TestMachineModel(TestCase): """Machine.from_json() should update machine config.""" Machine.current() # Ensure machine exists record = { - '_method': 'update', - 'key': 'WGET_BINARY', - 'value': '/usr/bin/wget', + 'config': { + 'WGET_BINARY': '/usr/bin/wget', + }, } result = Machine.from_json(record) @@ -190,12 +190,12 @@ class TestBinaryModel(TestCase): old_modified = binary.modified_at binary.update_and_requeue( - status=Binary.StatusChoices.STARTED, + status=Binary.StatusChoices.QUEUED, 
retry_at=timezone.now() + timedelta(seconds=60), ) binary.refresh_from_db() - self.assertEqual(binary.status, Binary.StatusChoices.STARTED) + self.assertEqual(binary.status, Binary.StatusChoices.QUEUED) self.assertGreater(binary.modified_at, old_modified) @@ -221,12 +221,12 @@ class TestBinaryStateMachine(TestCase): def test_binary_state_machine_can_start(self): """BinaryMachine.can_start() should check name and binproviders.""" sm = BinaryMachine(self.binary) - self.assertTrue(sm.can_start()) + self.assertTrue(sm.can_install()) self.binary.binproviders = '' self.binary.save() sm = BinaryMachine(self.binary) - self.assertFalse(sm.can_start()) + self.assertFalse(sm.can_install()) class TestProcessModel(TestCase): @@ -415,11 +415,15 @@ class TestProcessLifecycle(TestCase): def test_process_is_running_current_pid(self): """is_running should be True for current PID.""" + import psutil + from datetime import datetime + + proc_start = datetime.fromtimestamp(psutil.Process(os.getpid()).create_time(), tz=timezone.get_current_timezone()) proc = Process.objects.create( machine=self.machine, status=Process.StatusChoices.RUNNING, pid=os.getpid(), - started_at=timezone.now(), + started_at=proc_start, ) self.assertTrue(proc.is_running) @@ -450,6 +454,22 @@ class TestProcessLifecycle(TestCase): proc.refresh_from_db() self.assertEqual(proc.status, Process.StatusChoices.EXITED) + def test_process_poll_normalizes_negative_exit_code(self): + """poll() should normalize -1 exit codes to 137.""" + proc = Process.objects.create( + machine=self.machine, + status=Process.StatusChoices.EXITED, + pid=999999, + exit_code=-1, + started_at=timezone.now(), + ) + + exit_code = proc.poll() + + self.assertEqual(exit_code, 137) + proc.refresh_from_db() + self.assertEqual(proc.exit_code, 137) + def test_process_terminate_dead_process(self): """terminate() should handle already-dead process.""" proc = Process.objects.create( diff --git a/archivebox/misc/checks.py b/archivebox/misc/checks.py index 
c5795d8a..09929d36 100644 --- a/archivebox/misc/checks.py +++ b/archivebox/misc/checks.py @@ -180,9 +180,11 @@ def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True): return len(f'file://{socket_file}') <= 96 tmp_is_valid = False + allow_no_unix_sockets = os.environ.get('ARCHIVEBOX_ALLOW_NO_UNIX_SOCKETS', '').lower() in ('1', 'true', 'yes') try: tmp_is_valid = dir_is_writable(tmp_dir) - tmp_is_valid = tmp_is_valid and assert_dir_can_contain_unix_sockets(tmp_dir) + if not allow_no_unix_sockets: + tmp_is_valid = tmp_is_valid and assert_dir_can_contain_unix_sockets(tmp_dir) assert tmp_is_valid, f'ArchiveBox user PUID={ARCHIVEBOX_USER} PGID={ARCHIVEBOX_GROUP} is unable to write to TMP_DIR={tmp_dir}' assert len(f'file://{socket_file}') <= 96, f'ArchiveBox TMP_DIR={tmp_dir} is too long, dir containing unix socket files must be <90 chars.' return True diff --git a/archivebox/misc/progress_layout.py b/archivebox/misc/progress_layout.py index fc4001d8..2db2e1c1 100644 --- a/archivebox/misc/progress_layout.py +++ b/archivebox/misc/progress_layout.py @@ -3,30 +3,29 @@ Rich Layout-based live progress display for ArchiveBox orchestrator. 
Shows a comprehensive dashboard with: - Top: Crawl queue status (full width) -- Middle: 4-column grid of SnapshotWorker progress panels +- Middle: Running process logs (dynamic panels) - Bottom: Orchestrator/Daphne logs """ __package__ = 'archivebox.misc' from datetime import datetime, timezone -from typing import Dict, List, Optional, Any +from typing import List, Optional, Any from collections import deque +from pathlib import Path from rich import box from rich.align import Align -from rich.console import Console, Group, RenderableType +from rich.console import Group from rich.layout import Layout +from rich.columns import Columns from rich.panel import Panel -from rich.progress import Progress, BarColumn, TextColumn, TaskProgressColumn, SpinnerColumn -from rich.table import Table from rich.text import Text +from rich.table import Table +from rich.tree import Tree from archivebox.config import VERSION -# Maximum number of SnapshotWorker columns to display -MAX_WORKER_COLUMNS = 4 - class CrawlQueuePanel: """Display crawl queue status across full width.""" @@ -35,6 +34,8 @@ class CrawlQueuePanel: self.orchestrator_status = "Idle" self.crawl_queue_count = 0 self.crawl_workers_count = 0 + self.binary_queue_count = 0 + self.binary_workers_count = 0 self.max_crawl_workers = 8 self.crawl_id: Optional[str] = None @@ -51,19 +52,27 @@ class CrawlQueuePanel: left_text.append(f"v{VERSION}", style="bold yellow") left_text.append(f" • {datetime.now(timezone.utc).strftime('%H:%M:%S')}", style="grey53") - # Center-left: Crawl queue status + # Center-left: Crawl + Binary queue status queue_style = "yellow" if self.crawl_queue_count > 0 else "grey53" center_left_text = Text() center_left_text.append("Crawls: ", style="white") center_left_text.append(str(self.crawl_queue_count), style=f"bold {queue_style}") center_left_text.append(" queued", style="grey53") + center_left_text.append(" • Binaries: ", style="white") + binary_queue_style = "yellow" if self.binary_queue_count > 0 else 
"grey53" + center_left_text.append(str(self.binary_queue_count), style=f"bold {binary_queue_style}") + center_left_text.append(" queued", style="grey53") - # Center-right: CrawlWorker status + # Center-right: Worker status worker_style = "green" if self.crawl_workers_count > 0 else "grey53" center_right_text = Text() center_right_text.append("Workers: ", style="white") center_right_text.append(f"{self.crawl_workers_count}/{self.max_crawl_workers}", style=f"bold {worker_style}") - center_right_text.append(" active", style="grey53") + center_right_text.append(" crawl", style="grey53") + binary_worker_style = "green" if self.binary_workers_count > 0 else "grey53" + center_right_text.append(" • ", style="grey53") + center_right_text.append(str(self.binary_workers_count), style=f"bold {binary_worker_style}") + center_right_text.append(" binary", style="grey53") # Right: Orchestrator status status_color = "green" if self.crawl_workers_count > 0 else "grey53" @@ -74,151 +83,302 @@ class CrawlQueuePanel: right_text.append(f" [{self.crawl_id[:8]}]", style="grey53") grid.add_row(left_text, center_left_text, center_right_text, right_text) - return Panel(grid, style="white on blue", box=box.ROUNDED) + return Panel(grid, style="white on blue", box=box.HORIZONTALS) -class SnapshotWorkerPanel: - """Display progress for a single SnapshotWorker.""" +class ProcessLogPanel: + """Display logs for a running Process.""" - def __init__(self, worker_num: int): - self.worker_num = worker_num - self.snapshot_id: Optional[str] = None - self.snapshot_url: Optional[str] = None - self.total_hooks: int = 0 - self.completed_hooks: int = 0 - self.current_plugin: Optional[str] = None - self.status: str = "idle" # idle, working, completed - self.recent_logs: deque = deque(maxlen=5) + def __init__(self, process: Any, max_lines: int = 8, compact: bool | None = None): + self.process = process + self.max_lines = max_lines + self.compact = compact def __rich__(self) -> Panel: - if self.status == "idle": 
- content = Align.center( - Text("Idle", style="grey53"), - vertical="middle", - ) - border_style = "grey53" - title_style = "grey53" - else: - # Build progress display - lines = [] + is_pending = self._is_pending() + output_line = '' if is_pending else self._output_line() + stdout_lines = [] + stderr_lines = [] + try: + stdout_lines = list(self.process.tail_stdout(lines=self.max_lines, follow=False)) + stderr_lines = list(self.process.tail_stderr(lines=self.max_lines, follow=False)) + except Exception: + stdout_lines = [] + stderr_lines = [] - # URL (truncated) - if self.snapshot_url: - url_display = self.snapshot_url[:35] + "..." if len(self.snapshot_url) > 35 else self.snapshot_url - lines.append(Text(url_display, style="cyan")) - lines.append(Text()) # Spacing + header_lines = [] + chrome_launch_line = self._chrome_launch_line(stderr_lines, stdout_lines) + if chrome_launch_line: + header_lines.append(Text(chrome_launch_line, style="grey53")) + if output_line: + header_lines.append(Text(output_line, style="grey53")) + log_lines = [] + for line in stdout_lines: + if line: + log_lines.append(Text(line, style="white")) + for line in stderr_lines: + if line: + log_lines.append(Text(line, style="cyan")) - # Progress bar - if self.total_hooks > 0: - pct = (self.completed_hooks / self.total_hooks) * 100 - bar_width = 30 - filled = int((pct / 100) * bar_width) - bar = "█" * filled + "░" * (bar_width - filled) + compact = self.compact if self.compact is not None else self._is_background_hook() + max_body = max(1, self.max_lines - len(header_lines)) + if not log_lines: + log_lines = [] - # Color based on progress - if pct < 30: - bar_style = "yellow" - elif pct < 100: - bar_style = "green" - else: - bar_style = "blue" + lines = header_lines + log_lines[-max_body:] - progress_text = Text() - progress_text.append(bar, style=bar_style) - progress_text.append(f" {pct:.0f}%", style="white") - lines.append(progress_text) - lines.append(Text()) # Spacing - - # Stats - stats = 
Table.grid(padding=(0, 1)) - stats.add_column(style="grey53", no_wrap=True) - stats.add_column(style="white") - stats.add_row("Hooks:", f"{self.completed_hooks}/{self.total_hooks}") - if self.current_plugin: - stats.add_row("Current:", Text(self.current_plugin, style="yellow")) - lines.append(stats) - lines.append(Text()) # Spacing - - # Recent logs - if self.recent_logs: - lines.append(Text("Recent:", style="grey53")) - for log_msg, log_style in self.recent_logs: - log_text = Text(f"• {log_msg[:30]}", style=log_style) - lines.append(log_text) - - content = Group(*lines) - border_style = "green" if self.status == "working" else "blue" - title_style = "green" if self.status == "working" else "blue" + content = Group(*lines) if lines else Text("") + title = self._title() + border_style = "grey53" if is_pending else "cyan" + height = 2 if is_pending else None return Panel( content, - title=f"[{title_style}]Worker {self.worker_num}", + title=title, border_style=border_style, - box=box.ROUNDED, - height=20, + box=box.HORIZONTALS, + padding=(0, 1), + height=height, ) - def add_log(self, message: str, style: str = "white"): - """Add a log message to this worker's recent logs.""" - self.recent_logs.append((message, style)) + def _title(self) -> str: + process_type = getattr(self.process, 'process_type', 'process') + worker_type = getattr(self.process, 'worker_type', '') + pid = getattr(self.process, 'pid', None) + label = process_type + if process_type == 'worker' and worker_type: + label, worker_suffix = self._worker_label(worker_type) + elif process_type == 'hook': + try: + cmd = getattr(self.process, 'cmd', []) + hook_path = Path(cmd[1]) if len(cmd) > 1 else None + hook_name = hook_path.name if hook_path else 'hook' + plugin_name = hook_path.parent.name if hook_path and hook_path.parent.name else 'hook' + except Exception: + hook_name = 'hook' + plugin_name = 'hook' + label = f"{plugin_name}/{hook_name}" + worker_suffix = '' + else: + worker_suffix = '' + + url = 
self._extract_url() + url_suffix = f" url={self._abbrev_url(url)}" if url else "" + time_suffix = self._elapsed_suffix() + title_style = "grey53" if self._is_pending() else "bold white" + if pid: + return f"[{title_style}]{label}[/{title_style}] [grey53]pid={pid}{worker_suffix}{url_suffix}{time_suffix}[/grey53]" + return f"[{title_style}]{label}[/{title_style}]{f' [grey53]{worker_suffix.strip()} {url_suffix.strip()}{time_suffix}[/grey53]' if (worker_suffix or url_suffix or time_suffix) else ''}".rstrip() + + def _is_background_hook(self) -> bool: + if getattr(self.process, 'process_type', '') != 'hook': + return False + try: + cmd = getattr(self.process, 'cmd', []) + hook_path = Path(cmd[1]) if len(cmd) > 1 else None + hook_name = hook_path.name if hook_path else '' + return '.bg.' in hook_name + except Exception: + return False + + def _is_pending(self) -> bool: + status = getattr(self.process, 'status', '') + if status in ('queued', 'pending', 'backoff'): + return True + if getattr(self.process, 'process_type', '') == 'hook' and not getattr(self.process, 'pid', None): + return True + return False + + def _worker_label(self, worker_type: str) -> tuple[str, str]: + cmd = getattr(self.process, 'cmd', []) or [] + if worker_type == 'crawl': + crawl_id = self._extract_arg(cmd, '--crawl-id') + suffix = '' + if crawl_id: + suffix = f" id={str(crawl_id)[-8:]}" + try: + from archivebox.crawls.models import Crawl + crawl = Crawl.objects.filter(id=crawl_id).first() + if crawl: + urls = crawl.get_urls_list() + if urls: + url_list = self._abbrev_urls(urls) + suffix += f" urls={url_list}" + except Exception: + pass + return 'crawl', suffix + if worker_type == 'snapshot': + snapshot_id = self._extract_arg(cmd, '--snapshot-id') + suffix = '' + if snapshot_id: + suffix = f" id={str(snapshot_id)[-8:]}" + try: + from archivebox.core.models import Snapshot + snap = Snapshot.objects.filter(id=snapshot_id).first() + if snap and snap.url: + suffix += f" url={self._abbrev_url(snap.url, 
max_len=48)}" + except Exception: + pass + return 'snapshot', suffix + return f"worker:{worker_type}", '' + + @staticmethod + def _extract_arg(cmd: list[str], key: str) -> str | None: + for i, part in enumerate(cmd): + if part.startswith(f'{key}='): + return part.split('=', 1)[1] + if part == key and i + 1 < len(cmd): + return cmd[i + 1] + return None + + def _abbrev_urls(self, urls: list[str], max_len: int = 48) -> str: + if not urls: + return '' + if len(urls) == 1: + return self._abbrev_url(urls[0], max_len=max_len) + first = self._abbrev_url(urls[0], max_len=max_len) + return f"{first},+{len(urls) - 1}" + + def _extract_url(self) -> str: + url = getattr(self.process, 'url', None) + if url: + return str(url) + cmd = getattr(self.process, 'cmd', []) or [] + for i, part in enumerate(cmd): + if part.startswith('--url='): + return part.split('=', 1)[1].strip() + if part == '--url' and i + 1 < len(cmd): + return str(cmd[i + 1]).strip() + return '' + + def _abbrev_url(self, url: str, max_len: int = 48) -> str: + if not url: + return '' + if len(url) <= max_len: + return url + return f"{url[:max_len - 3]}..." 
+ + def _chrome_launch_line(self, stderr_lines: list[str], stdout_lines: list[str]) -> str: + try: + cmd = getattr(self.process, 'cmd', []) + hook_path = Path(cmd[1]) if len(cmd) > 1 else None + hook_name = hook_path.name if hook_path else '' + if 'chrome_launch' not in hook_name: + return '' + + pid = '' + ws = '' + for line in stderr_lines + stdout_lines: + if not ws and 'CDP URL:' in line: + ws = line.split('CDP URL:', 1)[1].strip() + if not pid and 'PID:' in line: + pid = line.split('PID:', 1)[1].strip() + + if pid and ws: + return f"Chrome pid={pid} {ws}" + if ws: + return f"Chrome {ws}" + if pid: + return f"Chrome pid={pid}" + try: + from archivebox import DATA_DIR + base = Path(DATA_DIR) + pwd = getattr(self.process, 'pwd', None) + if pwd: + chrome_dir = Path(pwd) + if not chrome_dir.is_absolute(): + chrome_dir = (base / chrome_dir).resolve() + cdp_file = chrome_dir / 'cdp_url.txt' + pid_file = chrome_dir / 'chrome.pid' + if cdp_file.exists(): + ws = cdp_file.read_text().strip() + if pid_file.exists(): + pid = pid_file.read_text().strip() + if pid and ws: + return f"Chrome pid={pid} {ws}" + if ws: + return f"Chrome {ws}" + if pid: + return f"Chrome pid={pid}" + except Exception: + pass + except Exception: + return '' + return '' + + def _elapsed_suffix(self) -> str: + started_at = getattr(self.process, 'started_at', None) + timeout = getattr(self.process, 'timeout', None) + if not started_at or not timeout: + return '' + try: + now = datetime.now(timezone.utc) if started_at.tzinfo else datetime.now() + elapsed = int((now - started_at).total_seconds()) + elapsed = max(elapsed, 0) + return f" [{elapsed}/{int(timeout)}s]" + except Exception: + return '' + + def _output_line(self) -> str: + pwd = getattr(self.process, 'pwd', None) + if not pwd: + return '' + try: + from archivebox import DATA_DIR + rel = Path(pwd) + base = Path(DATA_DIR) + if rel.is_absolute(): + try: + rel = rel.relative_to(base) + except Exception: + pass + rel_str = f"./{rel}" if not 
str(rel).startswith("./") else str(rel) + return f"{rel_str}" + except Exception: + return f"{pwd}" -class CrawlWorkerLogPanel: - """Display CrawlWorker logs by tailing stdout/stderr from Process.""" +class WorkerLogPanel: + """Display worker logs by tailing stdout/stderr from Process.""" - def __init__(self, max_lines: int = 8): + def __init__(self, title: str, empty_message: str, running_message: str, max_lines: int = 8): + self.title = title + self.empty_message = empty_message + self.running_message = running_message self.log_lines: deque = deque(maxlen=max_lines * 2) # Allow more buffer self.max_lines = max_lines self.last_stdout_pos = 0 # Track file position for efficient tailing self.last_stderr_pos = 0 + self.last_process_running = False def update_from_process(self, process: Any): """Update logs by tailing the Process stdout/stderr files.""" - from pathlib import Path - if not process: + self.last_process_running = False return - # Read new stdout lines since last read + # Use Process tail helpers for consistency try: - stdout_path = Path(process.stdout) - if stdout_path.exists(): - with open(stdout_path, 'r') as f: - # Seek to last read position - f.seek(self.last_stdout_pos) - new_lines = f.readlines() - - # Update position - self.last_stdout_pos = f.tell() - - # Add new lines (up to max_lines to avoid overflow) - for line in new_lines[-self.max_lines:]: - line = line.rstrip('\n') - if line and not line.startswith('['): # Skip Rich markup lines - self.log_lines.append(('stdout', line)) + self.last_process_running = bool(getattr(process, 'is_running', False)) + stdout_lines = list(process.tail_stdout(lines=self.max_lines, follow=False)) + stderr_lines = list(process.tail_stderr(lines=self.max_lines, follow=False)) except Exception: - pass + return - # Read new stderr lines since last read - try: - stderr_path = Path(process.stderr) - if stderr_path.exists(): - with open(stderr_path, 'r') as f: - f.seek(self.last_stderr_pos) - new_lines = f.readlines() + 
self.log_lines.clear() - self.last_stderr_pos = f.tell() - - for line in new_lines[-self.max_lines:]: - line = line.rstrip('\n') - if line and not line.startswith('['): # Skip Rich markup lines - self.log_lines.append(('stderr', line)) - except Exception: - pass + # Preserve ordering by showing stdout then stderr + for line in stdout_lines: + if line: + self.log_lines.append(('stdout', line)) + for line in stderr_lines: + if line: + self.log_lines.append(('stderr', line)) def __rich__(self) -> Panel: if not self.log_lines: - content = Text("No CrawlWorker logs yet", style="grey53", justify="center") + message = self.running_message if self.last_process_running else self.empty_message + content = Text(message, style="grey53", justify="center") else: # Get the last max_lines for display display_lines = list(self.log_lines)[-self.max_lines:] @@ -236,9 +396,9 @@ class CrawlWorkerLogPanel: return Panel( content, - title="[bold cyan]CrawlWorker Logs (stdout/stderr)", + title=f"[bold cyan]{self.title}", border_style="cyan", - box=box.ROUNDED, + box=box.HORIZONTALS, ) @@ -270,10 +430,71 @@ class OrchestratorLogPanel: content, title="[bold white]Orchestrator / Daphne Logs", border_style="white", - box=box.ROUNDED, + box=box.HORIZONTALS, ) +class CrawlQueueTreePanel: + """Display crawl queue with snapshots + hook summary in a tree view.""" + + def __init__(self, max_crawls: int = 8, max_snapshots: int = 16): + self.crawls: list[dict[str, Any]] = [] + self.max_crawls = max_crawls + self.max_snapshots = max_snapshots + + def update_crawls(self, crawls: list[dict[str, Any]]) -> None: + """Update crawl tree data.""" + self.crawls = crawls[:self.max_crawls] + + def __rich__(self) -> Panel: + if not self.crawls: + content = Text("No active crawls", style="grey53", justify="center") + else: + trees = [] + for crawl in self.crawls: + crawl_status = crawl.get('status', '') + crawl_label = crawl.get('label', '') + crawl_id = crawl.get('id', '')[:8] + crawl_text = 
Text(f"{self._status_icon(crawl_status)} {crawl_id} {crawl_label}", style="white") + crawl_tree = Tree(crawl_text, guide_style="grey53") + + snapshots = crawl.get('snapshots', [])[:self.max_snapshots] + for snap in snapshots: + snap_status = snap.get('status', '') + snap_label = snap.get('label', '') + snap_text = Text(f"{self._status_icon(snap_status)} {snap_label}", style="white") + snap_node = crawl_tree.add(snap_text) + + hooks = snap.get('hooks', {}) + if hooks: + completed = hooks.get('completed', 0) + running = hooks.get('running', 0) + pending = hooks.get('pending', 0) + summary = f"✅ {completed} | ▶️ {running} | ⌛️ {pending}" + snap_node.add(Text(summary, style="grey53")) + trees.append(crawl_tree) + content = Group(*trees) + + return Panel( + content, + title="[bold white]Crawl Queue", + border_style="white", + box=box.HORIZONTALS, + ) + + @staticmethod + def _status_icon(status: str) -> str: + if status in ('queued', 'pending'): + return '⏳' + if status in ('started', 'running'): + return '▶' + if status in ('sealed', 'done', 'completed'): + return '✅' + if status in ('failed', 'error'): + return '✖' + return '•' + + class ArchiveBoxProgressLayout: """ Main layout manager for ArchiveBox orchestrator progress display. 
@@ -281,15 +502,8 @@ class ArchiveBoxProgressLayout: Layout structure: ┌─────────────────────────────────────────────────────────────┐ │ Crawl Queue (full width) │ - ├───────────────┬───────────────┬───────────────┬─────────────┤ - │ Snapshot │ Snapshot │ Snapshot │ Snapshot │ - │ Worker 1 │ Worker 2 │ Worker 3 │ Worker 4 │ - │ │ │ │ │ - │ Progress + │ Progress + │ Progress + │ Progress + │ - │ Stats + │ Stats + │ Stats + │ Stats + │ - │ Logs │ Logs │ Logs │ Logs │ - ├───────────────┴───────────────┴───────────────┴─────────────┤ - │ CrawlWorker Logs (stdout/stderr) │ + ├─────────────────────────────────────────────────────────────┤ + │ Running Process Logs (dynamic panels) │ ├─────────────────────────────────────────────────────────────┤ │ Orchestrator / Daphne Logs │ └─────────────────────────────────────────────────────────────┘ @@ -303,51 +517,33 @@ class ArchiveBoxProgressLayout: self.crawl_queue = CrawlQueuePanel() self.crawl_queue.crawl_id = crawl_id - # Create 4 worker panels - self.worker_panels = [SnapshotWorkerPanel(i + 1) for i in range(MAX_WORKER_COLUMNS)] - - self.crawl_worker_log = CrawlWorkerLogPanel(max_lines=8) + self.process_panels: List[ProcessLogPanel] = [] self.orchestrator_log = OrchestratorLogPanel(max_events=8) + self.crawl_queue_tree = CrawlQueueTreePanel(max_crawls=8, max_snapshots=16) # Create layout self.layout = self._make_layout() - # Track snapshot ID to worker panel mapping - self.snapshot_to_worker: Dict[str, int] = {} # snapshot_id -> worker_panel_index - def _make_layout(self) -> Layout: """Define the layout structure.""" layout = Layout(name="root") - # Top-level split: crawl_queue, workers, logs + # Top-level split: crawl_queue, workers, bottom layout.split( Layout(name="crawl_queue", size=3), - Layout(name="workers", ratio=1), - Layout(name="logs", size=20), - ) - - # Split workers into 4 columns - layout["workers"].split_row( - Layout(name="worker1"), - Layout(name="worker2"), - Layout(name="worker3"), - 
Layout(name="worker4"), - ) - - # Split logs into crawl_worker_logs and orchestrator_logs - layout["logs"].split( - Layout(name="crawl_worker_logs", size=10), - Layout(name="orchestrator_logs", size=10), + Layout(name="processes", ratio=1), + Layout(name="bottom", size=12), ) # Assign components to layout sections layout["crawl_queue"].update(self.crawl_queue) - layout["worker1"].update(self.worker_panels[0]) - layout["worker2"].update(self.worker_panels[1]) - layout["worker3"].update(self.worker_panels[2]) - layout["worker4"].update(self.worker_panels[3]) - layout["crawl_worker_logs"].update(self.crawl_worker_log) + layout["processes"].update(Columns([])) + layout["bottom"].split_row( + Layout(name="orchestrator_logs", ratio=2), + Layout(name="crawl_tree", ratio=1), + ) layout["orchestrator_logs"].update(self.orchestrator_log) + layout["crawl_tree"].update(self.crawl_queue_tree) return layout @@ -356,82 +552,53 @@ class ArchiveBoxProgressLayout: status: str, crawl_queue_count: int = 0, crawl_workers_count: int = 0, + binary_queue_count: int = 0, + binary_workers_count: int = 0, max_crawl_workers: int = 8, ): """Update orchestrator status in the crawl queue panel.""" self.crawl_queue.orchestrator_status = status self.crawl_queue.crawl_queue_count = crawl_queue_count self.crawl_queue.crawl_workers_count = crawl_workers_count + self.crawl_queue.binary_queue_count = binary_queue_count + self.crawl_queue.binary_workers_count = binary_workers_count self.crawl_queue.max_crawl_workers = max_crawl_workers - def update_snapshot_worker( - self, - snapshot_id: str, - url: str, - total: int, - completed: int, - current_plugin: str = "", - ): - """Update or assign a snapshot to a worker panel.""" - # Find or assign worker panel for this snapshot - if snapshot_id not in self.snapshot_to_worker: - # Find first idle worker panel - worker_idx = None - for idx, panel in enumerate(self.worker_panels): - if panel.status == "idle": - worker_idx = idx - break + def 
update_process_panels(self, processes: List[Any], pending: Optional[List[Any]] = None) -> None: + """Update process panels to show all running processes.""" + panels = [] + all_processes = list(processes) + list(pending or []) + for process in all_processes: + is_hook = getattr(process, 'process_type', '') == 'hook' + is_bg = False + if is_hook: + try: + cmd = getattr(process, 'cmd', []) + hook_path = Path(cmd[1]) if len(cmd) > 1 else None + hook_name = hook_path.name if hook_path else '' + is_bg = '.bg.' in hook_name + except Exception: + is_bg = False + is_pending = getattr(process, 'status', '') in ('queued', 'pending', 'backoff') or (is_hook and not getattr(process, 'pid', None)) + max_lines = 2 if is_pending else (4 if is_bg else 7) + panels.append(ProcessLogPanel(process, max_lines=max_lines, compact=is_bg)) + if not panels: + self.layout["processes"].size = 0 + self.layout["processes"].update(Text("")) + return - # If no idle worker, use round-robin (shouldn't happen often) - if worker_idx is None: - worker_idx = len(self.snapshot_to_worker) % MAX_WORKER_COLUMNS + self.layout["processes"].size = None + self.layout["processes"].ratio = 1 + self.layout["processes"].update(Columns(panels, equal=True, expand=True)) - self.snapshot_to_worker[snapshot_id] = worker_idx + def update_crawl_tree(self, crawls: list[dict[str, Any]]) -> None: + """Update the crawl queue tree panel.""" + self.crawl_queue_tree.update_crawls(crawls) - # Get assigned worker panel - worker_idx = self.snapshot_to_worker[snapshot_id] - panel = self.worker_panels[worker_idx] - - # Update panel - panel.snapshot_id = snapshot_id - panel.snapshot_url = url - panel.total_hooks = total - panel.completed_hooks = completed - panel.current_plugin = current_plugin - panel.status = "working" if completed < total else "completed" - - def remove_snapshot_worker(self, snapshot_id: str): - """Mark a snapshot worker as idle after completion.""" - if snapshot_id in self.snapshot_to_worker: - worker_idx = 
self.snapshot_to_worker[snapshot_id] - panel = self.worker_panels[worker_idx] - - # Mark as idle - panel.status = "idle" - panel.snapshot_id = None - panel.snapshot_url = None - panel.total_hooks = 0 - panel.completed_hooks = 0 - panel.current_plugin = None - panel.recent_logs.clear() - - # Remove mapping - del self.snapshot_to_worker[snapshot_id] - - def log_to_worker(self, snapshot_id: str, message: str, style: str = "white"): - """Add a log message to a specific worker's panel.""" - if snapshot_id in self.snapshot_to_worker: - worker_idx = self.snapshot_to_worker[snapshot_id] - self.worker_panels[worker_idx].add_log(message, style) - - def log_event(self, message: str, style: str = "white"): + def log_event(self, message: str, style: str = "white") -> None: """Add an event to the orchestrator log.""" self.orchestrator_log.add_event(message, style) - def update_crawl_worker_logs(self, process: Any): - """Update CrawlWorker logs by tailing the Process stdout/stderr files.""" - self.crawl_worker_log.update_from_process(process) - def get_layout(self) -> Layout: """Get the Rich Layout object for rendering.""" return self.layout diff --git a/archivebox/plugins/accessibility/templates/icon.html b/archivebox/plugins/accessibility/templates/icon.html index e69de29b..e1c30fa0 100644 --- a/archivebox/plugins/accessibility/templates/icon.html +++ b/archivebox/plugins/accessibility/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/accessibility/tests/__init__.py b/archivebox/plugins/accessibility/tests/__init__.py deleted file mode 100644 index fffe074b..00000000 --- a/archivebox/plugins/accessibility/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the accessibility plugin.""" diff --git a/archivebox/plugins/apt/on_Binary__13_apt_install.py b/archivebox/plugins/apt/on_Binary__13_apt_install.py index af8506df..82e343ff 100644 --- a/archivebox/plugins/apt/on_Binary__13_apt_install.py +++ b/archivebox/plugins/apt/on_Binary__13_apt_install.py @@ 
-10,7 +10,7 @@ import json import sys import rich_click as click -from abx_pkg import Binary, AptProvider +from abx_pkg import Binary, AptProvider, BinProviderOverrides # Fix pydantic forward reference issue AptProvider.model_rebuild() diff --git a/archivebox/plugins/apt/tests/__init__.py b/archivebox/plugins/apt/tests/__init__.py deleted file mode 100644 index fdde694e..00000000 --- a/archivebox/plugins/apt/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the apt binary provider plugin.""" diff --git a/archivebox/plugins/apt/tests/test_apt_provider.py b/archivebox/plugins/apt/tests/test_apt_provider.py index be55e901..430fde24 100644 --- a/archivebox/plugins/apt/tests/test_apt_provider.py +++ b/archivebox/plugins/apt/tests/test_apt_provider.py @@ -21,7 +21,7 @@ from django.test import TestCase # Get the path to the apt provider hook PLUGIN_DIR = Path(__file__).parent.parent -INSTALL_HOOK = PLUGIN_DIR / 'on_Binary__install_using_apt_provider.py' +INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_apt_install.py'), None) def apt_available() -> bool: @@ -48,7 +48,7 @@ class TestAptProviderHook(TestCase): def test_hook_script_exists(self): """Hook script should exist.""" - self.assertTrue(INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}") + self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}") def test_hook_skips_when_apt_not_allowed(self): """Hook should skip when apt not in allowed binproviders.""" diff --git a/archivebox/plugins/archivedotorg/on_Snapshot__13_archivedotorg.py b/archivebox/plugins/archivedotorg/on_Snapshot__13_archivedotorg.py index 5490008d..36522417 100644 --- a/archivebox/plugins/archivedotorg/on_Snapshot__13_archivedotorg.py +++ b/archivebox/plugins/archivedotorg/on_Snapshot__13_archivedotorg.py @@ -47,6 +47,9 @@ def submit_to_archivedotorg(url: str) -> tuple[bool, str | None, str]: Returns: (success, output_path, error_message) """ + def log(message: str) -> None: + print(f'[archivedotorg] 
{message}', file=sys.stderr) + try: import requests except ImportError: @@ -56,6 +59,8 @@ def submit_to_archivedotorg(url: str) -> tuple[bool, str | None, str]: user_agent = get_env('USER_AGENT', 'Mozilla/5.0 (compatible; ArchiveBox/1.0)') submit_url = f'https://web.archive.org/save/{url}' + log(f'Submitting to Wayback Machine (timeout={timeout}s)') + log(f'GET {submit_url}') try: response = requests.get( @@ -64,31 +69,40 @@ def submit_to_archivedotorg(url: str) -> tuple[bool, str | None, str]: headers={'User-Agent': user_agent}, allow_redirects=True, ) + log(f'HTTP {response.status_code} final_url={response.url}') # Check for successful archive content_location = response.headers.get('Content-Location', '') x_archive_orig_url = response.headers.get('X-Archive-Orig-Url', '') + if content_location: + log(f'Content-Location: {content_location}') + if x_archive_orig_url: + log(f'X-Archive-Orig-Url: {x_archive_orig_url}') # Build archive URL if content_location: archive_url = f'https://web.archive.org{content_location}' Path(OUTPUT_FILE).write_text(archive_url, encoding='utf-8') + log(f'Saved archive URL -> {archive_url}') return True, OUTPUT_FILE, '' elif 'web.archive.org' in response.url: # We were redirected to an archive page Path(OUTPUT_FILE).write_text(response.url, encoding='utf-8') + log(f'Redirected to archive page -> {response.url}') return True, OUTPUT_FILE, '' else: # Check for errors in response if 'RobotAccessControlException' in response.text: # Blocked by robots.txt - save submit URL for manual retry Path(OUTPUT_FILE).write_text(submit_url, encoding='utf-8') + log('Blocked by robots.txt, saved submit URL for manual retry') return True, OUTPUT_FILE, '' # Consider this a soft success elif response.status_code >= 400: return False, None, f'HTTP {response.status_code}' else: # Save submit URL anyway Path(OUTPUT_FILE).write_text(submit_url, encoding='utf-8') + log('No archive URL returned, saved submit URL for manual retry') return True, OUTPUT_FILE, '' 
except requests.Timeout: diff --git a/archivebox/plugins/archivedotorg/templates/icon.html b/archivebox/plugins/archivedotorg/templates/icon.html index 09f24b76..e3f48634 100644 --- a/archivebox/plugins/archivedotorg/templates/icon.html +++ b/archivebox/plugins/archivedotorg/templates/icon.html @@ -1 +1 @@ -🏛️ \ No newline at end of file + diff --git a/archivebox/plugins/chrome/binaries.jsonl b/archivebox/plugins/chrome/binaries.jsonl deleted file mode 100644 index 55ccbad0..00000000 --- a/archivebox/plugins/chrome/binaries.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"type": "Binary", "name": "chrome", "binproviders": "npm,env,brew,apt", "overrides": {"npm": {"packages": ["@puppeteer/browsers"]}}} diff --git a/archivebox/plugins/chrome/chrome_utils.js b/archivebox/plugins/chrome/chrome_utils.js index f61cfcdd..6369f1e7 100755 --- a/archivebox/plugins/chrome/chrome_utils.js +++ b/archivebox/plugins/chrome/chrome_utils.js @@ -1253,7 +1253,7 @@ function getExtensionTargets(browser) { } /** - * Find Chromium/Chrome binary path. + * Find Chromium binary path. * Checks CHROME_BINARY env var first, then falls back to system locations. * * @returns {string|null} - Absolute path to browser binary or null if not found @@ -1276,7 +1276,9 @@ function findChromium() { const chromeBinary = getEnv('CHROME_BINARY'); if (chromeBinary) { const absPath = path.resolve(chromeBinary); - if (validateBinary(absPath)) { + if (absPath.includes('Google Chrome') || absPath.includes('google-chrome')) { + console.error('[!] Warning: CHROME_BINARY points to Chrome. Chromium is required for extension support.'); + } else if (validateBinary(absPath)) { return absPath; } console.error(`[!] Warning: CHROME_BINARY="${chromeBinary}" is not valid`); @@ -1309,7 +1311,7 @@ function findChromium() { return null; }; - // 3. Search fallback locations (Chromium first, then Chrome) + // 3. 
Search fallback locations (Chromium only) const fallbackLocations = [ // System Chromium '/Applications/Chromium.app/Contents/MacOS/Chromium', @@ -1318,10 +1320,6 @@ function findChromium() { // Puppeteer cache path.join(process.env.HOME || '', '.cache/puppeteer/chromium'), path.join(process.env.HOME || '', '.cache/puppeteer'), - // Chrome (fallback - extensions may not work in 137+) - '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', - '/usr/bin/google-chrome', - '/usr/bin/google-chrome-stable', ]; for (const loc of fallbackLocations) { @@ -1332,9 +1330,6 @@ function findChromium() { return binary; } } else if (validateBinary(loc)) { - if (loc.includes('Google Chrome') || loc.includes('google-chrome')) { - console.error('[!] Warning: Using Chrome instead of Chromium. Extension loading may not work in Chrome 137+'); - } return loc; } } @@ -1699,10 +1694,10 @@ module.exports = { // Chrome launching launchChromium, killChrome, - // Chrome/Chromium install + // Chromium install installChromium, installPuppeteerCore, - // Chrome/Chromium binary finding + // Chromium binary finding findChromium, // Extension utilities getExtensionId, @@ -1744,7 +1739,7 @@ if (require.main === module) { console.log('Usage: chrome_utils.js [args...]'); console.log(''); console.log('Commands:'); - console.log(' findChromium Find Chrome/Chromium binary'); + console.log(' findChromium Find Chromium binary'); console.log(' installChromium Install Chromium via @puppeteer/browsers'); console.log(' installPuppeteerCore Install puppeteer-core npm package'); console.log(' launchChromium Launch Chrome with CDP debugging'); diff --git a/archivebox/plugins/chrome/config.json b/archivebox/plugins/chrome/config.json index 79d1946d..f4d6a4d8 100644 --- a/archivebox/plugins/chrome/config.json +++ b/archivebox/plugins/chrome/config.json @@ -7,13 +7,13 @@ "type": "boolean", "default": true, "x-aliases": ["USE_CHROME"], - "description": "Enable Chrome/Chromium browser integration for 
archiving" + "description": "Enable Chromium browser integration for archiving" }, "CHROME_BINARY": { "type": "string", "default": "chromium", "x-aliases": ["CHROMIUM_BINARY", "GOOGLE_CHROME_BINARY"], - "description": "Path to Chrome/Chromium binary" + "description": "Path to Chromium binary" }, "CHROME_NODE_BINARY": { "type": "string", diff --git a/archivebox/plugins/chrome/on_Crawl__01_chrome_install.py b/archivebox/plugins/chrome/on_Crawl__01_chrome_install.py deleted file mode 100755 index 6730333f..00000000 --- a/archivebox/plugins/chrome/on_Crawl__01_chrome_install.py +++ /dev/null @@ -1,265 +0,0 @@ -#!/usr/bin/env python3 -""" -Install hook for Chrome/Chromium and puppeteer-core. - -Runs at crawl start to install/find Chromium and puppeteer-core. -Also validates config and computes derived values. - -Outputs: - - JSONL for Binary and Machine config updates - - COMPUTED:KEY=VALUE lines that hooks.py parses and adds to env - -Respects CHROME_BINARY env var for custom binary paths. -Uses `npx @puppeteer/browsers install chromium@latest` and parses output. - -NOTE: We use Chromium instead of Chrome because Chrome 137+ removed support for ---load-extension and --disable-extensions-except flags, which are needed for -loading unpacked extensions in headless mode. 
-""" - -import os -import sys -import json -import subprocess -from pathlib import Path - - -def get_env(name: str, default: str = '') -> str: - return os.environ.get(name, default).strip() - - -def get_env_bool(name: str, default: bool = False) -> bool: - val = get_env(name, '').lower() - if val in ('true', '1', 'yes', 'on'): - return True - if val in ('false', '0', 'no', 'off'): - return False - return default - - -def detect_docker() -> bool: - """Detect if running inside Docker container.""" - return ( - os.path.exists('/.dockerenv') or - os.environ.get('IN_DOCKER', '').lower() in ('true', '1', 'yes') or - os.path.exists('/run/.containerenv') - ) - - -def get_chrome_version(binary_path: str) -> str | None: - """Get Chrome/Chromium version string.""" - try: - result = subprocess.run( - [binary_path, '--version'], - capture_output=True, - text=True, - timeout=5 - ) - if result.returncode == 0: - return result.stdout.strip() - except Exception: - pass - return None - - -def install_puppeteer_core() -> bool: - """Install puppeteer-core to NODE_MODULES_DIR if not present.""" - node_modules_dir = os.environ.get('NODE_MODULES_DIR', '').strip() - if not node_modules_dir: - # No isolated node_modules, skip (will use global) - return True - - node_modules_path = Path(node_modules_dir) - if (node_modules_path / 'puppeteer-core').exists(): - return True - - # Get npm prefix from NODE_MODULES_DIR (parent of node_modules) - npm_prefix = node_modules_path.parent - - try: - print(f"[*] Installing puppeteer-core to {npm_prefix}...", file=sys.stderr) - result = subprocess.run( - ['npm', 'install', '--prefix', str(npm_prefix), 'puppeteer-core', '@puppeteer/browsers'], - capture_output=True, - text=True, - timeout=60 - ) - if result.returncode == 0: - print(f"[+] puppeteer-core installed", file=sys.stderr) - return True - else: - print(f"[!] Failed to install puppeteer-core: {result.stderr}", file=sys.stderr) - return False - except Exception as e: - print(f"[!] 
Failed to install puppeteer-core: {e}", file=sys.stderr) - return False - - -def install_chromium() -> dict | None: - """Install Chromium using @puppeteer/browsers and parse output for binary path. - - Output format: "chromium@ " - e.g.: "chromium@1563294 /Users/x/.cache/puppeteer/chromium/.../Chromium" - - Note: npx is fast when chromium is already cached - it returns the path without re-downloading. - """ - try: - print("[*] Installing Chromium via @puppeteer/browsers...", file=sys.stderr) - - # Use --path to install to puppeteer's standard cache location - cache_path = os.path.expanduser('~/.cache/puppeteer') - - result = subprocess.run( - ['npx', '@puppeteer/browsers', 'install', 'chromium@1563297', f'--path={cache_path}'], - capture_output=True, - text=True, - stdin=subprocess.DEVNULL, - timeout=300 - ) - - if result.returncode != 0: - print(f"[!] Failed to install Chromium: {result.stderr}", file=sys.stderr) - return None - - # Parse output: "chromium@1563294 /path/to/Chromium" - output = result.stdout.strip() - parts = output.split(' ', 1) - if len(parts) != 2: - print(f"[!] Failed to parse install output: {output}", file=sys.stderr) - return None - - version_str = parts[0] # "chromium@1563294" - binary_path = parts[1].strip() - - if not binary_path or not os.path.exists(binary_path): - print(f"[!] Binary not found at: {binary_path}", file=sys.stderr) - return None - - # Extract version number - version = version_str.split('@')[1] if '@' in version_str else None - - print(f"[+] Chromium installed: {binary_path}", file=sys.stderr) - - return { - 'name': 'chromium', - 'abspath': binary_path, - 'version': version, - 'binprovider': 'puppeteer', - } - - except subprocess.TimeoutExpired: - print("[!] Chromium install timed out", file=sys.stderr) - except FileNotFoundError: - print("[!] npx not found - is Node.js installed?", file=sys.stderr) - except Exception as e: - print(f"[!] 
Failed to install Chromium: {e}", file=sys.stderr) - - return None - - -def main(): - warnings = [] - errors = [] - computed = {} - - # Install puppeteer-core if NODE_MODULES_DIR is set - install_puppeteer_core() - - # Check if Chrome is enabled - chrome_enabled = get_env_bool('CHROME_ENABLED', True) - - # Detect Docker and adjust sandbox - in_docker = detect_docker() - computed['IN_DOCKER'] = str(in_docker).lower() - - chrome_sandbox = get_env_bool('CHROME_SANDBOX', True) - if in_docker and chrome_sandbox: - warnings.append( - "Running in Docker with CHROME_SANDBOX=true. " - "Chrome may fail to start. Consider setting CHROME_SANDBOX=false." - ) - # Auto-disable sandbox in Docker unless explicitly set - if not get_env('CHROME_SANDBOX'): - computed['CHROME_SANDBOX'] = 'false' - - # Check Node.js availability - node_binary = get_env('NODE_BINARY', 'node') - computed['NODE_BINARY'] = node_binary - - # Check if CHROME_BINARY is already set and valid - configured_binary = get_env('CHROME_BINARY', '') - if configured_binary and os.path.isfile(configured_binary) and os.access(configured_binary, os.X_OK): - version = get_chrome_version(configured_binary) - computed['CHROME_BINARY'] = configured_binary - computed['CHROME_VERSION'] = version or 'unknown' - - print(json.dumps({ - 'type': 'Binary', - 'name': 'chromium', - 'abspath': configured_binary, - 'version': version, - 'binprovider': 'env', - })) - - # Output computed values - for key, value in computed.items(): - print(f"COMPUTED:{key}={value}") - for warning in warnings: - print(f"WARNING:{warning}", file=sys.stderr) - - sys.exit(0) - - # Install/find Chromium via puppeteer - result = install_chromium() - - if result and result.get('abspath'): - computed['CHROME_BINARY'] = result['abspath'] - computed['CHROME_VERSION'] = result['version'] or 'unknown' - - print(json.dumps({ - 'type': 'Binary', - 'name': result['name'], - 'abspath': result['abspath'], - 'version': result['version'], - 'binprovider': 
result['binprovider'], - })) - - print(json.dumps({ - 'type': 'Machine', - '_method': 'update', - 'key': 'config/CHROME_BINARY', - 'value': result['abspath'], - })) - - if result['version']: - print(json.dumps({ - 'type': 'Machine', - '_method': 'update', - 'key': 'config/CHROMIUM_VERSION', - 'value': result['version'], - })) - - # Output computed values - for key, value in computed.items(): - print(f"COMPUTED:{key}={value}") - for warning in warnings: - print(f"WARNING:{warning}", file=sys.stderr) - - sys.exit(0) - else: - errors.append("Chromium binary not found") - computed['CHROME_BINARY'] = '' - - # Output computed values and errors - for key, value in computed.items(): - print(f"COMPUTED:{key}={value}") - for warning in warnings: - print(f"WARNING:{warning}", file=sys.stderr) - for error in errors: - print(f"ERROR:{error}", file=sys.stderr) - - sys.exit(1) - - -if __name__ == '__main__': - main() diff --git a/archivebox/plugins/chrome/on_Crawl__70_chrome_install.py b/archivebox/plugins/chrome/on_Crawl__70_chrome_install.py new file mode 100755 index 00000000..af0b8ec7 --- /dev/null +++ b/archivebox/plugins/chrome/on_Crawl__70_chrome_install.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +""" +Emit Chromium Binary dependency for the crawl. + +NOTE: We use Chromium instead of Chrome because Chrome 137+ removed support for +--load-extension and --disable-extensions-except flags, which are needed for +loading unpacked extensions in headless mode. 
+""" + +import json +import os +import sys + + +def main(): + # Check if Chrome is enabled + chrome_enabled = os.environ.get('CHROME_ENABLED', 'true').lower() not in ('false', '0', 'no', 'off') + if not chrome_enabled: + sys.exit(0) + + record = { + 'type': 'Binary', + 'name': 'chromium', + 'binproviders': 'puppeteer,env', + 'overrides': { + 'puppeteer': ['chromium@latest', '--install-deps'], + }, + } + print(json.dumps(record)) + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/chrome/on_Crawl__20_chrome_launch.bg.js b/archivebox/plugins/chrome/on_Crawl__90_chrome_launch.bg.js similarity index 98% rename from archivebox/plugins/chrome/on_Crawl__20_chrome_launch.bg.js rename to archivebox/plugins/chrome/on_Crawl__90_chrome_launch.bg.js index f4d659e1..c50eb847 100644 --- a/archivebox/plugins/chrome/on_Crawl__20_chrome_launch.bg.js +++ b/archivebox/plugins/chrome/on_Crawl__90_chrome_launch.bg.js @@ -3,12 +3,12 @@ * Launch a shared Chromium browser session for the entire crawl. * * This runs once per crawl and keeps Chromium alive for all snapshots to share. - * Each snapshot creates its own tab via on_Snapshot__20_chrome_tab.bg.js. + * Each snapshot creates its own tab via on_Snapshot__10_chrome_tab.bg.js. * * NOTE: We use Chromium instead of Chrome because Chrome 137+ removed support for * --load-extension and --disable-extensions-except flags. 
* - * Usage: on_Crawl__20_chrome_launch.bg.js --crawl-id= --source-url= + * Usage: on_Crawl__90_chrome_launch.bg.js --crawl-id= --source-url= * Output: Writes to current directory (executor creates chrome/ dir): * - cdp_url.txt: WebSocket URL for CDP connection * - chrome.pid: Chromium process ID (for cleanup) @@ -31,7 +31,7 @@ if (process.env.NODE_MODULES_DIR) { const fs = require('fs'); const path = require('path'); -const puppeteer = require('puppeteer-core'); +const puppeteer = require('puppeteer'); const { findChromium, launchChromium, diff --git a/archivebox/plugins/chrome/on_Snapshot__20_chrome_tab.bg.js b/archivebox/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js similarity index 86% rename from archivebox/plugins/chrome/on_Snapshot__20_chrome_tab.bg.js rename to archivebox/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js index db953ef0..fca4acdc 100755 --- a/archivebox/plugins/chrome/on_Snapshot__20_chrome_tab.bg.js +++ b/archivebox/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js @@ -2,11 +2,11 @@ /** * Create a Chrome tab for this snapshot in the shared crawl Chrome session. * - * If a crawl-level Chrome session exists (from on_Crawl__20_chrome_launch.bg.js), + * If a crawl-level Chrome session exists (from on_Crawl__90_chrome_launch.bg.js), * this connects to it and creates a new tab. Otherwise, falls back to launching * its own Chrome instance. 
* - * Usage: on_Snapshot__20_chrome_tab.bg.js --url= --snapshot-id= --crawl-id= + * Usage: on_Snapshot__10_chrome_tab.bg.js --url= --snapshot-id= --crawl-id= * Output: Creates chrome/ directory under snapshot output dir with: * - cdp_url.txt: WebSocket URL for CDP connection * - chrome.pid: Chrome process ID (from crawl) @@ -15,11 +15,14 @@ * * Environment variables: * CRAWL_OUTPUT_DIR: Crawl output directory (to find crawl's Chrome session) - * CHROME_BINARY: Path to Chrome/Chromium binary (for fallback) + * CHROME_BINARY: Path to Chromium binary (for fallback) * CHROME_RESOLUTION: Page resolution (default: 1440,2000) * CHROME_USER_AGENT: User agent string (optional) * CHROME_CHECK_SSL_VALIDITY: Whether to check SSL certificates (default: true) * CHROME_HEADLESS: Run in headless mode (default: true) + * + * This is a background hook that stays alive until SIGTERM so the tab + * can be closed cleanly at the end of the snapshot run. */ const fs = require('fs'); @@ -28,7 +31,7 @@ const { spawn } = require('child_process'); // Add NODE_MODULES_DIR to module resolution paths if set if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR); -const puppeteer = require('puppeteer-core'); +const puppeteer = require('puppeteer'); const { findChromium, getEnv, @@ -43,6 +46,11 @@ const PLUGIN_NAME = 'chrome_tab'; const OUTPUT_DIR = '.'; // Hook already runs in chrome/ output directory const CHROME_SESSION_DIR = '.'; +let finalStatus = 'failed'; +let finalOutput = ''; +let finalError = ''; +let cmdVersion = ''; +let finalized = false; // Parse command line arguments function parseArgs() { @@ -56,8 +64,31 @@ function parseArgs() { return args; } +function emitResult(statusOverride) { + if (finalized) return; + finalized = true; + + const status = statusOverride || finalStatus; + const outputStr = status === 'succeeded' + ? 
finalOutput + : (finalError || finalOutput || ''); + + const result = { + type: 'ArchiveResult', + status, + output_str: outputStr, + }; + if (cmdVersion) { + result.cmd_version = cmdVersion; + } + console.log(JSON.stringify(result)); +} + // Cleanup handler for SIGTERM - close this snapshot's tab -async function cleanup() { +async function cleanup(signal) { + if (signal) { + console.error(`\nReceived ${signal}, closing chrome tab...`); + } try { const cdpFile = path.join(OUTPUT_DIR, 'cdp_url.txt'); const targetIdFile = path.join(OUTPUT_DIR, 'target_id.txt'); @@ -78,12 +109,13 @@ async function cleanup() { } catch (e) { // Best effort } - process.exit(0); + emitResult(); + process.exit(finalStatus === 'succeeded' ? 0 : 1); } // Register signal handlers -process.on('SIGTERM', cleanup); -process.on('SIGINT', cleanup); +process.on('SIGTERM', () => cleanup('SIGTERM')); +process.on('SIGINT', () => cleanup('SIGINT')); // Try to find the crawl's Chrome session function findCrawlChromeSession(crawlId) { @@ -272,23 +304,22 @@ async function main() { const crawlId = args.crawl_id; if (!url || !snapshotId) { - console.error('Usage: on_Snapshot__20_chrome_tab.bg.js --url= --snapshot-id= [--crawl-id=]'); + console.error('Usage: on_Snapshot__10_chrome_tab.bg.js --url= --snapshot-id= [--crawl-id=]'); process.exit(1); } - const startTs = new Date(); let status = 'failed'; - let output = null; + let output = ''; let error = ''; let version = ''; try { const binary = findChromium(); if (!binary) { - console.error('ERROR: Chrome/Chromium binary not found'); - console.error('DEPENDENCY_NEEDED=chrome'); + console.error('ERROR: Chromium binary not found'); + console.error('DEPENDENCY_NEEDED=chromium'); console.error('BIN_PROVIDERS=puppeteer,env,playwright,apt,brew'); - console.error('INSTALL_HINT=npx @puppeteer/browsers install chrome@stable'); + console.error('INSTALL_HINT=npx @puppeteer/browsers install chromium@latest'); process.exit(1); } @@ -327,24 +358,22 @@ async function main() 
{ status = 'failed'; } - const endTs = new Date(); - if (error) { console.error(`ERROR: ${error}`); } - // Output clean JSONL (no RESULT_JSON= prefix) - const result = { - type: 'ArchiveResult', - status, - output_str: output || error || '', - }; - if (version) { - result.cmd_version = version; - } - console.log(JSON.stringify(result)); + finalStatus = status; + finalOutput = output || ''; + finalError = error || ''; + cmdVersion = version || ''; - process.exit(status === 'succeeded' ? 0 : 1); + if (status !== 'succeeded') { + emitResult(status); + process.exit(1); + } + + console.log('[*] Chrome tab created, waiting for cleanup signal...'); + await new Promise(() => {}); // Keep alive until SIGTERM } main().catch(e => { diff --git a/archivebox/plugins/chrome/on_Snapshot__11_chrome_wait.js b/archivebox/plugins/chrome/on_Snapshot__11_chrome_wait.js new file mode 100644 index 00000000..219b58b9 --- /dev/null +++ b/archivebox/plugins/chrome/on_Snapshot__11_chrome_wait.js @@ -0,0 +1,76 @@ +#!/usr/bin/env node +/** + * Wait for Chrome session files to exist (cdp_url.txt + target_id.txt). + * + * This is a foreground hook that blocks until the Chrome tab is ready, + * so downstream hooks can safely connect to CDP. 
+ * + * Usage: on_Snapshot__11_chrome_wait.js --url= --snapshot-id= + */ + +const fs = require('fs'); +const path = require('path'); +// Add NODE_MODULES_DIR to module resolution paths if set +if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR); + +const { + getEnvInt, + waitForChromeSession, + readCdpUrl, + readTargetId, +} = require('./chrome_utils.js'); + +const CHROME_SESSION_DIR = '.'; + +function parseArgs() { + const args = {}; + process.argv.slice(2).forEach(arg => { + if (arg.startsWith('--')) { + const [key, ...valueParts] = arg.slice(2).split('='); + args[key.replace(/-/g, '_')] = valueParts.join('=') || true; + } + }); + return args; +} + +async function main() { + const args = parseArgs(); + const url = args.url; + const snapshotId = args.snapshot_id; + + if (!url || !snapshotId) { + console.error('Usage: on_Snapshot__11_chrome_wait.js --url= --snapshot-id='); + process.exit(1); + } + + const timeoutSeconds = getEnvInt('CHROME_TAB_TIMEOUT', getEnvInt('CHROME_TIMEOUT', getEnvInt('TIMEOUT', 60))); + const timeoutMs = timeoutSeconds * 1000; + + console.error(`[chrome_wait] Waiting for Chrome session (timeout=${timeoutSeconds}s)...`); + + const ready = await waitForChromeSession(CHROME_SESSION_DIR, timeoutMs); + if (!ready) { + const error = `Chrome session not ready after ${timeoutSeconds}s (cdp_url.txt/target_id.txt missing)`; + console.error(`[chrome_wait] ERROR: ${error}`); + console.log(JSON.stringify({ type: 'ArchiveResult', status: 'failed', output_str: error })); + process.exit(1); + } + + const cdpUrl = readCdpUrl(CHROME_SESSION_DIR); + const targetId = readTargetId(CHROME_SESSION_DIR); + if (!cdpUrl || !targetId) { + const error = 'Chrome session files incomplete (cdp_url.txt/target_id.txt missing)'; + console.error(`[chrome_wait] ERROR: ${error}`); + console.log(JSON.stringify({ type: 'ArchiveResult', status: 'failed', output_str: error })); + process.exit(1); + } + + console.error(`[chrome_wait] Chrome session 
ready (cdp_url=${cdpUrl.slice(0, 32)}..., target_id=${targetId}).`); + console.log(JSON.stringify({ type: 'ArchiveResult', status: 'succeeded', output_str: 'chrome session ready' })); + process.exit(0); +} + +main().catch(e => { + console.error(`Fatal error: ${e.message}`); + process.exit(1); +}); diff --git a/archivebox/plugins/chrome/on_Snapshot__30_chrome_navigate.js b/archivebox/plugins/chrome/on_Snapshot__30_chrome_navigate.js index 5e2c95d6..242c9853 100644 --- a/archivebox/plugins/chrome/on_Snapshot__30_chrome_navigate.js +++ b/archivebox/plugins/chrome/on_Snapshot__30_chrome_navigate.js @@ -19,7 +19,7 @@ const fs = require('fs'); const path = require('path'); // Add NODE_MODULES_DIR to module resolution paths if set if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR); -const puppeteer = require('puppeteer-core'); +const puppeteer = require('puppeteer'); const PLUGIN_NAME = 'chrome_navigate'; const CHROME_SESSION_DIR = '.'; diff --git a/archivebox/plugins/chrome/templates/icon.html b/archivebox/plugins/chrome/templates/icon.html index e69de29b..18555344 100644 --- a/archivebox/plugins/chrome/templates/icon.html +++ b/archivebox/plugins/chrome/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/chrome/tests/__init__.py b/archivebox/plugins/chrome/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/archivebox/plugins/chrome/tests/chrome_test_helpers.py b/archivebox/plugins/chrome/tests/chrome_test_helpers.py index 3c2424ca..8be2bb3c 100644 --- a/archivebox/plugins/chrome/tests/chrome_test_helpers.py +++ b/archivebox/plugins/chrome/tests/chrome_test_helpers.py @@ -60,6 +60,7 @@ import os import platform import signal import subprocess +import sys import time from datetime import datetime from pathlib import Path @@ -72,11 +73,14 @@ CHROME_PLUGIN_DIR = Path(__file__).parent.parent PLUGINS_ROOT = CHROME_PLUGIN_DIR.parent # Hook script locations -CHROME_INSTALL_HOOK = CHROME_PLUGIN_DIR / 
'on_Crawl__01_chrome_install.py' -CHROME_LAUNCH_HOOK = CHROME_PLUGIN_DIR / 'on_Crawl__20_chrome_launch.bg.js' -CHROME_TAB_HOOK = CHROME_PLUGIN_DIR / 'on_Snapshot__20_chrome_tab.bg.js' +CHROME_INSTALL_HOOK = CHROME_PLUGIN_DIR / 'on_Crawl__70_chrome_install.py' +CHROME_LAUNCH_HOOK = CHROME_PLUGIN_DIR / 'on_Crawl__90_chrome_launch.bg.js' +CHROME_TAB_HOOK = CHROME_PLUGIN_DIR / 'on_Snapshot__10_chrome_tab.bg.js' CHROME_NAVIGATE_HOOK = next(CHROME_PLUGIN_DIR.glob('on_Snapshot__*_chrome_navigate.*'), None) CHROME_UTILS = CHROME_PLUGIN_DIR / 'chrome_utils.js' +PUPPETEER_BINARY_HOOK = PLUGINS_ROOT / 'puppeteer' / 'on_Binary__12_puppeteer_install.py' +PUPPETEER_CRAWL_HOOK = PLUGINS_ROOT / 'puppeteer' / 'on_Crawl__60_puppeteer_install.py' +NPM_BINARY_HOOK = PLUGINS_ROOT / 'npm' / 'on_Binary__10_npm_install.py' # ============================================================================= @@ -402,7 +406,7 @@ def run_hook( # Determine interpreter based on file extension if hook_script.suffix == '.py': - cmd = ['python', str(hook_script)] + cmd = [sys.executable, str(hook_script)] elif hook_script.suffix == '.js': cmd = ['node', str(hook_script)] else: @@ -451,6 +455,128 @@ def parse_jsonl_output(stdout: str, record_type: str = 'ArchiveResult') -> Optio return None +def parse_jsonl_records(stdout: str) -> List[Dict[str, Any]]: + """Parse all JSONL records from stdout.""" + records: List[Dict[str, Any]] = [] + for line in stdout.strip().split('\n'): + line = line.strip() + if not line.startswith('{'): + continue + try: + records.append(json.loads(line)) + except json.JSONDecodeError: + continue + return records + + +def apply_machine_updates(records: List[Dict[str, Any]], env: dict) -> None: + """Apply Machine update records to env dict in-place.""" + for record in records: + if record.get('type') != 'Machine': + continue + config = record.get('config') + if not isinstance(config, dict): + continue + env.update(config) + + +def install_chromium_with_hooks(env: dict, timeout: int 
= 300) -> str: + """Install Chromium via chrome crawl hook + puppeteer/npm hooks. + + Returns absolute path to Chromium binary. + """ + puppeteer_result = subprocess.run( + [sys.executable, str(PUPPETEER_CRAWL_HOOK)], + capture_output=True, + text=True, + timeout=timeout, + env=env, + ) + if puppeteer_result.returncode != 0: + raise RuntimeError(f"Puppeteer crawl hook failed: {puppeteer_result.stderr}") + + puppeteer_record = parse_jsonl_output(puppeteer_result.stdout, record_type='Binary') or {} + if not puppeteer_record or puppeteer_record.get('name') != 'puppeteer': + raise RuntimeError("Puppeteer Binary record not emitted by crawl hook") + + npm_cmd = [ + sys.executable, + str(NPM_BINARY_HOOK), + '--machine-id=test-machine', + '--binary-id=test-puppeteer', + '--name=puppeteer', + f"--binproviders={puppeteer_record.get('binproviders', '*')}", + ] + puppeteer_overrides = puppeteer_record.get('overrides') + if puppeteer_overrides: + npm_cmd.append(f'--overrides={json.dumps(puppeteer_overrides)}') + + npm_result = subprocess.run( + npm_cmd, + capture_output=True, + text=True, + timeout=timeout, + env=env, + ) + if npm_result.returncode != 0: + raise RuntimeError(f"Npm install failed: {npm_result.stderr}") + + apply_machine_updates(parse_jsonl_records(npm_result.stdout), env) + + chrome_result = subprocess.run( + [sys.executable, str(CHROME_INSTALL_HOOK)], + capture_output=True, + text=True, + timeout=timeout, + env=env, + ) + if chrome_result.returncode != 0: + raise RuntimeError(f"Chrome install hook failed: {chrome_result.stderr}") + + chrome_record = parse_jsonl_output(chrome_result.stdout, record_type='Binary') or {} + if not chrome_record or chrome_record.get('name') not in ('chromium', 'chrome'): + raise RuntimeError("Chrome Binary record not emitted by crawl hook") + + chromium_cmd = [ + sys.executable, + str(PUPPETEER_BINARY_HOOK), + '--machine-id=test-machine', + '--binary-id=test-chromium', + f"--name={chrome_record.get('name', 'chromium')}", + 
f"--binproviders={chrome_record.get('binproviders', '*')}", + ] + chrome_overrides = chrome_record.get('overrides') + if chrome_overrides: + chromium_cmd.append(f'--overrides={json.dumps(chrome_overrides)}') + + result = subprocess.run( + chromium_cmd, + capture_output=True, + text=True, + timeout=timeout, + env=env, + ) + if result.returncode != 0: + raise RuntimeError(f"Puppeteer chromium install failed: {result.stderr}") + + records = parse_jsonl_records(result.stdout) + chromium_record = None + for record in records: + if record.get('type') == 'Binary' and record.get('name') in ('chromium', 'chrome'): + chromium_record = record + break + if not chromium_record: + chromium_record = parse_jsonl_output(result.stdout, record_type='Binary') + + chromium_path = chromium_record.get('abspath') + if not chromium_path or not Path(chromium_path).exists(): + raise RuntimeError(f"Chromium binary not found after install: {chromium_path}") + + env['CHROME_BINARY'] = chromium_path + apply_machine_updates(records, env) + return chromium_path + + def run_hook_and_parse( hook_script: Path, url: str, @@ -499,7 +625,7 @@ def setup_test_env(tmpdir: Path) -> dict: crawls/ snapshots/ - Calls chrome install hook which handles puppeteer-core and chromium installation. + Calls chrome install hook + puppeteer/npm hooks for Chromium installation. Returns env dict with DATA_DIR, LIB_DIR, NPM_BIN_DIR, NODE_MODULES_DIR, CHROME_BINARY, etc. 
Args: @@ -559,31 +685,10 @@ def setup_test_env(tmpdir: Path) -> dict: if 'CHROME_HEADLESS' not in os.environ: env['CHROME_HEADLESS'] = 'true' - # Call chrome install hook (installs puppeteer-core and chromium, outputs JSONL) - result = subprocess.run( - ['python', str(CHROME_INSTALL_HOOK)], - capture_output=True, text=True, timeout=120, env=env - ) - if result.returncode != 0: - pytest.skip(f"Chrome install hook failed: {result.stderr}") - - # Parse JSONL output to get CHROME_BINARY - chrome_binary = None - for line in result.stdout.strip().split('\n'): - if not line.strip(): - continue - try: - data = json.loads(line) - if data.get('type') == 'Binary' and data.get('abspath'): - chrome_binary = data['abspath'] - break - except json.JSONDecodeError: - continue - - if not chrome_binary or not Path(chrome_binary).exists(): - pytest.skip(f"Chromium binary not found: {chrome_binary}") - - env['CHROME_BINARY'] = chrome_binary + try: + install_chromium_with_hooks(env) + except RuntimeError as e: + pytest.skip(str(e)) return env @@ -790,17 +895,8 @@ def chrome_session( 'CHROME_HEADLESS': 'true', }) - # CRITICAL: Run chrome install hook first (installs puppeteer-core and chromium) - # chrome_launch assumes chrome_install has already run - install_result = subprocess.run( - ['python', str(CHROME_INSTALL_HOOK)], - capture_output=True, - text=True, - timeout=120, - env=env - ) - if install_result.returncode != 0: - raise RuntimeError(f"Chrome install failed: {install_result.stderr}") + # Install Chromium via npm + puppeteer hooks using normal Binary flow + install_chromium_with_hooks(env) # Launch Chrome at crawl level chrome_launch_process = subprocess.Popen( diff --git a/archivebox/plugins/chrome/tests/test_chrome.py b/archivebox/plugins/chrome/tests/test_chrome.py index 82672566..c23a48d9 100644 --- a/archivebox/plugins/chrome/tests/test_chrome.py +++ b/archivebox/plugins/chrome/tests/test_chrome.py @@ -30,9 +30,8 @@ import platform from 
archivebox.plugins.chrome.tests.chrome_test_helpers import ( get_test_env, - get_lib_dir, - get_node_modules_dir, find_chromium_binary, + install_chromium_with_hooks, CHROME_PLUGIN_DIR as PLUGIN_DIR, CHROME_LAUNCH_HOOK, CHROME_TAB_HOOK, @@ -41,58 +40,24 @@ from archivebox.plugins.chrome.tests.chrome_test_helpers import ( @pytest.fixture(scope="session", autouse=True) def ensure_chromium_and_puppeteer_installed(tmp_path_factory): - """Ensure Chromium and puppeteer are installed before running tests. - - Puppeteer handles Chromium installation automatically in its own cache. - We only need to install puppeteer itself to LIB_DIR/npm. - """ - from abx_pkg import Binary, NpmProvider, BinProviderOverrides - - # Set DATA_DIR if not already set (required by abx_pkg) + """Ensure Chromium and puppeteer are installed before running tests.""" if not os.environ.get('DATA_DIR'): - # Use isolated temp dir for direct pytest runs test_data_dir = tmp_path_factory.mktemp('chrome_test_data') os.environ['DATA_DIR'] = str(test_data_dir) + env = get_test_env() - # Compute paths AFTER setting DATA_DIR - lib_dir = get_lib_dir() - node_modules_dir = get_node_modules_dir() - npm_prefix = lib_dir / 'npm' + try: + chromium_binary = install_chromium_with_hooks(env) + except RuntimeError as e: + pytest.skip(str(e)) - # Rebuild pydantic models - NpmProvider.model_rebuild() - - # Install puppeteer if not available (it will handle Chromium in its own cache) - puppeteer_core_path = node_modules_dir / 'puppeteer-core' - if not puppeteer_core_path.exists(): - print(f"\n[*] Installing puppeteer to {npm_prefix}...") - npm_prefix.mkdir(parents=True, exist_ok=True) - - provider = NpmProvider(npm_prefix=npm_prefix) - try: - binary = Binary( - name='puppeteer', - binproviders=[provider], - overrides={'npm': {'packages': ['puppeteer@^23.5.0']}} - ) - binary.install() - print(f"[*] Puppeteer installed successfully to {npm_prefix}") - except Exception as e: - pytest.skip(f"Failed to install puppeteer: {e}") - 
- # Find Chromium binary (puppeteer installs it automatically in its cache) - chromium_binary = find_chromium_binary() if not chromium_binary: - pytest.skip("Chromium not found - puppeteer should install it automatically") + pytest.skip("Chromium not found after install") - # Set CHROME_BINARY env var for tests os.environ['CHROME_BINARY'] = chromium_binary - - -# Get paths from helpers (will use DATA_DIR if set, or compute based on __file__) -LIB_DIR = get_lib_dir() -NODE_MODULES_DIR = get_node_modules_dir() -NPM_PREFIX = LIB_DIR / 'npm' + for key in ('NODE_MODULES_DIR', 'NODE_PATH', 'PATH'): + if env.get(key): + os.environ[key] = env[key] def test_hook_scripts_exist(): diff --git a/archivebox/plugins/consolelog/on_Snapshot__21_consolelog.bg.js b/archivebox/plugins/consolelog/on_Snapshot__21_consolelog.bg.js index c312f0c5..92351c05 100755 --- a/archivebox/plugins/consolelog/on_Snapshot__21_consolelog.bg.js +++ b/archivebox/plugins/consolelog/on_Snapshot__21_consolelog.bg.js @@ -32,6 +32,13 @@ const OUTPUT_DIR = '.'; const OUTPUT_FILE = 'console.jsonl'; const CHROME_SESSION_DIR = '../chrome'; +let browser = null; +let page = null; +let logCount = 0; +let errorCount = 0; +let requestFailCount = 0; +let shuttingDown = false; + async function serializeArgs(args) { const serialized = []; for (const arg of args) { @@ -73,6 +80,7 @@ async function setupListeners() { location: msg.location(), }; fs.appendFileSync(outputPath, JSON.stringify(logEntry) + '\n'); + logCount += 1; } catch (e) { // Ignore errors } @@ -87,6 +95,7 @@ async function setupListeners() { stack: error.stack || '', }; fs.appendFileSync(outputPath, JSON.stringify(logEntry) + '\n'); + errorCount += 1; } catch (e) { // Ignore } @@ -103,6 +112,7 @@ async function setupListeners() { url: request.url(), }; fs.appendFileSync(outputPath, JSON.stringify(logEntry) + '\n'); + requestFailCount += 1; } catch (e) { // Ignore } @@ -111,6 +121,29 @@ async function setupListeners() { return { browser, page }; } 
+function emitResult(status = 'succeeded') { + if (shuttingDown) return; + shuttingDown = true; + + const counts = `${logCount} console, ${errorCount} errors, ${requestFailCount} failed requests`; + console.log(JSON.stringify({ + type: 'ArchiveResult', + status, + output_str: `${OUTPUT_FILE} (${counts})`, + })); +} + +async function handleShutdown(signal) { + console.error(`\nReceived ${signal}, emitting final results...`); + emitResult('succeeded'); + if (browser) { + try { + browser.disconnect(); + } catch (e) {} + } + process.exit(0); +} + async function main() { const args = parseArgs(); const url = args.url; @@ -127,23 +160,27 @@ async function main() { process.exit(0); } - const timeout = getEnvInt('CONSOLELOG_TIMEOUT', 30) * 1000; - try { // Set up listeners BEFORE navigation - await setupListeners(); + const connection = await setupListeners(); + browser = connection.browser; + page = connection.page; - // Wait for chrome_navigate to complete (BLOCKING) - await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 500); + // Register signal handlers for graceful shutdown + process.on('SIGTERM', () => handleShutdown('SIGTERM')); + process.on('SIGINT', () => handleShutdown('SIGINT')); - // Output clean JSONL - console.log(JSON.stringify({ - type: 'ArchiveResult', - status: 'succeeded', - output_str: OUTPUT_FILE, - })); + // Wait for chrome_navigate to complete (non-fatal) + try { + const timeout = getEnvInt('CONSOLELOG_TIMEOUT', 30) * 1000; + await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 500); + } catch (e) { + console.error(`WARN: ${e.message}`); + } - process.exit(0); + // console.error('Consolelog active, waiting for cleanup signal...'); + await new Promise(() => {}); // Keep alive until SIGTERM + return; } catch (e) { const error = `${e.name}: ${e.message}`; diff --git a/archivebox/plugins/consolelog/templates/icon.html b/archivebox/plugins/consolelog/templates/icon.html index e69de29b..c68b8db5 100644 --- 
a/archivebox/plugins/consolelog/templates/icon.html +++ b/archivebox/plugins/consolelog/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/consolelog/tests/__init__.py b/archivebox/plugins/consolelog/tests/__init__.py deleted file mode 100644 index 456c345d..00000000 --- a/archivebox/plugins/consolelog/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the consolelog plugin.""" diff --git a/archivebox/plugins/consolelog/tests/test_consolelog.py b/archivebox/plugins/consolelog/tests/test_consolelog.py index 2f9189ff..7d590aaa 100644 --- a/archivebox/plugins/consolelog/tests/test_consolelog.py +++ b/archivebox/plugins/consolelog/tests/test_consolelog.py @@ -10,6 +10,7 @@ import shutil import subprocess import sys import tempfile +import time from pathlib import Path import pytest @@ -76,26 +77,33 @@ class TestConsolelogWithChrome(TestCase): # Use the environment from chrome_session (already has CHROME_HEADLESS=true) - # Run consolelog hook with the active Chrome session - result = subprocess.run( + # Run consolelog hook with the active Chrome session (background hook) + result = subprocess.Popen( ['node', str(CONSOLELOG_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'], cwd=str(snapshot_chrome_dir), - capture_output=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, text=True, - timeout=120, # Longer timeout as it waits for navigation env=env ) # Check for output file console_output = snapshot_chrome_dir / 'console.jsonl' - # Verify hook ran (may succeed or timeout waiting for navigation) - # The hook is designed to wait for page_loaded.txt from chrome_navigate - # In test mode, that file may not exist, so hook may timeout - # But it should still create the console.jsonl file + # Allow it to run briefly, then terminate (background hook) + time.sleep(3) + if result.poll() is None: + result.terminate() + try: + stdout, stderr = result.communicate(timeout=5) + except subprocess.TimeoutExpired: + result.kill() + stdout, stderr = 
result.communicate() + else: + stdout, stderr = result.communicate() # At minimum, verify no crash - self.assertNotIn('Traceback', result.stderr) + self.assertNotIn('Traceback', stderr) # If output file exists, verify it's valid JSONL if console_output.exists(): diff --git a/archivebox/plugins/custom/on_Binary__14_custom_install.py b/archivebox/plugins/custom/on_Binary__14_custom_install.py index b0ed6c15..7e523d54 100644 --- a/archivebox/plugins/custom/on_Binary__14_custom_install.py +++ b/archivebox/plugins/custom/on_Binary__14_custom_install.py @@ -59,9 +59,16 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c provider = EnvProvider() try: binary = Binary(name=name, binproviders=[provider]).load() - except Exception as e: - click.echo(f"{name} not found after custom install: {e}", err=True) - sys.exit(1) + except Exception: + try: + binary = Binary( + name=name, + binproviders=[provider], + overrides={'env': {'version': '0.0.1'}}, + ).load() + except Exception as e: + click.echo(f"{name} not found after custom install: {e}", err=True) + sys.exit(1) if not binary.abspath: click.echo(f"{name} not found after custom install", err=True) diff --git a/archivebox/plugins/custom/tests/__init__.py b/archivebox/plugins/custom/tests/__init__.py deleted file mode 100644 index 63791d76..00000000 --- a/archivebox/plugins/custom/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the custom binary provider plugin.""" diff --git a/archivebox/plugins/custom/tests/test_custom_provider.py b/archivebox/plugins/custom/tests/test_custom_provider.py index 301f8657..22a2cb1d 100644 --- a/archivebox/plugins/custom/tests/test_custom_provider.py +++ b/archivebox/plugins/custom/tests/test_custom_provider.py @@ -17,7 +17,7 @@ from django.test import TestCase # Get the path to the custom provider hook PLUGIN_DIR = Path(__file__).parent.parent -INSTALL_HOOK = PLUGIN_DIR / 'on_Binary__install_using_custom_bash.py' +INSTALL_HOOK = 
next(PLUGIN_DIR.glob('on_Binary__*_custom_install.py'), None) class TestCustomProviderHook(TestCase): @@ -34,7 +34,7 @@ class TestCustomProviderHook(TestCase): def test_hook_script_exists(self): """Hook script should exist.""" - self.assertTrue(INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}") + self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}") def test_hook_skips_when_custom_not_allowed(self): """Hook should skip when custom not in allowed binproviders.""" diff --git a/archivebox/plugins/dns/on_Snapshot__22_dns.bg.js b/archivebox/plugins/dns/on_Snapshot__22_dns.bg.js index 721674f1..105f13d8 100755 --- a/archivebox/plugins/dns/on_Snapshot__22_dns.bg.js +++ b/archivebox/plugins/dns/on_Snapshot__22_dns.bg.js @@ -32,6 +32,11 @@ const OUTPUT_DIR = '.'; const OUTPUT_FILE = 'dns.jsonl'; const CHROME_SESSION_DIR = '../chrome'; +let browser = null; +let page = null; +let recordCount = 0; +let shuttingDown = false; + function extractHostname(url) { try { const urlObj = new URL(url); @@ -121,6 +126,7 @@ async function setupListener(targetUrl) { // Append to output file fs.appendFileSync(outputPath, JSON.stringify(dnsRecord) + '\n'); + recordCount += 1; } catch (e) { // Ignore errors @@ -170,6 +176,7 @@ async function setupListener(targetUrl) { }; fs.appendFileSync(outputPath, JSON.stringify(dnsRecord) + '\n'); + recordCount += 1; } } catch (e) { // Ignore errors @@ -179,6 +186,28 @@ async function setupListener(targetUrl) { return { browser, page, client }; } +function emitResult(status = 'succeeded') { + if (shuttingDown) return; + shuttingDown = true; + + console.log(JSON.stringify({ + type: 'ArchiveResult', + status, + output_str: `${OUTPUT_FILE} (${recordCount} DNS records)`, + })); +} + +async function handleShutdown(signal) { + console.error(`\nReceived ${signal}, emitting final results...`); + emitResult('succeeded'); + if (browser) { + try { + browser.disconnect(); + } catch (e) {} + } + process.exit(0); +} + async 
function main() { const args = parseArgs(); const url = args.url; @@ -195,31 +224,27 @@ async function main() { process.exit(0); } - const timeout = getEnvInt('DNS_TIMEOUT', 30) * 1000; - try { // Set up listener BEFORE navigation - await setupListener(url); + const connection = await setupListener(url); + browser = connection.browser; + page = connection.page; - // Wait for chrome_navigate to complete (BLOCKING) - await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 500); + // Register signal handlers for graceful shutdown + process.on('SIGTERM', () => handleShutdown('SIGTERM')); + process.on('SIGINT', () => handleShutdown('SIGINT')); - // Count DNS records - const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE); - let recordCount = 0; - if (fs.existsSync(outputPath)) { - const content = fs.readFileSync(outputPath, 'utf8'); - recordCount = content.split('\n').filter(line => line.trim()).length; + // Wait for chrome_navigate to complete (non-fatal) + try { + const timeout = getEnvInt('DNS_TIMEOUT', 30) * 1000; + await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 500); + } catch (e) { + console.error(`WARN: ${e.message}`); } - // Output clean JSONL - console.log(JSON.stringify({ - type: 'ArchiveResult', - status: 'succeeded', - output_str: `${OUTPUT_FILE} (${recordCount} DNS records)`, - })); - - process.exit(0); + // console.error('DNS listener active, waiting for cleanup signal...'); + await new Promise(() => {}); // Keep alive until SIGTERM + return; } catch (e) { const error = `${e.name}: ${e.message}`; diff --git a/archivebox/plugins/dns/templates/icon.html b/archivebox/plugins/dns/templates/icon.html index e69de29b..1a558d40 100644 --- a/archivebox/plugins/dns/templates/icon.html +++ b/archivebox/plugins/dns/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/dom/on_Snapshot__53_dom.js b/archivebox/plugins/dom/on_Snapshot__53_dom.js index cc35645e..f62662f8 100644 --- a/archivebox/plugins/dom/on_Snapshot__53_dom.js +++ 
b/archivebox/plugins/dom/on_Snapshot__53_dom.js @@ -52,7 +52,21 @@ const CHROME_SESSION_DIR = '../chrome'; // Check if staticfile extractor already downloaded this URL const STATICFILE_DIR = '../staticfile'; function hasStaticFileOutput() { - return fs.existsSync(STATICFILE_DIR) && fs.readdirSync(STATICFILE_DIR).length > 0; + if (!fs.existsSync(STATICFILE_DIR)) return false; + const stdoutPath = path.join(STATICFILE_DIR, 'stdout.log'); + if (!fs.existsSync(stdoutPath)) return false; + const stdout = fs.readFileSync(stdoutPath, 'utf8'); + for (const line of stdout.split('\n')) { + const trimmed = line.trim(); + if (!trimmed.startsWith('{')) continue; + try { + const record = JSON.parse(trimmed); + if (record.type === 'ArchiveResult' && record.status === 'succeeded') { + return true; + } + } catch (e) {} + } + return false; } // Wait for chrome tab to be fully loaded diff --git a/archivebox/plugins/dom/templates/icon.html b/archivebox/plugins/dom/templates/icon.html index f8995a81..56efac8d 100644 --- a/archivebox/plugins/dom/templates/icon.html +++ b/archivebox/plugins/dom/templates/icon.html @@ -1 +1 @@ -🌐 \ No newline at end of file + diff --git a/archivebox/plugins/dom/tests/test_dom.py b/archivebox/plugins/dom/tests/test_dom.py index fea41b8d..7312a72f 100644 --- a/archivebox/plugins/dom/tests/test_dom.py +++ b/archivebox/plugins/dom/tests/test_dom.py @@ -142,7 +142,7 @@ def test_staticfile_present_skips(): # dom/ <- dom extractor runs here, looks for ../staticfile staticfile_dir = tmpdir / 'staticfile' staticfile_dir.mkdir() - (staticfile_dir / 'index.html').write_text('test') + (staticfile_dir / 'stdout.log').write_text('{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n') dom_dir = tmpdir / 'dom' dom_dir.mkdir() diff --git a/archivebox/plugins/env/on_Binary__15_env_install.py b/archivebox/plugins/env/on_Binary__15_env_install.py index 0e867063..35b3a9ca 100644 --- a/archivebox/plugins/env/on_Binary__15_env_install.py +++ 
b/archivebox/plugins/env/on_Binary__15_env_install.py @@ -25,7 +25,8 @@ from abx_pkg import Binary, EnvProvider @click.option('--binary-id', required=True, help="Dependency UUID") @click.option('--name', required=True, help="Binary name to find") @click.option('--binproviders', default='*', help="Allowed providers (comma-separated)") -def main(binary_id: str, machine_id: str, name: str, binproviders: str): +@click.option('--overrides', default=None, help="JSON-encoded overrides dict (unused)") +def main(binary_id: str, machine_id: str, name: str, binproviders: str, overrides: str | None): """Check if binary is available in PATH and record it.""" # Check if env provider is allowed diff --git a/archivebox/plugins/env/tests/__init__.py b/archivebox/plugins/env/tests/__init__.py deleted file mode 100644 index 4fe95e6e..00000000 --- a/archivebox/plugins/env/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the env binary provider plugin.""" diff --git a/archivebox/plugins/env/tests/test_env_provider.py b/archivebox/plugins/env/tests/test_env_provider.py index bf3cc590..2bffcfca 100644 --- a/archivebox/plugins/env/tests/test_env_provider.py +++ b/archivebox/plugins/env/tests/test_env_provider.py @@ -17,7 +17,7 @@ from django.test import TestCase # Get the path to the env provider hook PLUGIN_DIR = Path(__file__).parent.parent -INSTALL_HOOK = PLUGIN_DIR / 'on_Binary__install_using_env_provider.py' +INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_env_install.py'), None) class TestEnvProviderHook(TestCase): @@ -34,7 +34,7 @@ class TestEnvProviderHook(TestCase): def test_hook_script_exists(self): """Hook script should exist.""" - self.assertTrue(INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}") + self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}") def test_hook_finds_python(self): """Hook should find python3 binary in PATH.""" diff --git a/archivebox/plugins/favicon/on_Snapshot__11_favicon.py 
b/archivebox/plugins/favicon/on_Snapshot__11_favicon.py index ea5e9200..4b40d726 100644 --- a/archivebox/plugins/favicon/on_Snapshot__11_favicon.py +++ b/archivebox/plugins/favicon/on_Snapshot__11_favicon.py @@ -126,7 +126,12 @@ def main(url: str, snapshot_id: str): try: # Run extraction success, output, error = get_favicon(url) - status = 'succeeded' if success else 'failed' + if success: + status = 'succeeded' + elif error == 'No favicon found': + status = 'skipped' + else: + status = 'failed' except Exception as e: error = f'{type(e).__name__}: {e}' @@ -143,7 +148,7 @@ def main(url: str, snapshot_id: str): } print(json.dumps(result)) - sys.exit(0 if status == 'succeeded' else 1) + sys.exit(0 if status in ('succeeded', 'skipped') else 1) if __name__ == '__main__': diff --git a/archivebox/plugins/favicon/templates/icon.html b/archivebox/plugins/favicon/templates/icon.html index ec6acc11..7ba648b3 100644 --- a/archivebox/plugins/favicon/templates/icon.html +++ b/archivebox/plugins/favicon/templates/icon.html @@ -1 +1 @@ -⭐ \ No newline at end of file + diff --git a/archivebox/plugins/forumdl/binaries.jsonl b/archivebox/plugins/forumdl/binaries.jsonl deleted file mode 100644 index 2d085bdd..00000000 --- a/archivebox/plugins/forumdl/binaries.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"type": "Binary", "name": "forum-dl", "binproviders": "pip,env"} diff --git a/archivebox/plugins/forumdl/on_Crawl__13_forumdl_install.py b/archivebox/plugins/forumdl/on_Crawl__13_forumdl_install.py deleted file mode 100755 index f52a72f2..00000000 --- a/archivebox/plugins/forumdl/on_Crawl__13_forumdl_install.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -""" -Detect forum-dl binary and emit Binary JSONL record. 
- -Output: Binary JSONL record to stdout if forum-dl is found -""" - -import json -import os -import sys - -from abx_pkg import Binary, EnvProvider - - -def get_env(name: str, default: str = '') -> str: - return os.environ.get(name, default).strip() - -def get_env_bool(name: str, default: bool = False) -> bool: - val = get_env(name, '').lower() - if val in ('true', '1', 'yes', 'on'): - return True - if val in ('false', '0', 'no', 'off'): - return False - return default - - -def output_binary_found(binary: Binary, name: str): - """Output Binary JSONL record for an installed binary.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'abspath': str(binary.abspath), - 'version': str(binary.version) if binary.version else '', - 'sha256': binary.sha256 or '', - 'binprovider': 'env', # Already installed - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def output_binary_missing(name: str, binproviders: str): - """Output Binary JSONL record for a missing binary that needs installation.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'binproviders': binproviders, # Providers that can install it - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def main(): - forumdl_enabled = get_env_bool('FORUMDL_ENABLED', True) - forumdl_binary = get_env('FORUMDL_BINARY', 'forum-dl') - - if not forumdl_enabled: - sys.exit(0) - - provider = EnvProvider() - try: - binary = Binary(name=forumdl_binary, binproviders=[provider]).load() - if binary.abspath: - # Binary found - output_binary_found(binary, name='forum-dl') - else: - # Binary not found - output_binary_missing(name='forum-dl', binproviders='pip') - except Exception: - # Binary not found - output_binary_missing(name='forum-dl', binproviders='pip') - - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/archivebox/plugins/forumdl/on_Crawl__25_forumdl_install.py 
b/archivebox/plugins/forumdl/on_Crawl__25_forumdl_install.py new file mode 100755 index 00000000..73a72a24 --- /dev/null +++ b/archivebox/plugins/forumdl/on_Crawl__25_forumdl_install.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +""" +Emit forum-dl Binary dependency for the crawl. +""" + +import json +import os +import sys + + +def get_env(name: str, default: str = '') -> str: + return os.environ.get(name, default).strip() + +def get_env_bool(name: str, default: bool = False) -> bool: + val = get_env(name, '').lower() + if val in ('true', '1', 'yes', 'on'): + return True + if val in ('false', '0', 'no', 'off'): + return False + return default + + +def output_binary(name: str, binproviders: str, overrides: dict | None = None): + """Output Binary JSONL record for a dependency.""" + machine_id = os.environ.get('MACHINE_ID', '') + + record = { + 'type': 'Binary', + 'name': name, + 'binproviders': binproviders, + 'machine_id': machine_id, + } + if overrides: + record['overrides'] = overrides + print(json.dumps(record)) + + +def main(): + forumdl_enabled = get_env_bool('FORUMDL_ENABLED', True) + + if not forumdl_enabled: + sys.exit(0) + + output_binary( + name='forum-dl', + binproviders='pip,env', + overrides={ + 'pip': { + 'packages': [ + '--no-deps', + 'forum-dl', + 'pydantic', + 'pydantic-core', + 'typing-extensions', + 'annotated-types', + 'typing-inspection', + 'beautifulsoup4', + 'soupsieve', + 'lxml', + 'requests', + 'urllib3', + 'certifi', + 'idna', + 'charset-normalizer', + 'tenacity', + 'python-dateutil', + 'six', + 'html2text', + 'warcio', + ] + } + }, + ) + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/forumdl/on_Snapshot__65_forumdl.bg.py b/archivebox/plugins/forumdl/on_Snapshot__04_forumdl.bg.py similarity index 87% rename from archivebox/plugins/forumdl/on_Snapshot__65_forumdl.bg.py rename to archivebox/plugins/forumdl/on_Snapshot__04_forumdl.bg.py index 8cb97d54..9d2c2461 100755 --- 
a/archivebox/plugins/forumdl/on_Snapshot__65_forumdl.bg.py +++ b/archivebox/plugins/forumdl/on_Snapshot__04_forumdl.bg.py @@ -2,7 +2,7 @@ """ Download forum content from a URL using forum-dl. -Usage: on_Snapshot__forumdl.py --url= --snapshot-id= +Usage: on_Snapshot__04_forumdl.bg.py --url= --snapshot-id= Output: Downloads forum content to $PWD/ Environment variables: @@ -19,6 +19,7 @@ import json import os import subprocess import sys +import threading from pathlib import Path import rich_click as click @@ -131,13 +132,41 @@ def save_forum(url: str, binary: str) -> tuple[bool, str | None, str]: cmd.append(url) try: - result = subprocess.run(cmd, capture_output=True, timeout=timeout, text=True) + print(f'[forumdl] Starting download (timeout={timeout}s)', file=sys.stderr) + output_lines: list[str] = [] + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + ) + + def _read_output() -> None: + if not process.stdout: + return + for line in process.stdout: + output_lines.append(line) + sys.stderr.write(line) + + reader = threading.Thread(target=_read_output, daemon=True) + reader.start() + + try: + process.wait(timeout=timeout) + except subprocess.TimeoutExpired: + process.kill() + reader.join(timeout=1) + return False, None, f'Timed out after {timeout} seconds' + + reader.join(timeout=1) + combined_output = ''.join(output_lines) # Check if output file was created if output_file.exists() and output_file.stat().st_size > 0: return True, str(output_file), '' else: - stderr = result.stderr + stderr = combined_output # These are NOT errors - page simply has no downloadable forum content stderr_lower = stderr.lower() @@ -147,7 +176,7 @@ def save_forum(url: str, binary: str) -> tuple[bool, str | None, str]: return True, None, '' # No forum found - success, no output if 'extractornotfounderror' in stderr_lower: return True, None, '' # No forum extractor for this URL - success, no output - if result.returncode == 0: 
+ if process.returncode == 0: return True, None, '' # forum-dl exited cleanly, just no forum - success # These ARE errors - something went wrong diff --git a/archivebox/plugins/forumdl/templates/icon.html b/archivebox/plugins/forumdl/templates/icon.html index 4c000f72..01cace0d 100644 --- a/archivebox/plugins/forumdl/templates/icon.html +++ b/archivebox/plugins/forumdl/templates/icon.html @@ -1 +1 @@ -💬 \ No newline at end of file + diff --git a/archivebox/plugins/gallerydl/binaries.jsonl b/archivebox/plugins/gallerydl/binaries.jsonl deleted file mode 100644 index 1fb165f1..00000000 --- a/archivebox/plugins/gallerydl/binaries.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"type": "Binary", "name": "gallery-dl", "binproviders": "pip,brew,apt,env"} diff --git a/archivebox/plugins/gallerydl/on_Crawl__10_gallerydl_install.py b/archivebox/plugins/gallerydl/on_Crawl__10_gallerydl_install.py deleted file mode 100755 index df627ab4..00000000 --- a/archivebox/plugins/gallerydl/on_Crawl__10_gallerydl_install.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -""" -Detect gallery-dl binary and emit Binary JSONL record. 
- -Output: Binary JSONL record to stdout if gallery-dl is found -""" - -import json -import os -import sys - -from abx_pkg import Binary, EnvProvider - - -def get_env(name: str, default: str = '') -> str: - return os.environ.get(name, default).strip() - -def get_env_bool(name: str, default: bool = False) -> bool: - val = get_env(name, '').lower() - if val in ('true', '1', 'yes', 'on'): - return True - if val in ('false', '0', 'no', 'off'): - return False - return default - - -def output_binary_found(binary: Binary, name: str): - """Output Binary JSONL record for an installed binary.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'abspath': str(binary.abspath), - 'version': str(binary.version) if binary.version else '', - 'sha256': binary.sha256 or '', - 'binprovider': 'env', # Already installed - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def output_binary_missing(name: str, binproviders: str): - """Output Binary JSONL record for a missing binary that needs installation.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'binproviders': binproviders, # Providers that can install it - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def main(): - gallerydl_enabled = get_env_bool('GALLERYDL_ENABLED', True) - gallerydl_binary = get_env('GALLERYDL_BINARY', 'gallery-dl') - - if not gallerydl_enabled: - sys.exit(0) - - provider = EnvProvider() - try: - binary = Binary(name=gallerydl_binary, binproviders=[provider]).load() - if binary.abspath: - # Binary found - output_binary_found(binary, name='gallery-dl') - else: - # Binary not found - output_binary_missing(name='gallery-dl', binproviders='pip') - except Exception: - # Binary not found - output_binary_missing(name='gallery-dl', binproviders='pip') - - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/archivebox/plugins/gallerydl/on_Crawl__20_gallerydl_install.py 
b/archivebox/plugins/gallerydl/on_Crawl__20_gallerydl_install.py new file mode 100755 index 00000000..06d95f4d --- /dev/null +++ b/archivebox/plugins/gallerydl/on_Crawl__20_gallerydl_install.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +""" +Emit gallery-dl Binary dependency for the crawl. +""" + +import json +import os +import sys + + +def get_env(name: str, default: str = '') -> str: + return os.environ.get(name, default).strip() + +def get_env_bool(name: str, default: bool = False) -> bool: + val = get_env(name, '').lower() + if val in ('true', '1', 'yes', 'on'): + return True + if val in ('false', '0', 'no', 'off'): + return False + return default + + +def output_binary(name: str, binproviders: str): + """Output Binary JSONL record for a dependency.""" + machine_id = os.environ.get('MACHINE_ID', '') + + record = { + 'type': 'Binary', + 'name': name, + 'binproviders': binproviders, + 'machine_id': machine_id, + } + print(json.dumps(record)) + + +def main(): + gallerydl_enabled = get_env_bool('GALLERYDL_ENABLED', True) + + if not gallerydl_enabled: + sys.exit(0) + + output_binary(name='gallery-dl', binproviders='pip,brew,apt,env') + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/gallerydl/on_Snapshot__64_gallerydl.bg.py b/archivebox/plugins/gallerydl/on_Snapshot__03_gallerydl.bg.py similarity index 81% rename from archivebox/plugins/gallerydl/on_Snapshot__64_gallerydl.bg.py rename to archivebox/plugins/gallerydl/on_Snapshot__03_gallerydl.bg.py index 78c1128a..d4c2a08d 100755 --- a/archivebox/plugins/gallerydl/on_Snapshot__64_gallerydl.bg.py +++ b/archivebox/plugins/gallerydl/on_Snapshot__03_gallerydl.bg.py @@ -2,7 +2,7 @@ """ Download image galleries from a URL using gallery-dl. 
-Usage: on_Snapshot__gallerydl.py --url= --snapshot-id= +Usage: on_Snapshot__03_gallerydl.bg.py --url= --snapshot-id= Output: Downloads gallery images to $PWD/gallerydl/ Environment variables: @@ -19,6 +19,7 @@ import json import os import subprocess import sys +import threading from pathlib import Path import rich_click as click @@ -70,7 +71,22 @@ STATICFILE_DIR = '../staticfile' def has_staticfile_output() -> bool: """Check if staticfile extractor already downloaded this URL.""" staticfile_dir = Path(STATICFILE_DIR) - return staticfile_dir.exists() and any(staticfile_dir.iterdir()) + if not staticfile_dir.exists(): + return False + stdout_log = staticfile_dir / 'stdout.log' + if not stdout_log.exists(): + return False + for line in stdout_log.read_text(errors='ignore').splitlines(): + line = line.strip() + if not line.startswith('{'): + continue + try: + record = json.loads(line) + except json.JSONDecodeError: + continue + if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded': + return True + return False def save_gallery(url: str, binary: str) -> tuple[bool, str | None, str]: @@ -109,7 +125,35 @@ def save_gallery(url: str, binary: str) -> tuple[bool, str | None, str]: cmd.append(url) try: - result = subprocess.run(cmd, capture_output=True, timeout=timeout, text=True) + print(f'[gallerydl] Starting download (timeout={timeout}s)', file=sys.stderr) + output_lines: list[str] = [] + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + ) + + def _read_output() -> None: + if not process.stdout: + return + for line in process.stdout: + output_lines.append(line) + sys.stderr.write(line) + + reader = threading.Thread(target=_read_output, daemon=True) + reader.start() + + try: + process.wait(timeout=timeout) + except subprocess.TimeoutExpired: + process.kill() + reader.join(timeout=1) + return False, None, f'Timed out after {timeout} seconds' + + reader.join(timeout=1) + 
combined_output = ''.join(output_lines) # Check if any gallery files were downloaded (search recursively) gallery_extensions = ( @@ -132,7 +176,7 @@ def save_gallery(url: str, binary: str) -> tuple[bool, str | None, str]: output = str(image_files[0]) if image_files else str(downloaded_files[0]) return True, output, '' else: - stderr = result.stderr + stderr = combined_output # These are NOT errors - page simply has no downloadable gallery # Return success with no output (legitimate "nothing to download") @@ -141,7 +185,7 @@ def save_gallery(url: str, binary: str) -> tuple[bool, str | None, str]: return True, None, '' # Not a gallery site - success, no output if 'no results' in stderr_lower: return True, None, '' # No gallery found - success, no output - if result.returncode == 0: + if process.returncode == 0: return True, None, '' # gallery-dl exited cleanly, just no gallery - success # These ARE errors - something went wrong diff --git a/archivebox/plugins/gallerydl/templates/icon.html b/archivebox/plugins/gallerydl/templates/icon.html index b6bb6d16..a8ef89e7 100644 --- a/archivebox/plugins/gallerydl/templates/icon.html +++ b/archivebox/plugins/gallerydl/templates/icon.html @@ -1 +1 @@ -🖼️ \ No newline at end of file + diff --git a/archivebox/plugins/git/binaries.jsonl b/archivebox/plugins/git/binaries.jsonl deleted file mode 100644 index b459ab22..00000000 --- a/archivebox/plugins/git/binaries.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"type": "Binary", "name": "git", "binproviders": "apt,brew,env"} diff --git a/archivebox/plugins/git/on_Crawl__05_git_install.py b/archivebox/plugins/git/on_Crawl__05_git_install.py new file mode 100755 index 00000000..e090d546 --- /dev/null +++ b/archivebox/plugins/git/on_Crawl__05_git_install.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +""" +Emit git Binary dependency for the crawl. 
+""" + +import json +import os +import sys + + +def get_env(name: str, default: str = '') -> str: + return os.environ.get(name, default).strip() + +def get_env_bool(name: str, default: bool = False) -> bool: + val = get_env(name, '').lower() + if val in ('true', '1', 'yes', 'on'): + return True + if val in ('false', '0', 'no', 'off'): + return False + return default + + +def output_binary(name: str, binproviders: str): + """Output Binary JSONL record for a dependency.""" + machine_id = os.environ.get('MACHINE_ID', '') + + record = { + 'type': 'Binary', + 'name': name, + 'binproviders': binproviders, + 'machine_id': machine_id, + } + print(json.dumps(record)) + + +def main(): + git_enabled = get_env_bool('GIT_ENABLED', True) + + if not git_enabled: + sys.exit(0) + + output_binary(name='git', binproviders='apt,brew,env') + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/git/on_Crawl__09_git_install.py b/archivebox/plugins/git/on_Crawl__09_git_install.py deleted file mode 100755 index 4179ed81..00000000 --- a/archivebox/plugins/git/on_Crawl__09_git_install.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -""" -Detect git binary and emit Binary JSONL record. 
- -Output: Binary JSONL record to stdout if git is found -""" - -import json -import os -import sys - -from abx_pkg import Binary, EnvProvider - - -def get_env(name: str, default: str = '') -> str: - return os.environ.get(name, default).strip() - -def get_env_bool(name: str, default: bool = False) -> bool: - val = get_env(name, '').lower() - if val in ('true', '1', 'yes', 'on'): - return True - if val in ('false', '0', 'no', 'off'): - return False - return default - - -def output_binary_found(binary: Binary, name: str): - """Output Binary JSONL record for an installed binary.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'abspath': str(binary.abspath), - 'version': str(binary.version) if binary.version else '', - 'sha256': binary.sha256 or '', - 'binprovider': 'env', # Already installed - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def output_binary_missing(name: str, binproviders: str): - """Output Binary JSONL record for a missing binary that needs installation.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'binproviders': binproviders, # Providers that can install it - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def main(): - git_enabled = get_env_bool('GIT_ENABLED', True) - git_binary = get_env('GIT_BINARY', 'git') - - if not git_enabled: - sys.exit(0) - - provider = EnvProvider() - try: - binary = Binary(name=git_binary, binproviders=[provider]).load() - if binary.abspath: - # Binary found - output_binary_found(binary, name='git') - else: - # Binary not found - output_binary_missing(name='git', binproviders='apt,brew') - except Exception: - # Binary not found - output_binary_missing(name='git', binproviders='apt,brew') - - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/archivebox/plugins/git/on_Snapshot__62_git.py b/archivebox/plugins/git/on_Snapshot__05_git.bg.py similarity index 98% 
rename from archivebox/plugins/git/on_Snapshot__62_git.py rename to archivebox/plugins/git/on_Snapshot__05_git.bg.py index 04dbbd70..14ad7894 100644 --- a/archivebox/plugins/git/on_Snapshot__62_git.py +++ b/archivebox/plugins/git/on_Snapshot__05_git.bg.py @@ -2,7 +2,7 @@ """ Clone a git repository from a URL. -Usage: on_Snapshot__git.py --url= --snapshot-id= +Usage: on_Snapshot__05_git.bg.py --url= --snapshot-id= Output: Clones repository to $PWD/repo Environment variables: diff --git a/archivebox/plugins/git/templates/icon.html b/archivebox/plugins/git/templates/icon.html index de2a340a..e16f0231 100644 --- a/archivebox/plugins/git/templates/icon.html +++ b/archivebox/plugins/git/templates/icon.html @@ -1 +1 @@ -📂 \ No newline at end of file + diff --git a/archivebox/plugins/headers/templates/icon.html b/archivebox/plugins/headers/templates/icon.html index e74c28f8..f693e709 100644 --- a/archivebox/plugins/headers/templates/icon.html +++ b/archivebox/plugins/headers/templates/icon.html @@ -1 +1 @@ -📋 \ No newline at end of file + diff --git a/archivebox/plugins/htmltotext/on_Snapshot__58_htmltotext.py b/archivebox/plugins/htmltotext/on_Snapshot__58_htmltotext.py index c719c027..30134446 100644 --- a/archivebox/plugins/htmltotext/on_Snapshot__58_htmltotext.py +++ b/archivebox/plugins/htmltotext/on_Snapshot__58_htmltotext.py @@ -76,22 +76,28 @@ def find_html_source() -> str | None: # Hooks run in snapshot_dir, sibling extractor outputs are in subdirectories search_patterns = [ 'singlefile/singlefile.html', + '*_singlefile/singlefile.html', 'singlefile/*.html', + '*_singlefile/*.html', 'dom/output.html', + '*_dom/output.html', 'dom/*.html', + '*_dom/*.html', 'wget/**/*.html', + '*_wget/**/*.html', 'wget/**/*.htm', + '*_wget/**/*.htm', ] - cwd = Path.cwd() - for pattern in search_patterns: - matches = list(cwd.glob(pattern)) - for match in matches: - if match.is_file() and match.stat().st_size > 0: - try: - return match.read_text(errors='ignore') - except Exception: - 
continue + for base in (Path.cwd(), Path.cwd().parent): + for pattern in search_patterns: + matches = list(base.glob(pattern)) + for match in matches: + if match.is_file() and match.stat().st_size > 0: + try: + return match.read_text(errors='ignore') + except Exception: + continue return None diff --git a/archivebox/plugins/htmltotext/templates/icon.html b/archivebox/plugins/htmltotext/templates/icon.html index 070c6ec4..d1c8c78d 100644 --- a/archivebox/plugins/htmltotext/templates/icon.html +++ b/archivebox/plugins/htmltotext/templates/icon.html @@ -1 +1 @@ -📃 \ No newline at end of file + diff --git a/archivebox/plugins/infiniscroll/templates/icon.html b/archivebox/plugins/infiniscroll/templates/icon.html new file mode 100644 index 00000000..7de95bf4 --- /dev/null +++ b/archivebox/plugins/infiniscroll/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/istilldontcareaboutcookies/on_Crawl__02_istilldontcareaboutcookies_install.js b/archivebox/plugins/istilldontcareaboutcookies/on_Crawl__81_install_istilldontcareaboutcookies_extension.js similarity index 97% rename from archivebox/plugins/istilldontcareaboutcookies/on_Crawl__02_istilldontcareaboutcookies_install.js rename to archivebox/plugins/istilldontcareaboutcookies/on_Crawl__81_install_istilldontcareaboutcookies_extension.js index f2df6629..ab29cdac 100755 --- a/archivebox/plugins/istilldontcareaboutcookies/on_Crawl__02_istilldontcareaboutcookies_install.js +++ b/archivebox/plugins/istilldontcareaboutcookies/on_Crawl__81_install_istilldontcareaboutcookies_extension.js @@ -7,7 +7,7 @@ * * Extension: https://chromewebstore.google.com/detail/edibdbjcniadpccecjdfdjjppcpchdlm * - * Priority: 02 (early) - Must install before Chrome session starts at Crawl level + * Priority: 81 - Must install before Chrome session starts at Crawl level * Hook: on_Crawl (runs once per crawl, not per snapshot) * * This extension automatically: diff --git a/archivebox/plugins/mercury/binaries.jsonl 
b/archivebox/plugins/mercury/binaries.jsonl deleted file mode 100644 index 9b9be5cf..00000000 --- a/archivebox/plugins/mercury/binaries.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"type": "Binary", "name": "postlight-parser", "binproviders": "npm,env", "overrides": {"npm": {"packages": ["@postlight/parser"]}}} diff --git a/archivebox/plugins/mercury/on_Crawl__12_mercury_install.py b/archivebox/plugins/mercury/on_Crawl__12_mercury_install.py deleted file mode 100755 index 25d1c9c1..00000000 --- a/archivebox/plugins/mercury/on_Crawl__12_mercury_install.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python3 -""" -Detect postlight-parser binary and emit Binary JSONL record. - -Output: Binary JSONL record to stdout if postlight-parser is found -""" - -import json -import os -import sys - -from abx_pkg import Binary, EnvProvider - - -def get_env(name: str, default: str = '') -> str: - return os.environ.get(name, default).strip() - -def get_env_bool(name: str, default: bool = False) -> bool: - val = get_env(name, '').lower() - if val in ('true', '1', 'yes', 'on'): - return True - if val in ('false', '0', 'no', 'off'): - return False - return default - - -def output_binary_found(binary: Binary, name: str): - """Output Binary JSONL record for an installed binary.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'abspath': str(binary.abspath), - 'version': str(binary.version) if binary.version else '', - 'sha256': binary.sha256 or '', - 'binprovider': 'env', # Already installed - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def output_binary_missing(name: str, binproviders: str): - """Output Binary JSONL record for a missing binary that needs installation.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'binproviders': binproviders, # Providers that can install it - 'overrides': { - 'npm': { - 'packages': ['@postlight/parser'], - } - }, - 'machine_id': 
machine_id, - } - print(json.dumps(record)) - - -def main(): - mercury_enabled = get_env_bool('MERCURY_ENABLED', True) - mercury_binary = get_env('MERCURY_BINARY', 'postlight-parser') - - if not mercury_enabled: - sys.exit(0) - - provider = EnvProvider() - try: - binary = Binary(name=mercury_binary, binproviders=[provider]).load() - if binary.abspath: - # Binary found - output_binary_found(binary, name='postlight-parser') - else: - # Binary not found - output_binary_missing(name='postlight-parser', binproviders='npm') - except Exception: - # Binary not found - output_binary_missing(name='postlight-parser', binproviders='npm') - - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/archivebox/plugins/mercury/on_Crawl__40_mercury_install.py b/archivebox/plugins/mercury/on_Crawl__40_mercury_install.py new file mode 100755 index 00000000..7ec64d8b --- /dev/null +++ b/archivebox/plugins/mercury/on_Crawl__40_mercury_install.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +""" +Emit postlight-parser Binary dependency for the crawl. 
+""" + +import json +import os +import sys + + +def get_env(name: str, default: str = '') -> str: + return os.environ.get(name, default).strip() + +def get_env_bool(name: str, default: bool = False) -> bool: + val = get_env(name, '').lower() + if val in ('true', '1', 'yes', 'on'): + return True + if val in ('false', '0', 'no', 'off'): + return False + return default + + +def output_binary(name: str, binproviders: str): + """Output Binary JSONL record for a dependency.""" + machine_id = os.environ.get('MACHINE_ID', '') + + record = { + 'type': 'Binary', + 'name': name, + 'binproviders': binproviders, + 'overrides': { + 'npm': { + 'packages': ['@postlight/parser'], + } + }, + 'machine_id': machine_id, + } + print(json.dumps(record)) + + +def main(): + mercury_enabled = get_env_bool('MERCURY_ENABLED', True) + + if not mercury_enabled: + sys.exit(0) + + output_binary(name='postlight-parser', binproviders='npm,env') + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/mercury/templates/icon.html b/archivebox/plugins/mercury/templates/icon.html index 776ed9b1..bd17e0cf 100644 --- a/archivebox/plugins/mercury/templates/icon.html +++ b/archivebox/plugins/mercury/templates/icon.html @@ -1 +1 @@ -☿️ \ No newline at end of file + diff --git a/archivebox/plugins/merkletree/templates/icon.html b/archivebox/plugins/merkletree/templates/icon.html index e69de29b..b8d3579c 100644 --- a/archivebox/plugins/merkletree/templates/icon.html +++ b/archivebox/plugins/merkletree/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/merkletree/tests/__init__.py b/archivebox/plugins/merkletree/tests/__init__.py deleted file mode 100644 index 1eb43866..00000000 --- a/archivebox/plugins/merkletree/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the merkletree plugin.""" diff --git a/archivebox/plugins/modalcloser/on_Snapshot__15_modalcloser.bg.js b/archivebox/plugins/modalcloser/on_Snapshot__15_modalcloser.bg.js index 
3469026d..38b2a604 100644 --- a/archivebox/plugins/modalcloser/on_Snapshot__15_modalcloser.bg.js +++ b/archivebox/plugins/modalcloser/on_Snapshot__15_modalcloser.bg.js @@ -287,7 +287,7 @@ async function main() { page = pages[pages.length - 1]; } - console.error(`Modalcloser listening on ${url}`); + // console.error(`Modalcloser listening on ${url}`); // Set up dialog handler (for JS alert/confirm/prompt/beforeunload) page.on('dialog', async (dialog) => { diff --git a/archivebox/plugins/modalcloser/templates/icon.html b/archivebox/plugins/modalcloser/templates/icon.html new file mode 100644 index 00000000..e58b588b --- /dev/null +++ b/archivebox/plugins/modalcloser/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/npm/on_Binary__10_npm_install.py b/archivebox/plugins/npm/on_Binary__10_npm_install.py index 4bf1a05c..f0b43893 100644 --- a/archivebox/plugins/npm/on_Binary__10_npm_install.py +++ b/archivebox/plugins/npm/on_Binary__10_npm_install.py @@ -90,30 +90,34 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, custom_c } print(json.dumps(record)) - # Emit PATH update if npm bin dir not already in PATH - npm_bin_dir = str(npm_prefix / 'bin') + # Emit PATH update for npm bin dirs (node_modules/.bin preferred) + npm_bin_dirs = [ + str(npm_prefix / 'node_modules' / '.bin'), + str(npm_prefix / 'bin'), + ] current_path = os.environ.get('PATH', '') + path_dirs = current_path.split(':') if current_path else [] + new_path = current_path - # Check if npm_bin_dir is already in PATH - path_dirs = current_path.split(':') - if npm_bin_dir not in path_dirs: - # Prepend npm_bin_dir to PATH - new_path = f"{npm_bin_dir}:{current_path}" if current_path else npm_bin_dir - print(json.dumps({ - 'type': 'Machine', - '_method': 'update', - 'key': 'config/PATH', - 'value': new_path, - })) - click.echo(f" Added {npm_bin_dir} to PATH", err=True) + for npm_bin_dir in npm_bin_dirs: + if npm_bin_dir and npm_bin_dir not in path_dirs: + new_path = 
f"{npm_bin_dir}:{new_path}" if new_path else npm_bin_dir + path_dirs.insert(0, npm_bin_dir) + + print(json.dumps({ + 'type': 'Machine', + 'config': { + 'PATH': new_path, + }, + })) # Also emit NODE_MODULES_DIR for JS module resolution node_modules_dir = str(npm_prefix / 'node_modules') print(json.dumps({ 'type': 'Machine', - '_method': 'update', - 'key': 'config/NODE_MODULES_DIR', - 'value': node_modules_dir, + 'config': { + 'NODE_MODULES_DIR': node_modules_dir, + }, })) # Log human-readable info to stderr diff --git a/archivebox/plugins/npm/on_Crawl__00_npm_install.py b/archivebox/plugins/npm/on_Crawl__00_npm_install.py new file mode 100644 index 00000000..5660dd01 --- /dev/null +++ b/archivebox/plugins/npm/on_Crawl__00_npm_install.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +""" +Emit node/npm Binary dependencies for the crawl. + +This hook runs early in the Crawl lifecycle so node/npm are installed +before any npm-based extractors (e.g., puppeteer) run. +""" + +import json +import os +import sys + + +def get_env(name: str, default: str = '') -> str: + return os.environ.get(name, default).strip() + + +def output_binary(name: str, binproviders: str, overrides: dict | None = None) -> None: + machine_id = os.environ.get('MACHINE_ID', '') + record = { + 'type': 'Binary', + 'name': name, + 'binproviders': binproviders, + 'machine_id': machine_id, + } + if overrides: + record['overrides'] = overrides + print(json.dumps(record)) + + +def main() -> None: + output_binary( + name='node', + binproviders='apt,brew,env', + overrides={'apt': {'packages': ['nodejs']}}, + ) + + output_binary( + name='npm', + binproviders='apt,brew,env', + overrides={ + 'apt': {'packages': ['nodejs', 'npm']}, + 'brew': {'packages': ['node']}, + }, + ) + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/npm/tests/__init__.py b/archivebox/plugins/npm/tests/__init__.py deleted file mode 100644 index 08ccd028..00000000 --- 
a/archivebox/plugins/npm/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the npm binary provider plugin.""" diff --git a/archivebox/plugins/npm/tests/test_npm_provider.py b/archivebox/plugins/npm/tests/test_npm_provider.py index c5099475..5492738a 100644 --- a/archivebox/plugins/npm/tests/test_npm_provider.py +++ b/archivebox/plugins/npm/tests/test_npm_provider.py @@ -22,7 +22,7 @@ from django.test import TestCase # Get the path to the npm provider hook PLUGIN_DIR = Path(__file__).parent.parent -INSTALL_HOOK = PLUGIN_DIR / 'on_Binary__install_using_npm_provider.py' +INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_npm_install.py'), None) def npm_available() -> bool: @@ -45,7 +45,7 @@ class TestNpmProviderHook(TestCase): def test_hook_script_exists(self): """Hook script should exist.""" - self.assertTrue(INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}") + self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}") def test_hook_requires_lib_dir(self): """Hook should fail when LIB_DIR is not set.""" diff --git a/archivebox/plugins/papersdl/binaries.jsonl b/archivebox/plugins/papersdl/binaries.jsonl deleted file mode 100644 index 538af943..00000000 --- a/archivebox/plugins/papersdl/binaries.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"type": "Binary", "name": "papers-dl", "binproviders": "pip,env"} diff --git a/archivebox/plugins/papersdl/on_Crawl__14_papersdl_install.py b/archivebox/plugins/papersdl/on_Crawl__14_papersdl_install.py deleted file mode 100755 index 8c548c7c..00000000 --- a/archivebox/plugins/papersdl/on_Crawl__14_papersdl_install.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -""" -Detect papers-dl binary and emit Binary JSONL record. 
- -Output: Binary JSONL record to stdout if papers-dl is found -""" - -import json -import os -import sys - -from abx_pkg import Binary, EnvProvider - - -def get_env(name: str, default: str = '') -> str: - return os.environ.get(name, default).strip() - -def get_env_bool(name: str, default: bool = False) -> bool: - val = get_env(name, '').lower() - if val in ('true', '1', 'yes', 'on'): - return True - if val in ('false', '0', 'no', 'off'): - return False - return default - - -def output_binary_found(binary: Binary, name: str): - """Output Binary JSONL record for an installed binary.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'abspath': str(binary.abspath), - 'version': str(binary.version) if binary.version else '', - 'sha256': binary.sha256 or '', - 'binprovider': 'env', # Already installed - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def output_binary_missing(name: str, binproviders: str): - """Output Binary JSONL record for a missing binary that needs installation.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'binproviders': binproviders, # Providers that can install it - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def main(): - papersdl_enabled = get_env_bool('PAPERSDL_ENABLED', True) - papersdl_binary = get_env('PAPERSDL_BINARY', 'papers-dl') - - if not papersdl_enabled: - sys.exit(0) - - provider = EnvProvider() - try: - binary = Binary(name=papersdl_binary, binproviders=[provider]).load() - if binary.abspath: - # Binary found - output_binary_found(binary, name='papers-dl') - else: - # Binary not found - output_binary_missing(name='papers-dl', binproviders='pip') - except Exception: - # Binary not found - output_binary_missing(name='papers-dl', binproviders='pip') - - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/archivebox/plugins/papersdl/on_Crawl__30_papersdl_install.py 
b/archivebox/plugins/papersdl/on_Crawl__30_papersdl_install.py new file mode 100755 index 00000000..050aa23b --- /dev/null +++ b/archivebox/plugins/papersdl/on_Crawl__30_papersdl_install.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +""" +Emit papers-dl Binary dependency for the crawl. +""" + +import json +import os +import sys + + +def get_env(name: str, default: str = '') -> str: + return os.environ.get(name, default).strip() + +def get_env_bool(name: str, default: bool = False) -> bool: + val = get_env(name, '').lower() + if val in ('true', '1', 'yes', 'on'): + return True + if val in ('false', '0', 'no', 'off'): + return False + return default + + +def output_binary(name: str, binproviders: str): + """Output Binary JSONL record for a dependency.""" + machine_id = os.environ.get('MACHINE_ID', '') + + record = { + 'type': 'Binary', + 'name': name, + 'binproviders': binproviders, + 'machine_id': machine_id, + } + print(json.dumps(record)) + + +def main(): + papersdl_enabled = get_env_bool('PAPERSDL_ENABLED', True) + + if not papersdl_enabled: + sys.exit(0) + + output_binary(name='papers-dl', binproviders='pip,env') + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/papersdl/on_Snapshot__66_papersdl.bg.py b/archivebox/plugins/papersdl/on_Snapshot__66_papersdl.bg.py index 859d911e..60015050 100755 --- a/archivebox/plugins/papersdl/on_Snapshot__66_papersdl.bg.py +++ b/archivebox/plugins/papersdl/on_Snapshot__66_papersdl.bg.py @@ -23,6 +23,7 @@ import os import re import subprocess import sys +import threading from pathlib import Path import rich_click as click @@ -108,7 +109,35 @@ def save_paper(url: str, binary: str) -> tuple[bool, str | None, str]: cmd.extend(papersdl_args_extra) try: - result = subprocess.run(cmd, capture_output=True, timeout=timeout, text=True) + print(f'[papersdl] Starting download (timeout={timeout}s)', file=sys.stderr) + output_lines: list[str] = [] + process = subprocess.Popen( + cmd, + 
stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + ) + + def _read_output() -> None: + if not process.stdout: + return + for line in process.stdout: + output_lines.append(line) + sys.stderr.write(line) + + reader = threading.Thread(target=_read_output, daemon=True) + reader.start() + + try: + process.wait(timeout=timeout) + except subprocess.TimeoutExpired: + process.kill() + reader.join(timeout=1) + return False, None, f'Timed out after {timeout} seconds' + + reader.join(timeout=1) + combined_output = ''.join(output_lines) # Check if any PDF files were downloaded pdf_files = list(output_dir.glob('*.pdf')) @@ -117,8 +146,8 @@ def save_paper(url: str, binary: str) -> tuple[bool, str | None, str]: # Return first PDF file return True, str(pdf_files[0]), '' else: - stderr = result.stderr - stdout = result.stdout + stderr = combined_output + stdout = combined_output # These are NOT errors - page simply has no downloadable paper stderr_lower = stderr.lower() @@ -127,7 +156,7 @@ def save_paper(url: str, binary: str) -> tuple[bool, str | None, str]: return True, None, '' # Paper not available - success, no output if 'no results' in stderr_lower or 'no results' in stdout_lower: return True, None, '' # No paper found - success, no output - if result.returncode == 0: + if process.returncode == 0: return True, None, '' # papers-dl exited cleanly, just no paper - success # These ARE errors - something went wrong diff --git a/archivebox/plugins/papersdl/templates/icon.html b/archivebox/plugins/papersdl/templates/icon.html index 063530f3..94afb781 100644 --- a/archivebox/plugins/papersdl/templates/icon.html +++ b/archivebox/plugins/papersdl/templates/icon.html @@ -1 +1 @@ -📄 \ No newline at end of file + diff --git a/archivebox/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js b/archivebox/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js index d3eafb0b..e900d9b5 100755 --- 
a/archivebox/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js +++ b/archivebox/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js @@ -193,6 +193,9 @@ async function extractOutlinks(url) { type: 'Snapshot', url: href, plugin: PLUGIN_NAME, + depth: depth + 1, + parent_snapshot_id: snapshotId || undefined, + crawl_id: crawlId || undefined, })).join('\n'); if (urlsJsonl) { @@ -214,6 +217,8 @@ async function main() { const args = parseArgs(); const url = args.url; const snapshotId = args.snapshot_id; + const crawlId = args.crawl_id || process.env.CRAWL_ID; + const depth = parseInt(args.depth || process.env.SNAPSHOT_DEPTH || '0', 10) || 0; if (!url || !snapshotId) { console.error('Usage: on_Snapshot__75_parse_dom_outlinks.js --url= --snapshot-id='); diff --git a/archivebox/plugins/parse_dom_outlinks/templates/icon.html b/archivebox/plugins/parse_dom_outlinks/templates/icon.html index f77458fd..b333082c 100644 --- a/archivebox/plugins/parse_dom_outlinks/templates/icon.html +++ b/archivebox/plugins/parse_dom_outlinks/templates/icon.html @@ -1 +1 @@ -🔗 + diff --git a/archivebox/plugins/parse_dom_outlinks/tests/__init__.py b/archivebox/plugins/parse_dom_outlinks/tests/__init__.py deleted file mode 100644 index 47e46db9..00000000 --- a/archivebox/plugins/parse_dom_outlinks/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the parse_dom_outlinks plugin.""" diff --git a/archivebox/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py b/archivebox/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py index 33045184..cf6df8ed 100644 --- a/archivebox/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py +++ b/archivebox/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py @@ -79,8 +79,7 @@ class TestParseDomOutlinksWithChrome(TestCase): # Run outlinks hook with the active Chrome session result = subprocess.run( ['node', str(OUTLINKS_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'], - 
cwd=str(snapshot_chrome_dir, - env=get_test_env()), + cwd=str(snapshot_chrome_dir), capture_output=True, text=True, timeout=60, diff --git a/archivebox/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py b/archivebox/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py index 14fe3a6b..1fc36552 100755 --- a/archivebox/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py +++ b/archivebox/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py @@ -24,14 +24,15 @@ from datetime import datetime, timezone from html import unescape from html.parser import HTMLParser from pathlib import Path -from urllib.parse import urljoin, urlparse +from urllib.parse import urljoin, urlparse, urlunparse import rich_click as click PLUGIN_NAME = 'parse_html_urls' -# Check if parse_dom_outlinks extractor already ran -DOM_OUTLINKS_URLS_FILE = Path('parse_dom_outlinks/urls.jsonl') +# Check if parse_dom_outlinks extractor already ran (sibling plugin output dir) +DOM_OUTLINKS_URLS_FILE = Path('..') / 'parse_dom_outlinks' / 'urls.jsonl' +URLS_FILE = Path('urls.jsonl') # URL regex from archivebox/misc/util.py @@ -95,8 +96,9 @@ def fix_urljoin_bug(url: str, nesting_limit=5) -> str: def normalize_url(url: str, root_url: str = None) -> str: """Normalize a URL, resolving relative paths if root_url provided.""" + url = clean_url_candidate(url) if not root_url: - return url + return _normalize_trailing_slash(url) url_is_absolute = url.lower().startswith('http://') or url.lower().startswith('https://') @@ -110,7 +112,40 @@ def normalize_url(url: str, root_url: str = None) -> str: if did_urljoin_misbehave(root_url, url, resolved): resolved = fix_urljoin_bug(resolved) - return resolved + return _normalize_trailing_slash(resolved) + + +def _normalize_trailing_slash(url: str) -> str: + """Drop trailing slash for non-root paths when no query/fragment.""" + try: + parsed = urlparse(url) + path = parsed.path or '' + if path != '/' and path.endswith('/') and not parsed.query and not 
parsed.fragment: + path = path.rstrip('/') + return urlunparse((parsed.scheme, parsed.netloc, path, parsed.params, parsed.query, parsed.fragment)) + except Exception: + pass + return url + + +def clean_url_candidate(url: str) -> str: + """Strip obvious surrounding/trailing punctuation from extracted URLs.""" + cleaned = (url or '').strip() + if not cleaned: + return cleaned + + # Strip common wrappers + cleaned = cleaned.strip(' \t\r\n') + cleaned = cleaned.strip('"\'<>[]()') + + # Strip trailing punctuation and escape artifacts + cleaned = cleaned.rstrip('.,;:!?)\\\'"') + cleaned = cleaned.rstrip('"') + + # Strip leading punctuation artifacts + cleaned = cleaned.lstrip('(\'"<') + + return cleaned def fetch_content(url: str) -> str: @@ -131,6 +166,43 @@ def fetch_content(url: str) -> str: return response.read().decode('utf-8', errors='replace') +def find_html_sources() -> list[str]: + """Find HTML content from other extractors in the snapshot directory.""" + search_patterns = [ + 'readability/content.html', + '*_readability/content.html', + 'mercury/content.html', + '*_mercury/content.html', + 'singlefile/singlefile.html', + '*_singlefile/singlefile.html', + 'singlefile/*.html', + '*_singlefile/*.html', + 'dom/output.html', + '*_dom/output.html', + 'dom/*.html', + '*_dom/*.html', + 'wget/**/*.html', + '*_wget/**/*.html', + 'wget/**/*.htm', + '*_wget/**/*.htm', + 'wget/**/*.htm*', + '*_wget/**/*.htm*', + ] + + sources: list[str] = [] + for base in (Path.cwd(), Path.cwd().parent): + for pattern in search_patterns: + for match in base.glob(pattern): + if not match.is_file() or match.stat().st_size == 0: + continue + try: + sources.append(match.read_text(errors='ignore')) + except Exception: + continue + + return sources + + +@click.command() +@click.option('--url', required=True, help='HTML URL to parse') +@click.option('--snapshot-id', required=False, help='Parent Snapshot UUID') @@ -138,6 +210,13 @@ def fetch_content(url: str) -> str: @click.option('--depth', 
type=int, default=0, help='Current depth level') def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0): """Parse HTML and extract href URLs.""" + env_depth = os.environ.get('SNAPSHOT_DEPTH') + if env_depth is not None: + try: + depth = int(env_depth) + except Exception: + pass + crawl_id = crawl_id or os.environ.get('CRAWL_ID') # Skip only if parse_dom_outlinks already ran AND found URLs (it uses Chrome for better coverage) # If parse_dom_outlinks ran but found nothing, we still try static HTML parsing as fallback @@ -145,32 +224,38 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0 click.echo(f'Skipping parse_html_urls - parse_dom_outlinks already extracted URLs') sys.exit(0) - try: - content = fetch_content(url) - except Exception as e: - click.echo(f'Failed to fetch {url}: {e}', err=True) - sys.exit(1) - - # Parse HTML for hrefs - parser = HrefParser() - try: - parser.feed(content) - except Exception as e: - click.echo(f'Failed to parse HTML: {e}', err=True) - sys.exit(1) + contents = find_html_sources() + if not contents: + try: + contents = [fetch_content(url)] + except Exception as e: + click.echo(f'Failed to fetch {url}: {e}', err=True) + sys.exit(1) urls_found = set() - for href in parser.urls: - # Normalize URL - normalized = normalize_url(href, root_url=url) + for content in contents: + # Parse HTML for hrefs + parser = HrefParser() + try: + parser.feed(content) + except Exception: + pass - # Only include http/https URLs - if normalized.lower().startswith('http://') or normalized.lower().startswith('https://'): - # Skip the source URL itself - if normalized != url: - urls_found.add(unescape(normalized)) + for href in parser.urls: + normalized = normalize_url(href, root_url=url) + if normalized.lower().startswith('http://') or normalized.lower().startswith('https://'): + if normalized != url: + urls_found.add(unescape(normalized)) - # Emit Snapshot records to stdout (JSONL) + # Also capture 
explicit URLs in the HTML text + for match in URL_REGEX.findall(content): + normalized = normalize_url(match, root_url=url) + if normalized.lower().startswith('http://') or normalized.lower().startswith('https://'): + if normalized != url: + urls_found.add(unescape(normalized)) + + # Emit Snapshot records to stdout (JSONL) and urls.jsonl for crawl system + records = [] for found_url in sorted(urls_found): record = { 'type': 'Snapshot', @@ -183,8 +268,12 @@ def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0 if crawl_id: record['crawl_id'] = crawl_id + records.append(record) print(json.dumps(record)) + if records: + URLS_FILE.write_text('\n'.join(json.dumps(r) for r in records) + '\n') + # Emit ArchiveResult record to mark completion status = 'succeeded' if urls_found else 'skipped' output_str = f'Found {len(urls_found)} URLs' if urls_found else 'No URLs found' diff --git a/archivebox/plugins/parse_html_urls/templates/icon.html b/archivebox/plugins/parse_html_urls/templates/icon.html index f77458fd..ee9d8294 100644 --- a/archivebox/plugins/parse_html_urls/templates/icon.html +++ b/archivebox/plugins/parse_html_urls/templates/icon.html @@ -1 +1 @@ -🔗 + diff --git a/archivebox/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py b/archivebox/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py index 6b846f5d..086c7f10 100755 --- a/archivebox/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py +++ b/archivebox/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py @@ -132,6 +132,13 @@ def fetch_content(url: str) -> str: @click.option('--depth', type=int, default=0, help='Current depth level') def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0): """Parse JSONL bookmark file and extract URLs.""" + env_depth = os.environ.get('SNAPSHOT_DEPTH') + if env_depth is not None: + try: + depth = int(env_depth) + except Exception: + pass + crawl_id = crawl_id or os.environ.get('CRAWL_ID') try: 
content = fetch_content(url) diff --git a/archivebox/plugins/parse_jsonl_urls/templates/icon.html b/archivebox/plugins/parse_jsonl_urls/templates/icon.html index 98c76c15..124a8cb4 100644 --- a/archivebox/plugins/parse_jsonl_urls/templates/icon.html +++ b/archivebox/plugins/parse_jsonl_urls/templates/icon.html @@ -1 +1 @@ -📋 + diff --git a/archivebox/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py b/archivebox/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py index caccdac5..99e3c8c1 100755 --- a/archivebox/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py +++ b/archivebox/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py @@ -168,6 +168,13 @@ def fetch_content(url: str) -> str: @click.option('--depth', type=int, default=0, help='Current depth level') def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0): """Parse Netscape bookmark HTML and extract URLs.""" + env_depth = os.environ.get('SNAPSHOT_DEPTH') + if env_depth is not None: + try: + depth = int(env_depth) + except Exception: + pass + crawl_id = crawl_id or os.environ.get('CRAWL_ID') try: content = fetch_content(url) diff --git a/archivebox/plugins/parse_netscape_urls/templates/icon.html b/archivebox/plugins/parse_netscape_urls/templates/icon.html index 0cc8da81..4c60899c 100644 --- a/archivebox/plugins/parse_netscape_urls/templates/icon.html +++ b/archivebox/plugins/parse_netscape_urls/templates/icon.html @@ -1 +1 @@ -🔖 + diff --git a/archivebox/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py b/archivebox/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py index 5b153123..bdc50afa 100755 --- a/archivebox/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py +++ b/archivebox/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py @@ -56,6 +56,13 @@ def fetch_content(url: str) -> str: @click.option('--depth', type=int, default=0, help='Current depth level') def main(url: str, snapshot_id: str = 
None, crawl_id: str = None, depth: int = 0): """Parse RSS/Atom feed and extract article URLs.""" + env_depth = os.environ.get('SNAPSHOT_DEPTH') + if env_depth is not None: + try: + depth = int(env_depth) + except Exception: + pass + crawl_id = crawl_id or os.environ.get('CRAWL_ID') if feedparser is None: click.echo('feedparser library not installed', err=True) diff --git a/archivebox/plugins/parse_rss_urls/templates/icon.html b/archivebox/plugins/parse_rss_urls/templates/icon.html index 81de8a1a..09b3b8e7 100644 --- a/archivebox/plugins/parse_rss_urls/templates/icon.html +++ b/archivebox/plugins/parse_rss_urls/templates/icon.html @@ -1 +1 @@ -📡 + diff --git a/archivebox/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py b/archivebox/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py index d899c742..29265700 100755 --- a/archivebox/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py +++ b/archivebox/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py @@ -105,6 +105,13 @@ def fetch_content(url: str) -> str: @click.option('--depth', type=int, default=0, help='Current depth level') def main(url: str, snapshot_id: str = None, crawl_id: str = None, depth: int = 0): """Parse plain text and extract URLs.""" + env_depth = os.environ.get('SNAPSHOT_DEPTH') + if env_depth is not None: + try: + depth = int(env_depth) + except Exception: + pass + crawl_id = crawl_id or os.environ.get('CRAWL_ID') try: content = fetch_content(url) diff --git a/archivebox/plugins/parse_txt_urls/templates/icon.html b/archivebox/plugins/parse_txt_urls/templates/icon.html index 0351b8bf..af23375c 100644 --- a/archivebox/plugins/parse_txt_urls/templates/icon.html +++ b/archivebox/plugins/parse_txt_urls/templates/icon.html @@ -1 +1 @@ -📃 + diff --git a/archivebox/plugins/pdf/on_Snapshot__52_pdf.js b/archivebox/plugins/pdf/on_Snapshot__52_pdf.js index b53a9aea..05648a81 100644 --- a/archivebox/plugins/pdf/on_Snapshot__52_pdf.js +++ b/archivebox/plugins/pdf/on_Snapshot__52_pdf.js @@ 
-52,7 +52,21 @@ const CHROME_SESSION_DIR = '../chrome'; // Check if staticfile extractor already downloaded this URL const STATICFILE_DIR = '../staticfile'; function hasStaticFileOutput() { - return fs.existsSync(STATICFILE_DIR) && fs.readdirSync(STATICFILE_DIR).length > 0; + if (!fs.existsSync(STATICFILE_DIR)) return false; + const stdoutPath = path.join(STATICFILE_DIR, 'stdout.log'); + if (!fs.existsSync(stdoutPath)) return false; + const stdout = fs.readFileSync(stdoutPath, 'utf8'); + for (const line of stdout.split('\n')) { + const trimmed = line.trim(); + if (!trimmed.startsWith('{')) continue; + try { + const record = JSON.parse(trimmed); + if (record.type === 'ArchiveResult' && record.status === 'succeeded') { + return true; + } + } catch (e) {} + } + return false; } // Wait for chrome tab to be fully loaded diff --git a/archivebox/plugins/pdf/templates/icon.html b/archivebox/plugins/pdf/templates/icon.html index 063530f3..35a0ed89 100644 --- a/archivebox/plugins/pdf/templates/icon.html +++ b/archivebox/plugins/pdf/templates/icon.html @@ -1 +1 @@ -📄 \ No newline at end of file + diff --git a/archivebox/plugins/pip/on_Binary__11_pip_install.py b/archivebox/plugins/pip/on_Binary__11_pip_install.py index edbeef4b..8737a042 100644 --- a/archivebox/plugins/pip/on_Binary__11_pip_install.py +++ b/archivebox/plugins/pip/on_Binary__11_pip_install.py @@ -11,6 +11,8 @@ Environment variables: import json import os +import shutil +import subprocess import sys from pathlib import Path @@ -46,6 +48,26 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override # Structure: lib/arm64-darwin/pip/venv (PipProvider will create venv automatically) pip_venv_path = Path(lib_dir) / 'pip' / 'venv' pip_venv_path.parent.mkdir(parents=True, exist_ok=True) + venv_python = pip_venv_path / 'bin' / 'python' + + # Prefer a stable system python for venv creation if provided/available + preferred_python = os.environ.get('PIP_VENV_PYTHON', '').strip() + if not 
preferred_python: + for candidate in ('python3.12', 'python3.11', 'python3.10'): + if shutil.which(candidate): + preferred_python = candidate + break + if preferred_python and not venv_python.exists(): + try: + subprocess.run( + [preferred_python, '-m', 'venv', str(pip_venv_path), '--upgrade-deps'], + check=True, + capture_output=True, + text=True, + ) + except Exception: + # Fall back to PipProvider-managed venv creation + pass # Use abx-pkg PipProvider to install binary with custom venv provider = PipProvider(pip_venv=pip_venv_path) @@ -87,22 +109,21 @@ def main(binary_id: str, machine_id: str, name: str, binproviders: str, override } print(json.dumps(record)) - # Emit PATH update if pip bin dir not already in PATH + # Emit PATH update for pip bin dir pip_bin_dir = str(pip_venv_path / 'bin') current_path = os.environ.get('PATH', '') # Check if pip_bin_dir is already in PATH path_dirs = current_path.split(':') - if pip_bin_dir not in path_dirs: - # Prepend pip_bin_dir to PATH - new_path = f"{pip_bin_dir}:{current_path}" if current_path else pip_bin_dir - print(json.dumps({ - 'type': 'Machine', - '_method': 'update', - 'key': 'config/PATH', - 'value': new_path, - })) - click.echo(f" Added {pip_bin_dir} to PATH", err=True) + new_path = f"{pip_bin_dir}:{current_path}" if current_path else pip_bin_dir + if pip_bin_dir in path_dirs: + new_path = current_path + print(json.dumps({ + 'type': 'Machine', + 'config': { + 'PATH': new_path, + }, + })) # Log human-readable info to stderr click.echo(f"Installed {name} at {binary.abspath}", err=True) diff --git a/archivebox/plugins/pip/tests/__init__.py b/archivebox/plugins/pip/tests/__init__.py deleted file mode 100644 index 28ac0d82..00000000 --- a/archivebox/plugins/pip/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the pip binary provider plugin.""" diff --git a/archivebox/plugins/pip/tests/test_pip_provider.py b/archivebox/plugins/pip/tests/test_pip_provider.py index a22ef183..4a4fe610 100644 --- 
a/archivebox/plugins/pip/tests/test_pip_provider.py +++ b/archivebox/plugins/pip/tests/test_pip_provider.py @@ -22,7 +22,7 @@ from django.test import TestCase # Get the path to the pip provider hook PLUGIN_DIR = Path(__file__).parent.parent -INSTALL_HOOK = PLUGIN_DIR / 'on_Binary__install_using_pip_provider.py' +INSTALL_HOOK = next(PLUGIN_DIR.glob('on_Binary__*_pip_install.py'), None) class TestPipProviderHook(TestCase): @@ -33,6 +33,10 @@ class TestPipProviderHook(TestCase): self.temp_dir = tempfile.mkdtemp() self.output_dir = Path(self.temp_dir) / 'output' self.output_dir.mkdir() + self.lib_dir = Path(self.temp_dir) / 'lib' / 'x86_64-linux' + self.lib_dir.mkdir(parents=True, exist_ok=True) + self.lib_dir = Path(self.temp_dir) / 'lib' / 'x86_64-linux' + self.lib_dir.mkdir(parents=True, exist_ok=True) def tearDown(self): """Clean up.""" @@ -41,7 +45,7 @@ class TestPipProviderHook(TestCase): def test_hook_script_exists(self): """Hook script should exist.""" - self.assertTrue(INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}") + self.assertTrue(INSTALL_HOOK and INSTALL_HOOK.exists(), f"Hook not found: {INSTALL_HOOK}") def test_hook_help(self): """Hook should accept --help without error.""" @@ -55,16 +59,19 @@ class TestPipProviderHook(TestCase): # At minimum should not crash with Python error self.assertNotIn('Traceback', result.stderr) - def test_hook_finds_python(self): - """Hook should find Python binary.""" + def test_hook_finds_pip(self): + """Hook should find pip binary.""" env = os.environ.copy() env['DATA_DIR'] = self.temp_dir + env['LIB_DIR'] = str(self.lib_dir) result = subprocess.run( [ sys.executable, str(INSTALL_HOOK), - '--name=python3', - '--binproviders=pip,env', + '--name=pip', + '--binproviders=pip', + '--binary-id=test-uuid', + '--machine-id=test-machine', ], capture_output=True, text=True, @@ -80,7 +87,7 @@ class TestPipProviderHook(TestCase): if line.startswith('{'): try: record = json.loads(line) - if record.get('type') == 'Binary' and 
record.get('name') == 'python3': + if record.get('type') == 'Binary' and record.get('name') == 'pip': jsonl_found = True # Verify structure self.assertIn('abspath', record) @@ -92,19 +99,22 @@ class TestPipProviderHook(TestCase): # Should not crash self.assertNotIn('Traceback', result.stderr) - # Should find python3 via pip or env provider - self.assertTrue(jsonl_found, "Expected to find python3 binary in JSONL output") + # Should find pip via pip provider + self.assertTrue(jsonl_found, "Expected to find pip binary in JSONL output") def test_hook_unknown_package(self): """Hook should handle unknown packages gracefully.""" env = os.environ.copy() env['DATA_DIR'] = self.temp_dir + env['LIB_DIR'] = str(self.lib_dir) result = subprocess.run( [ sys.executable, str(INSTALL_HOOK), '--name=nonexistent_package_xyz123', '--binproviders=pip', + '--binary-id=test-uuid', + '--machine-id=test-machine', ], capture_output=True, text=True, @@ -148,6 +158,8 @@ class TestPipProviderIntegration(TestCase): sys.executable, str(INSTALL_HOOK), '--name=pip', '--binproviders=pip,env', + '--binary-id=test-uuid', + '--machine-id=test-machine', ], capture_output=True, text=True, diff --git a/archivebox/plugins/puppeteer/__init__.py b/archivebox/plugins/puppeteer/__init__.py new file mode 100644 index 00000000..e32e0f82 --- /dev/null +++ b/archivebox/plugins/puppeteer/__init__.py @@ -0,0 +1 @@ +# Plugin namespace for puppeteer utilities. diff --git a/archivebox/plugins/puppeteer/on_Binary__12_puppeteer_install.py b/archivebox/plugins/puppeteer/on_Binary__12_puppeteer_install.py new file mode 100644 index 00000000..a30e9cc0 --- /dev/null +++ b/archivebox/plugins/puppeteer/on_Binary__12_puppeteer_install.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +""" +Install Chromium via the Puppeteer CLI. 
+ +Usage: on_Binary__12_puppeteer_install.py --binary-id= --machine-id= --name= +Output: Binary JSONL record to stdout after installation +""" + +import json +import os +import re +import sys +from pathlib import Path + +import rich_click as click +from abx_pkg import Binary, EnvProvider, NpmProvider, BinProviderOverrides + +# Fix pydantic forward reference issue +NpmProvider.model_rebuild() + + +@click.command() +@click.option('--machine-id', required=True, help='Machine UUID') +@click.option('--binary-id', required=True, help='Binary UUID') +@click.option('--name', required=True, help='Binary name to install') +@click.option('--binproviders', default='*', help='Allowed providers (comma-separated)') +@click.option('--overrides', default=None, help='JSON-encoded overrides dict') +def main(machine_id: str, binary_id: str, name: str, binproviders: str, overrides: str | None) -> None: + if binproviders != '*' and 'puppeteer' not in binproviders.split(','): + sys.exit(0) + + if name not in ('chromium', 'chrome'): + sys.exit(0) + + lib_dir = os.environ.get('LIB_DIR', '').strip() + if not lib_dir: + click.echo('ERROR: LIB_DIR environment variable not set', err=True) + sys.exit(1) + + npm_prefix = Path(lib_dir) / 'npm' + npm_prefix.mkdir(parents=True, exist_ok=True) + npm_provider = NpmProvider(npm_prefix=npm_prefix) + cache_dir = Path(lib_dir) / 'puppeteer' + cache_dir.mkdir(parents=True, exist_ok=True) + os.environ.setdefault('PUPPETEER_CACHE_DIR', str(cache_dir)) + + puppeteer_binary = Binary( + name='puppeteer', + binproviders=[npm_provider, EnvProvider()], + overrides={'npm': {'packages': ['puppeteer']}}, + ).load() + + if not puppeteer_binary.abspath: + click.echo('ERROR: puppeteer binary not found (install puppeteer first)', err=True) + sys.exit(1) + + install_args = _parse_override_packages(overrides, default=['chromium@latest', '--install-deps']) + cmd = ['browsers', 'install', *install_args] + proc = puppeteer_binary.exec(cmd=cmd, timeout=300) + if 
proc.returncode != 0: + click.echo(proc.stdout.strip(), err=True) + click.echo(proc.stderr.strip(), err=True) + click.echo(f'ERROR: puppeteer install failed ({proc.returncode})', err=True) + sys.exit(1) + + chromium_binary = _load_chromium_binary(proc.stdout + '\n' + proc.stderr) + if not chromium_binary or not chromium_binary.abspath: + click.echo('ERROR: failed to locate Chromium after install', err=True) + sys.exit(1) + + _emit_chromium_binary_record( + binary=chromium_binary, + machine_id=machine_id, + binary_id=binary_id, + ) + + config_patch = { + 'CHROME_BINARY': str(chromium_binary.abspath), + 'CHROMIUM_VERSION': str(chromium_binary.version) if chromium_binary.version else '', + } + + print(json.dumps({ + 'type': 'Machine', + 'config': config_patch, + })) + + sys.exit(0) + + +def _parse_override_packages(overrides: str | None, default: list[str]) -> list[str]: + if not overrides: + return default + try: + overrides_dict = json.loads(overrides) + except json.JSONDecodeError: + return default + + if isinstance(overrides_dict, dict): + provider_overrides = overrides_dict.get('puppeteer') + if isinstance(provider_overrides, dict): + packages = provider_overrides.get('packages') + if isinstance(packages, list) and packages: + return [str(arg) for arg in packages] + if isinstance(provider_overrides, list) and provider_overrides: + return [str(arg) for arg in provider_overrides] + if isinstance(overrides_dict, list) and overrides_dict: + return [str(arg) for arg in overrides_dict] + + return default + + +def _emit_chromium_binary_record(binary: Binary, machine_id: str, binary_id: str) -> None: + record = { + 'type': 'Binary', + 'name': 'chromium', + 'abspath': str(binary.abspath), + 'version': str(binary.version) if binary.version else '', + 'sha256': binary.sha256 or '', + 'binprovider': 'puppeteer', + 'machine_id': machine_id, + 'binary_id': binary_id, + } + print(json.dumps(record)) + + +def _load_chromium_binary(output: str) -> Binary | None: + candidates: 
list[Path] = [] + match = re.search(r'(?:chromium|chrome)@[^\s]+\s+(\S+)', output) + if match: + candidates.append(Path(match.group(1))) + + cache_dirs: list[Path] = [] + cache_env = os.environ.get('PUPPETEER_CACHE_DIR') + if cache_env: + cache_dirs.append(Path(cache_env)) + + home = Path.home() + cache_dirs.extend([ + home / '.cache' / 'puppeteer', + home / 'Library' / 'Caches' / 'puppeteer', + ]) + + for base in cache_dirs: + for root in (base, base / 'chromium', base / 'chrome'): + try: + candidates.extend(root.rglob('Chromium.app/Contents/MacOS/Chromium')) + except Exception: + pass + try: + candidates.extend(root.rglob('chrome')) + except Exception: + pass + + for candidate in candidates: + try: + binary = Binary( + name='chromium', + binproviders=[EnvProvider()], + overrides={'env': {'abspath': str(candidate)}}, + ).load() + except Exception: + continue + if binary.abspath: + return binary + + return None + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/puppeteer/on_Crawl__60_puppeteer_install.py b/archivebox/plugins/puppeteer/on_Crawl__60_puppeteer_install.py new file mode 100644 index 00000000..9125dc2f --- /dev/null +++ b/archivebox/plugins/puppeteer/on_Crawl__60_puppeteer_install.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +""" +Emit Puppeteer Binary dependency for the crawl. 
+""" + +import json +import os +import sys + + +def main() -> None: + enabled = os.environ.get('PUPPETEER_ENABLED', 'true').lower() not in ('false', '0', 'no', 'off') + if not enabled: + sys.exit(0) + + record = { + 'type': 'Binary', + 'name': 'puppeteer', + 'binproviders': 'npm,env', + 'overrides': { + 'npm': { + 'packages': ['puppeteer'], + } + }, + } + print(json.dumps(record)) + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/puppeteer/tests/test_puppeteer.py b/archivebox/plugins/puppeteer/tests/test_puppeteer.py new file mode 100644 index 00000000..5d230a7d --- /dev/null +++ b/archivebox/plugins/puppeteer/tests/test_puppeteer.py @@ -0,0 +1,124 @@ +"""Integration tests for puppeteer plugin.""" + +import json +import os +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path + +import pytest + +from archivebox.plugins.chrome.tests.chrome_test_helpers import ( + get_plugin_dir, + get_hook_script, +) + + +PLUGIN_DIR = get_plugin_dir(__file__) +CRAWL_HOOK = get_hook_script(PLUGIN_DIR, 'on_Crawl__*_puppeteer_install.py') +BINARY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Binary__*_puppeteer_install.py') +NPM_BINARY_HOOK = PLUGIN_DIR.parent / 'npm' / 'on_Binary__10_npm_install.py' + + +def test_hook_scripts_exist(): + assert CRAWL_HOOK and CRAWL_HOOK.exists(), f"Hook not found: {CRAWL_HOOK}" + assert BINARY_HOOK and BINARY_HOOK.exists(), f"Hook not found: {BINARY_HOOK}" + + +def test_crawl_hook_emits_puppeteer_binary(): + with tempfile.TemporaryDirectory() as tmpdir: + env = os.environ.copy() + result = subprocess.run( + [sys.executable, str(CRAWL_HOOK)], + cwd=tmpdir, + capture_output=True, + text=True, + env=env, + timeout=30, + ) + + assert result.returncode == 0, f"crawl hook failed: {result.stderr}" + records = [json.loads(line) for line in result.stdout.splitlines() if line.strip().startswith('{')] + binaries = [r for r in records if r.get('type') == 'Binary' and r.get('name') == 'puppeteer'] + 
assert binaries, f"Expected Binary record for puppeteer, got: {records}" + assert 'npm' in binaries[0].get('binproviders', ''), "puppeteer should be installable via npm provider" + + +@pytest.mark.skipif(shutil.which('npm') is None, reason='npm is required for puppeteer installation') +def test_puppeteer_installs_chromium(): + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + lib_dir = tmpdir / 'lib' / 'arm64-darwin' + lib_dir.mkdir(parents=True, exist_ok=True) + + env = os.environ.copy() + env['LIB_DIR'] = str(lib_dir) + + crawl_result = subprocess.run( + [sys.executable, str(CRAWL_HOOK)], + cwd=tmpdir, + capture_output=True, + text=True, + env=env, + timeout=30, + ) + assert crawl_result.returncode == 0, f"crawl hook failed: {crawl_result.stderr}" + crawl_records = [json.loads(line) for line in crawl_result.stdout.splitlines() if line.strip().startswith('{')] + puppeteer_record = next( + (r for r in crawl_records if r.get('type') == 'Binary' and r.get('name') == 'puppeteer'), + None, + ) + assert puppeteer_record, f"Expected puppeteer Binary record, got: {crawl_records}" + + npm_result = subprocess.run( + [ + sys.executable, + str(NPM_BINARY_HOOK), + '--machine-id=test-machine', + '--binary-id=test-puppeteer', + '--name=puppeteer', + f"--binproviders={puppeteer_record.get('binproviders', '*')}", + '--overrides=' + json.dumps(puppeteer_record.get('overrides') or {}), + ], + cwd=tmpdir, + capture_output=True, + text=True, + env=env, + timeout=120, + ) + assert npm_result.returncode == 0, ( + "puppeteer npm install failed\n" + f"stdout:\n{npm_result.stdout}\n" + f"stderr:\n{npm_result.stderr}" + ) + + result = subprocess.run( + [ + sys.executable, + str(BINARY_HOOK), + '--machine-id=test-machine', + '--binary-id=test-binary', + '--name=chromium', + '--binproviders=puppeteer', + '--overrides=' + json.dumps({'puppeteer': ['chromium@latest', '--install-deps']}), + ], + cwd=tmpdir, + capture_output=True, + text=True, + env=env, + timeout=120, + ) + 
+ assert result.returncode == 0, ( + "puppeteer binary hook failed\n" + f"stdout:\n{result.stdout}\n" + f"stderr:\n{result.stderr}" + ) + + records = [json.loads(line) for line in result.stdout.splitlines() if line.strip().startswith('{')] + binaries = [r for r in records if r.get('type') == 'Binary' and r.get('name') == 'chromium'] + assert binaries, f"Expected Binary record for chromium, got: {records}" + abspath = binaries[0].get('abspath') + assert abspath and Path(abspath).exists(), f"Chromium binary path invalid: {abspath}" diff --git a/archivebox/plugins/readability/binaries.jsonl b/archivebox/plugins/readability/binaries.jsonl deleted file mode 100644 index e8a1974a..00000000 --- a/archivebox/plugins/readability/binaries.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"type": "Binary", "name": "readability-extractor", "binproviders": "npm,env", "overrides": {"npm": {"packages": ["https://github.com/ArchiveBox/readability-extractor"]}}} diff --git a/archivebox/plugins/readability/on_Crawl__11_readability_install.py b/archivebox/plugins/readability/on_Crawl__11_readability_install.py deleted file mode 100755 index ea0791ef..00000000 --- a/archivebox/plugins/readability/on_Crawl__11_readability_install.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python3 -""" -Detect readability-extractor binary and emit Binary JSONL record. 
- -Output: Binary JSONL record to stdout if readability is found -""" - -import json -import os -import sys - -from abx_pkg import Binary, EnvProvider - - -def get_env(name: str, default: str = '') -> str: - return os.environ.get(name, default).strip() - -def get_env_bool(name: str, default: bool = False) -> bool: - val = get_env(name, '').lower() - if val in ('true', '1', 'yes', 'on'): - return True - if val in ('false', '0', 'no', 'off'): - return False - return default - - -def output_binary_found(binary: Binary, name: str): - """Output Binary JSONL record for an installed binary.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'abspath': str(binary.abspath), - 'version': str(binary.version) if binary.version else '', - 'sha256': binary.sha256 or '', - 'binprovider': 'env', # Already installed - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def output_binary_missing(name: str, binproviders: str): - """Output Binary JSONL record for a missing binary that needs installation.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'binproviders': binproviders, # Providers that can install it - 'overrides': { - 'packages': ['git+https://github.com/ArchiveBox/readability-extractor.git'], - }, - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def main(): - readability_enabled = get_env_bool('READABILITY_ENABLED', True) - readability_binary = get_env('READABILITY_BINARY', 'readability-extractor') - - if not readability_enabled: - sys.exit(0) - - provider = EnvProvider() - try: - binary = Binary(name=readability_binary, binproviders=[provider]).load() - if binary.abspath: - # Binary found - output_binary_found(binary, name='readability-extractor') - else: - # Binary not found - output_binary_missing(name='readability-extractor', binproviders='npm') - except Exception: - # Binary not found - 
output_binary_missing(name='readability-extractor', binproviders='npm') - - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/archivebox/plugins/readability/on_Crawl__35_readability_install.py b/archivebox/plugins/readability/on_Crawl__35_readability_install.py new file mode 100755 index 00000000..6705c6bb --- /dev/null +++ b/archivebox/plugins/readability/on_Crawl__35_readability_install.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +""" +Emit readability-extractor Binary dependency for the crawl. +""" + +import json +import os +import sys + + +def get_env(name: str, default: str = '') -> str: + return os.environ.get(name, default).strip() + +def get_env_bool(name: str, default: bool = False) -> bool: + val = get_env(name, '').lower() + if val in ('true', '1', 'yes', 'on'): + return True + if val in ('false', '0', 'no', 'off'): + return False + return default + + +def output_binary(name: str, binproviders: str): + """Output Binary JSONL record for a dependency.""" + machine_id = os.environ.get('MACHINE_ID', '') + + record = { + 'type': 'Binary', + 'name': name, + 'binproviders': binproviders, + 'overrides': { + 'npm': { + 'packages': ['https://github.com/ArchiveBox/readability-extractor'], + }, + }, + 'machine_id': machine_id, + } + print(json.dumps(record)) + + +def main(): + readability_enabled = get_env_bool('READABILITY_ENABLED', True) + + if not readability_enabled: + sys.exit(0) + + output_binary(name='readability-extractor', binproviders='npm,env') + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/readability/on_Snapshot__56_readability.py b/archivebox/plugins/readability/on_Snapshot__56_readability.py index 2777479a..4c23fa28 100644 --- a/archivebox/plugins/readability/on_Snapshot__56_readability.py +++ b/archivebox/plugins/readability/on_Snapshot__56_readability.py @@ -63,19 +63,25 @@ def find_html_source() -> str | None: # Hooks run in snapshot_dir, sibling extractor outputs are in subdirectories 
search_patterns = [ 'singlefile/singlefile.html', + '*_singlefile/singlefile.html', 'singlefile/*.html', + '*_singlefile/*.html', 'dom/output.html', + '*_dom/output.html', 'dom/*.html', + '*_dom/*.html', 'wget/**/*.html', + '*_wget/**/*.html', 'wget/**/*.htm', + '*_wget/**/*.htm', ] - cwd = Path.cwd() - for pattern in search_patterns: - matches = list(cwd.glob(pattern)) - for match in matches: - if match.is_file() and match.stat().st_size > 0: - return str(match) + for base in (Path.cwd(), Path.cwd().parent): + for pattern in search_patterns: + matches = list(base.glob(pattern)) + for match in matches: + if match.is_file() and match.stat().st_size > 0: + return str(match) return None diff --git a/archivebox/plugins/readability/templates/icon.html b/archivebox/plugins/readability/templates/icon.html index 66336e65..ae67c26f 100644 --- a/archivebox/plugins/readability/templates/icon.html +++ b/archivebox/plugins/readability/templates/icon.html @@ -1 +1 @@ -📖 \ No newline at end of file + diff --git a/archivebox/plugins/redirects/on_Snapshot__31_redirects.bg.js b/archivebox/plugins/redirects/on_Snapshot__25_redirects.bg.js similarity index 93% rename from archivebox/plugins/redirects/on_Snapshot__31_redirects.bg.js rename to archivebox/plugins/redirects/on_Snapshot__25_redirects.bg.js index e051cd50..66aac407 100755 --- a/archivebox/plugins/redirects/on_Snapshot__31_redirects.bg.js +++ b/archivebox/plugins/redirects/on_Snapshot__25_redirects.bg.js @@ -6,7 +6,7 @@ * redirect chain from the initial request. It stays alive through navigation * and emits JSONL on SIGTERM. 
* - * Usage: on_Snapshot__31_redirects.bg.js --url= --snapshot-id= + * Usage: on_Snapshot__25_redirects.bg.js --url= --snapshot-id= * Output: Writes redirects.jsonl */ @@ -169,7 +169,7 @@ async function main() { const snapshotId = args.snapshot_id; if (!url || !snapshotId) { - console.error('Usage: on_Snapshot__31_redirects.bg.js --url= --snapshot-id='); + console.error('Usage: on_Snapshot__25_redirects.bg.js --url= --snapshot-id='); process.exit(1); } @@ -191,11 +191,15 @@ async function main() { // Set up redirect listener BEFORE navigation await setupRedirectListener(); - // Wait for chrome_navigate to complete (BLOCKING) - await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 1000); + // Wait for chrome_navigate to complete (non-fatal) + try { + await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 1000); + } catch (e) { + console.error(`WARN: ${e.message}`); + } // Keep process alive until killed by cleanup - console.error('Redirect tracking complete, waiting for cleanup signal...'); + // console.error('Redirect tracking complete, waiting for cleanup signal...'); // Keep the process alive indefinitely await new Promise(() => {}); // Never resolves diff --git a/archivebox/plugins/redirects/templates/icon.html b/archivebox/plugins/redirects/templates/icon.html index e69de29b..8f32e981 100644 --- a/archivebox/plugins/redirects/templates/icon.html +++ b/archivebox/plugins/redirects/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/redirects/tests/__init__.py b/archivebox/plugins/redirects/tests/__init__.py deleted file mode 100644 index 6bc72141..00000000 --- a/archivebox/plugins/redirects/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the redirects plugin.""" diff --git a/archivebox/plugins/redirects/tests/test_redirects.py b/archivebox/plugins/redirects/tests/test_redirects.py index 0164d461..452c5dd6 100644 --- a/archivebox/plugins/redirects/tests/test_redirects.py +++ b/archivebox/plugins/redirects/tests/test_redirects.py 
@@ -10,6 +10,7 @@ import shutil import subprocess import sys import tempfile +import time from pathlib import Path import pytest @@ -77,14 +78,13 @@ class TestRedirectsWithChrome(TestCase): # Use the environment from chrome_session (already has CHROME_HEADLESS=true) - # Run redirects hook with the active Chrome session - result = subprocess.run( + # Run redirects hook with the active Chrome session (background hook) + result = subprocess.Popen( ['node', str(REDIRECTS_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'], - cwd=str(snapshot_chrome_dir, - env=get_test_env()), - capture_output=True, + cwd=str(snapshot_chrome_dir), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, text=True, - timeout=60, env=env ) @@ -93,6 +93,12 @@ class TestRedirectsWithChrome(TestCase): redirects_data = None + # Wait briefly for background hook to write output + for _ in range(10): + if redirects_output.exists() and redirects_output.stat().st_size > 0: + break + time.sleep(1) + # Try parsing from file first if redirects_output.exists(): with open(redirects_output) as f: @@ -107,7 +113,11 @@ class TestRedirectsWithChrome(TestCase): # Try parsing from stdout if not in file if not redirects_data: - for line in result.stdout.split('\n'): + try: + stdout, stderr = result.communicate(timeout=5) + except subprocess.TimeoutExpired: + stdout, stderr = "", "" + for line in stdout.split('\n'): line = line.strip() if line.startswith('{'): try: @@ -120,9 +130,17 @@ class TestRedirectsWithChrome(TestCase): # Verify hook ran successfully # example.com typically doesn't redirect, so we just verify no errors - self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}") - self.assertNotIn('Traceback', result.stderr) - self.assertNotIn('Error:', result.stderr) + if result.poll() is None: + result.terminate() + try: + stdout, stderr = result.communicate(timeout=5) + except subprocess.TimeoutExpired: + result.kill() + stdout, stderr = result.communicate() + else: + stdout, stderr = 
result.communicate() + self.assertNotIn('Traceback', stderr) + self.assertNotIn('Error:', stderr) except RuntimeError as e: if 'Chrome' in str(e) or 'CDP' in str(e): diff --git a/archivebox/plugins/responses/on_Snapshot__24_responses.bg.js b/archivebox/plugins/responses/on_Snapshot__24_responses.bg.js index 9d8f16ed..c7dd6491 100755 --- a/archivebox/plugins/responses/on_Snapshot__24_responses.bg.js +++ b/archivebox/plugins/responses/on_Snapshot__24_responses.bg.js @@ -33,6 +33,11 @@ const PLUGIN_NAME = 'responses'; const OUTPUT_DIR = '.'; const CHROME_SESSION_DIR = '../chrome'; +let browser = null; +let page = null; +let responseCount = 0; +let shuttingDown = false; + // Resource types to capture (by default, capture everything) const DEFAULT_TYPES = ['script', 'stylesheet', 'font', 'image', 'media', 'xhr', 'websocket']; @@ -199,6 +204,7 @@ async function setupListener() { }; fs.appendFileSync(indexPath, JSON.stringify(indexEntry) + '\n'); + responseCount += 1; } catch (e) { // Ignore errors @@ -208,6 +214,31 @@ async function setupListener() { return { browser, page }; } +function emitResult(status = 'succeeded') { + if (shuttingDown) return; + shuttingDown = true; + + const outputStr = responseCount > 0 + ? 
`responses/ (${responseCount} responses)` + : 'responses/'; + console.log(JSON.stringify({ + type: 'ArchiveResult', + status, + output_str: outputStr, + })); +} + +async function handleShutdown(signal) { + console.error(`\nReceived ${signal}, emitting final results...`); + emitResult('succeeded'); + if (browser) { + try { + browser.disconnect(); + } catch (e) {} + } + process.exit(0); +} + async function main() { const args = parseArgs(); const url = args.url; @@ -224,24 +255,27 @@ async function main() { process.exit(0); } - const timeout = getEnvInt('RESPONSES_TIMEOUT', 30) * 1000; - try { // Set up listener BEFORE navigation - await setupListener(); + const connection = await setupListener(); + browser = connection.browser; + page = connection.page; - // Wait for chrome_navigate to complete (BLOCKING) - // Extra 1s delay for late responses - await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 1000); + // Register signal handlers for graceful shutdown + process.on('SIGTERM', () => handleShutdown('SIGTERM')); + process.on('SIGINT', () => handleShutdown('SIGINT')); - // Output clean JSONL - console.log(JSON.stringify({ - type: 'ArchiveResult', - status: 'succeeded', - output_str: 'responses/', - })); + // Wait for chrome_navigate to complete (non-fatal) + try { + const timeout = getEnvInt('RESPONSES_TIMEOUT', 30) * 1000; + await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 1000); + } catch (e) { + console.error(`WARN: ${e.message}`); + } - process.exit(0); + // console.error('Responses listener active, waiting for cleanup signal...'); + await new Promise(() => {}); // Keep alive until SIGTERM + return; } catch (e) { const error = `${e.name}: ${e.message}`; diff --git a/archivebox/plugins/responses/templates/icon.html b/archivebox/plugins/responses/templates/icon.html index e69de29b..51210acb 100644 --- a/archivebox/plugins/responses/templates/icon.html +++ b/archivebox/plugins/responses/templates/icon.html @@ -0,0 +1 @@ + diff --git 
a/archivebox/plugins/responses/tests/__init__.py b/archivebox/plugins/responses/tests/__init__.py deleted file mode 100644 index d31fa890..00000000 --- a/archivebox/plugins/responses/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the responses plugin.""" diff --git a/archivebox/plugins/responses/tests/test_responses.py b/archivebox/plugins/responses/tests/test_responses.py index c66f7652..82a5fa77 100644 --- a/archivebox/plugins/responses/tests/test_responses.py +++ b/archivebox/plugins/responses/tests/test_responses.py @@ -10,6 +10,7 @@ import shutil import subprocess import sys import tempfile +import time from pathlib import Path import pytest @@ -76,22 +77,36 @@ class TestResponsesWithChrome(TestCase): # Use the environment from chrome_session (already has CHROME_HEADLESS=true) - # Run responses hook with the active Chrome session - result = subprocess.run( + # Run responses hook with the active Chrome session (background hook) + result = subprocess.Popen( ['node', str(RESPONSES_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'], - cwd=str(snapshot_chrome_dir, - env=get_test_env()), - capture_output=True, + cwd=str(snapshot_chrome_dir), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, text=True, - timeout=120, # Longer timeout as it waits for navigation env=env ) # Check for output directory and index file index_output = snapshot_chrome_dir / 'index.jsonl' - # Verify hook ran (may timeout waiting for page_loaded.txt in test mode) - self.assertNotIn('Traceback', result.stderr) + # Wait briefly for background hook to write output + for _ in range(10): + if index_output.exists() and index_output.stat().st_size > 0: + break + time.sleep(1) + + # Verify hook ran (may keep running waiting for cleanup signal) + if result.poll() is None: + result.terminate() + try: + stdout, stderr = result.communicate(timeout=5) + except subprocess.TimeoutExpired: + result.kill() + stdout, stderr = result.communicate() + else: + stdout, stderr = 
result.communicate() + self.assertNotIn('Traceback', stderr) # If index file exists, verify it's valid JSONL if index_output.exists(): diff --git a/archivebox/plugins/screenshot/on_Snapshot__51_screenshot.js b/archivebox/plugins/screenshot/on_Snapshot__51_screenshot.js index fae0bf93..76390846 100644 --- a/archivebox/plugins/screenshot/on_Snapshot__51_screenshot.js +++ b/archivebox/plugins/screenshot/on_Snapshot__51_screenshot.js @@ -58,7 +58,21 @@ const CHROME_SESSION_DIR = '../chrome'; // Check if staticfile extractor already downloaded this URL const STATICFILE_DIR = '../staticfile'; function hasStaticFileOutput() { - return fs.existsSync(STATICFILE_DIR) && fs.readdirSync(STATICFILE_DIR).length > 0; + if (!fs.existsSync(STATICFILE_DIR)) return false; + const stdoutPath = path.join(STATICFILE_DIR, 'stdout.log'); + if (!fs.existsSync(stdoutPath)) return false; + const stdout = fs.readFileSync(stdoutPath, 'utf8'); + for (const line of stdout.split('\n')) { + const trimmed = line.trim(); + if (!trimmed.startsWith('{')) continue; + try { + const record = JSON.parse(trimmed); + if (record.type === 'ArchiveResult' && record.status === 'succeeded') { + return true; + } + } catch (e) {} + } + return false; } // Wait for chrome tab to be fully loaded diff --git a/archivebox/plugins/screenshot/templates/icon.html b/archivebox/plugins/screenshot/templates/icon.html index e76b5f98..4236aee3 100644 --- a/archivebox/plugins/screenshot/templates/icon.html +++ b/archivebox/plugins/screenshot/templates/icon.html @@ -1 +1 @@ -📷 \ No newline at end of file + diff --git a/archivebox/plugins/screenshot/tests/test_screenshot.py b/archivebox/plugins/screenshot/tests/test_screenshot.py index 2d804757..9627ec02 100644 --- a/archivebox/plugins/screenshot/tests/test_screenshot.py +++ b/archivebox/plugins/screenshot/tests/test_screenshot.py @@ -166,7 +166,7 @@ def test_skips_when_staticfile_exists(): # Create staticfile output to simulate staticfile extractor already ran staticfile_dir = 
snapshot_dir / 'staticfile' staticfile_dir.mkdir() - (staticfile_dir / 'index.html').write_text('') + (staticfile_dir / 'stdout.log').write_text('{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n') env = get_test_env() result = subprocess.run( diff --git a/archivebox/plugins/search_backend_ripgrep/binaries.jsonl b/archivebox/plugins/search_backend_ripgrep/binaries.jsonl deleted file mode 100644 index f66337f7..00000000 --- a/archivebox/plugins/search_backend_ripgrep/binaries.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"type": "Binary", "name": "rg", "binproviders": "apt,brew,env", "overrides": {"apt": {"packages": ["ripgrep"]}}} diff --git a/archivebox/plugins/search_backend_ripgrep/on_Crawl__00_ripgrep_install.py b/archivebox/plugins/search_backend_ripgrep/on_Crawl__00_ripgrep_install.py deleted file mode 100755 index 57502514..00000000 --- a/archivebox/plugins/search_backend_ripgrep/on_Crawl__00_ripgrep_install.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 -""" -Install hook for ripgrep binary. - -Runs at crawl start to verify ripgrep is available when SEARCH_BACKEND_ENGINE='ripgrep'. -Outputs JSONL for Binary and Machine config updates. -Uses abx-pkg to handle installation via apt/brew providers. 
-""" - -import os -import sys -import json - - -def find_ripgrep() -> dict | None: - """Find ripgrep binary using abx-pkg, respecting RIPGREP_BINARY env var.""" - # Quick check: if RIPGREP_BINARY is set and exists, skip expensive lookup - configured_binary = os.environ.get('RIPGREP_BINARY', '').strip() - if configured_binary and os.path.isfile(configured_binary) and os.access(configured_binary, os.X_OK): - # Binary is already configured and valid - exit immediately - sys.exit(0) - - try: - from abx_pkg import Binary, EnvProvider, AptProvider, BrewProvider, BinProviderOverrides - - # Try to find ripgrep using abx-pkg (EnvProvider checks PATH, apt/brew handle installation) - binary = Binary( - name='rg', - binproviders=[EnvProvider(), AptProvider(), BrewProvider()], - overrides={ - 'apt': {'packages': ['ripgrep']}, - 'brew': {'packages': ['ripgrep']}, - } - ) - - loaded = binary.load() - if loaded and loaded.abspath: - return { - 'name': 'rg', - 'abspath': str(loaded.abspath), - 'version': str(loaded.version) if loaded.version else None, - 'sha256': loaded.sha256 if hasattr(loaded, 'sha256') else None, - 'binprovider': loaded.binprovider.name if loaded.binprovider else 'env', - } - except Exception as e: - print(f"Error loading ripgrep: {e}", file=sys.stderr) - pass - - return None - - -def main(): - # Only proceed if ripgrep backend is enabled - search_backend_engine = os.environ.get('SEARCH_BACKEND_ENGINE', 'ripgrep').strip() - if search_backend_engine != 'ripgrep': - # Not using ripgrep, exit successfully without output - sys.exit(0) - - result = find_ripgrep() - - if result and result.get('abspath'): - print(json.dumps({ - 'type': 'Binary', - 'name': result['name'], - 'abspath': result['abspath'], - 'version': result['version'], - 'binprovider': result['binprovider'], - })) - - print(json.dumps({ - 'type': 'Machine', - '_method': 'update', - 'key': 'config/RIPGREP_BINARY', - 'value': result['abspath'], - })) - - if result['version']: - print(json.dumps({ - 
'type': 'Machine', - '_method': 'update', - 'key': 'config/RIPGREP_VERSION', - 'value': result['version'], - })) - - sys.exit(0) - else: - print(f"Ripgrep binary not found (install with: apt install ripgrep or brew install ripgrep)", file=sys.stderr) - sys.exit(1) - - -if __name__ == '__main__': - main() diff --git a/archivebox/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.py b/archivebox/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.py new file mode 100755 index 00000000..071dbb5b --- /dev/null +++ b/archivebox/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +""" +Emit ripgrep Binary dependency for the crawl. +""" + +import os +import sys +import json + + +def main(): + # Only proceed if ripgrep backend is enabled + search_backend_engine = os.environ.get('SEARCH_BACKEND_ENGINE', 'ripgrep').strip() + if search_backend_engine != 'ripgrep': + # Not using ripgrep, exit successfully without output + sys.exit(0) + + machine_id = os.environ.get('MACHINE_ID', '') + print(json.dumps({ + 'type': 'Binary', + 'name': 'rg', + 'binproviders': 'apt,brew,env', + 'overrides': { + 'apt': {'packages': ['ripgrep']}, + }, + 'machine_id': machine_id, + })) + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/search_backend_ripgrep/search.py b/archivebox/plugins/search_backend_ripgrep/search.py index 140a32d1..171b60bb 100644 --- a/archivebox/plugins/search_backend_ripgrep/search.py +++ b/archivebox/plugins/search_backend_ripgrep/search.py @@ -18,8 +18,6 @@ import shutil from pathlib import Path from typing import List, Iterable -from django.conf import settings - def get_env(name: str, default: str = '') -> str: return os.environ.get(name, default).strip() @@ -46,6 +44,16 @@ def get_env_array(name: str, default: list[str] | None = None) -> list[str]: return default if default is not None else [] +def _get_archive_dir() -> Path: + archive_dir = 
os.environ.get('ARCHIVE_DIR', '').strip() + if archive_dir: + return Path(archive_dir) + data_dir = os.environ.get('DATA_DIR', '').strip() + if data_dir: + return Path(data_dir) / 'archive' + return Path.cwd() / 'archive' + + def search(query: str) -> List[str]: """Search for snapshots using ripgrep.""" rg_binary = get_env('RIPGREP_BINARY', 'rg') @@ -57,7 +65,7 @@ def search(query: str) -> List[str]: ripgrep_args = get_env_array('RIPGREP_ARGS', []) ripgrep_args_extra = get_env_array('RIPGREP_ARGS_EXTRA', []) - archive_dir = Path(settings.ARCHIVE_DIR) + archive_dir = _get_archive_dir() if not archive_dir.exists(): return [] diff --git a/archivebox/plugins/search_backend_ripgrep/tests/__init__.py b/archivebox/plugins/search_backend_ripgrep/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py b/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py index 8057783a..60eb6e3a 100644 --- a/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py +++ b/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py @@ -23,7 +23,7 @@ import pytest def test_ripgrep_hook_detects_binary_from_path(): """Test that ripgrep hook finds binary using abx-pkg when env var is just a name.""" - hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py' + hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py' # Skip if rg is not installed if not shutil.which('rg'): @@ -44,26 +44,19 @@ def test_ripgrep_hook_detects_binary_from_path(): assert result.returncode == 0, f"Hook failed: {result.stderr}" - # Parse JSONL output (filter out COMPUTED: lines) + # Parse JSONL output (filter out non-JSON lines) lines = [line for line in result.stdout.strip().split('\n') if line.strip() and line.strip().startswith('{')] - assert len(lines) >= 2, "Expected at least 2 JSONL lines (Binary + Machine config)" + assert 
len(lines) >= 1, "Expected at least 1 JSONL line (Binary)" binary = json.loads(lines[0]) assert binary['type'] == 'Binary' assert binary['name'] == 'rg' - assert '/' in binary['abspath'], "Expected full path, not just binary name" - assert Path(binary['abspath']).is_file(), "Binary path should exist" - assert binary['version'], "Version should be detected" - - machine_config = json.loads(lines[1]) - assert machine_config['type'] == 'Machine' - assert machine_config['key'] == 'config/RIPGREP_BINARY' - assert '/' in machine_config['value'], "Machine config should store full path" + assert 'binproviders' in binary, "Expected binproviders declaration" def test_ripgrep_hook_skips_when_backend_not_ripgrep(): """Test that ripgrep hook exits silently when search backend is not ripgrep.""" - hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py' + hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py' env = os.environ.copy() env['SEARCH_BACKEND_ENGINE'] = 'sqlite' # Different backend @@ -82,7 +75,7 @@ def test_ripgrep_hook_skips_when_backend_not_ripgrep(): def test_ripgrep_hook_handles_absolute_path(): """Test that ripgrep hook exits successfully when RIPGREP_BINARY is a valid absolute path.""" - hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py' + hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py' rg_path = shutil.which('rg') if not rg_path: @@ -100,9 +93,9 @@ def test_ripgrep_hook_handles_absolute_path(): timeout=10, ) - # When binary is already configured with valid absolute path, hook exits early without output assert result.returncode == 0, f"Hook should exit successfully when binary already configured: {result.stderr}" - # No output is expected/needed when binary is already valid + lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')] + assert lines, "Expected Binary JSONL output when backend is ripgrep" @pytest.mark.django_db @@ 
-115,6 +108,8 @@ def test_machine_config_overrides_base_config(): """ from archivebox.machine.models import Machine, Binary + import archivebox.machine.models as models + models._CURRENT_MACHINE = None machine = Machine.current() # Simulate a hook detecting chrome and storing it with a different path than base config @@ -177,7 +172,9 @@ def test_install_creates_binary_records(): This verifies the Binary model works correctly with the database. """ from archivebox.machine.models import Machine, Binary + import archivebox.machine.models as models + models._CURRENT_MACHINE = None machine = Machine.current() initial_binary_count = Binary.objects.filter(machine=machine).count() @@ -188,7 +185,7 @@ def test_install_creates_binary_records(): abspath='/usr/bin/test-binary', version='1.0.0', binprovider='env', - status='succeeded' + status=Binary.StatusChoices.INSTALLED ) # Verify Binary record was created @@ -220,7 +217,7 @@ def test_ripgrep_only_detected_when_backend_enabled(): if not shutil.which('rg'): pytest.skip("ripgrep not installed") - hook_path = Path(__file__).parent.parent / 'on_Crawl__00_install_ripgrep.py' + hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py' # Test 1: With ripgrep backend - should output Binary record env1 = os.environ.copy() @@ -237,8 +234,7 @@ def test_ripgrep_only_detected_when_backend_enabled(): assert result1.returncode == 0, f"Hook should succeed with ripgrep backend: {result1.stderr}" # Should output Binary JSONL when backend is ripgrep - assert 'Binary' in result1.stdout or 'COMPUTED:' in result1.stdout, \ - "Should output Binary or COMPUTED when backend=ripgrep" + assert 'Binary' in result1.stdout, "Should output Binary when backend=ripgrep" # Test 2: With different backend - should output nothing env2 = os.environ.copy() diff --git a/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py b/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py index 75513d34..1f0ce7fa 100644 
--- a/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py +++ b/archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py @@ -117,8 +117,8 @@ class TestRipgrepSearch(TestCase): 'title/title.txt': 'JavaScript Basics', }) self._create_snapshot('snap-003', { - 'wget/index.html': 'Web archiving best practices', - 'title/title.txt': 'Web Archiving Guide', + 'wget/index.html': 'Web archiving guide and best practices', + 'title/title.txt': 'Web Archiving guide', }) # Patch settings diff --git a/archivebox/plugins/search_backend_sonic/templates/icon.html b/archivebox/plugins/search_backend_sonic/templates/icon.html index e69de29b..bf81a372 100644 --- a/archivebox/plugins/search_backend_sonic/templates/icon.html +++ b/archivebox/plugins/search_backend_sonic/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/search_backend_sqlite/search.py b/archivebox/plugins/search_backend_sqlite/search.py index afd52f15..0d3f5539 100644 --- a/archivebox/plugins/search_backend_sqlite/search.py +++ b/archivebox/plugins/search_backend_sqlite/search.py @@ -14,8 +14,6 @@ import sqlite3 from pathlib import Path from typing import List, Iterable -from django.conf import settings - # Config with old var names for backwards compatibility SQLITEFTS_DB = os.environ.get('SQLITEFTS_DB', 'search.sqlite3').strip() @@ -23,9 +21,16 @@ FTS_SEPARATE_DATABASE = os.environ.get('FTS_SEPARATE_DATABASE', 'true').lower() FTS_TOKENIZERS = os.environ.get('FTS_TOKENIZERS', 'porter unicode61 remove_diacritics 2').strip() +def _get_data_dir() -> Path: + data_dir = os.environ.get('DATA_DIR', '').strip() + if data_dir: + return Path(data_dir) + return Path.cwd() / 'data' + + def get_db_path() -> Path: """Get path to the search index database.""" - return Path(settings.DATA_DIR) / SQLITEFTS_DB + return _get_data_dir() / SQLITEFTS_DB def search(query: str) -> List[str]: diff --git a/archivebox/plugins/search_backend_sqlite/templates/icon.html 
b/archivebox/plugins/search_backend_sqlite/templates/icon.html index e69de29b..3c9f8646 100644 --- a/archivebox/plugins/search_backend_sqlite/templates/icon.html +++ b/archivebox/plugins/search_backend_sqlite/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/search_backend_sqlite/tests/__init__.py b/archivebox/plugins/search_backend_sqlite/tests/__init__.py deleted file mode 100644 index 6bef82e4..00000000 --- a/archivebox/plugins/search_backend_sqlite/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the SQLite FTS5 search backend.""" diff --git a/archivebox/plugins/seo/templates/icon.html b/archivebox/plugins/seo/templates/icon.html index e69de29b..1306d22d 100644 --- a/archivebox/plugins/seo/templates/icon.html +++ b/archivebox/plugins/seo/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/seo/tests/__init__.py b/archivebox/plugins/seo/tests/__init__.py deleted file mode 100644 index f2b12854..00000000 --- a/archivebox/plugins/seo/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the SEO plugin.""" diff --git a/archivebox/plugins/seo/tests/test_seo.py b/archivebox/plugins/seo/tests/test_seo.py index e365e4b0..63233b16 100644 --- a/archivebox/plugins/seo/tests/test_seo.py +++ b/archivebox/plugins/seo/tests/test_seo.py @@ -79,8 +79,7 @@ class TestSEOWithChrome(TestCase): # Run SEO hook with the active Chrome session result = subprocess.run( ['node', str(SEO_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'], - cwd=str(snapshot_chrome_dir, - env=get_test_env()), + cwd=str(snapshot_chrome_dir), capture_output=True, text=True, timeout=60, diff --git a/archivebox/plugins/singlefile/binaries.jsonl b/archivebox/plugins/singlefile/binaries.jsonl deleted file mode 100644 index e1241163..00000000 --- a/archivebox/plugins/singlefile/binaries.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"type": "Binary", "name": "single-file", "binproviders": "npm,env", "overrides": {"npm": {"packages": ["single-file-cli"]}}} diff --git 
a/archivebox/plugins/singlefile/config.json b/archivebox/plugins/singlefile/config.json index fe4962a0..c522efba 100644 --- a/archivebox/plugins/singlefile/config.json +++ b/archivebox/plugins/singlefile/config.json @@ -25,7 +25,7 @@ "type": "string", "default": "", "x-fallback": "CHROME_BINARY", - "description": "Path to Chrome/Chromium binary" + "description": "Path to Chromium binary" }, "SINGLEFILE_TIMEOUT": { "type": "integer", diff --git a/archivebox/plugins/singlefile/on_Crawl__08_singlefile_install.py b/archivebox/plugins/singlefile/on_Crawl__08_singlefile_install.py deleted file mode 100755 index b1bb2a68..00000000 --- a/archivebox/plugins/singlefile/on_Crawl__08_singlefile_install.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python3 -""" -Detect single-file binary and emit Binary JSONL record. - -Output: Binary JSONL record to stdout if single-file is found -""" - -import json -import os -import sys - -from abx_pkg import Binary, EnvProvider - - -def get_env(name: str, default: str = '') -> str: - return os.environ.get(name, default).strip() - -def get_env_bool(name: str, default: bool = False) -> bool: - val = get_env(name, '').lower() - if val in ('true', '1', 'yes', 'on'): - return True - if val in ('false', '0', 'no', 'off'): - return False - return default - - -def output_binary_found(binary: Binary, name: str): - """Output Binary JSONL record for an installed binary.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'abspath': str(binary.abspath), - 'version': str(binary.version) if binary.version else '', - 'sha256': binary.sha256 or '', - 'binprovider': 'env', # Already installed - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def output_binary_missing(name: str, binproviders: str): - """Output Binary JSONL record for a missing binary that needs installation.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 
'binproviders': binproviders, # Providers that can install it - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def main(): - singlefile_enabled = get_env_bool('SINGLEFILE_ENABLED', True) - - if not singlefile_enabled: - sys.exit(0) - - provider = EnvProvider() - found = False - - # Try single-file-cli first, then single-file - for binary_name in ['single-file-cli', 'single-file']: - try: - binary = Binary(name=binary_name, binproviders=[provider]).load() - if binary.abspath: - # Binary found - output_binary_found(binary, name='single-file') - found = True - break - except Exception: - continue - - if not found: - # Binary not found - output_binary_missing(name='single-file', binproviders='npm') - - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/archivebox/plugins/singlefile/on_Crawl__45_singlefile_install.py b/archivebox/plugins/singlefile/on_Crawl__45_singlefile_install.py new file mode 100755 index 00000000..f2d22b3e --- /dev/null +++ b/archivebox/plugins/singlefile/on_Crawl__45_singlefile_install.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +""" +Emit single-file Binary dependency for the crawl. 
+""" + +import json +import os +import sys + + +def get_env(name: str, default: str = '') -> str: + return os.environ.get(name, default).strip() + +def get_env_bool(name: str, default: bool = False) -> bool: + val = get_env(name, '').lower() + if val in ('true', '1', 'yes', 'on'): + return True + if val in ('false', '0', 'no', 'off'): + return False + return default + + +def output_binary(name: str, binproviders: str, overrides: dict | None = None): + """Output Binary JSONL record for a dependency.""" + machine_id = os.environ.get('MACHINE_ID', '') + + record = { + 'type': 'Binary', + 'name': name, + 'binproviders': binproviders, + 'machine_id': machine_id, + } + if overrides: + record['overrides'] = overrides + print(json.dumps(record)) + + +def main(): + singlefile_enabled = get_env_bool('SINGLEFILE_ENABLED', True) + + if not singlefile_enabled: + sys.exit(0) + + output_binary( + name='single-file', + binproviders='npm,env', + overrides={'npm': {'packages': ['single-file-cli']}}, + ) + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/singlefile/on_Crawl__04_singlefile_install.js b/archivebox/plugins/singlefile/on_Crawl__82_singlefile_install.js similarity index 99% rename from archivebox/plugins/singlefile/on_Crawl__04_singlefile_install.js rename to archivebox/plugins/singlefile/on_Crawl__82_singlefile_install.js index 7637bf98..c0a0b4da 100755 --- a/archivebox/plugins/singlefile/on_Crawl__04_singlefile_install.js +++ b/archivebox/plugins/singlefile/on_Crawl__82_singlefile_install.js @@ -7,7 +7,7 @@ * * Extension: https://chromewebstore.google.com/detail/mpiodijhokgodhhofbcjdecpffjipkle * - * Priority: 04 (early) - Must install before Chrome session starts at Crawl level + * Priority: 82 - Must install before Chrome session starts at Crawl level * Hook: on_Crawl (runs once per crawl, not per snapshot) * * This extension automatically: diff --git a/archivebox/plugins/singlefile/on_Snapshot__50_singlefile.py 
b/archivebox/plugins/singlefile/on_Snapshot__50_singlefile.py index ec5188d8..aa73d69e 100644 --- a/archivebox/plugins/singlefile/on_Snapshot__50_singlefile.py +++ b/archivebox/plugins/singlefile/on_Snapshot__50_singlefile.py @@ -23,6 +23,8 @@ import json import os import subprocess import sys +import time +from urllib.request import urlopen from pathlib import Path import rich_click as click @@ -75,7 +77,22 @@ STATICFILE_DIR = '../staticfile' def has_staticfile_output() -> bool: """Check if staticfile extractor already downloaded this URL.""" staticfile_dir = Path(STATICFILE_DIR) - return staticfile_dir.exists() and any(staticfile_dir.iterdir()) + if not staticfile_dir.exists(): + return False + stdout_log = staticfile_dir / 'stdout.log' + if not stdout_log.exists(): + return False + for line in stdout_log.read_text(errors='ignore').splitlines(): + line = line.strip() + if not line.startswith('{'): + continue + try: + record = json.loads(line) + except json.JSONDecodeError: + continue + if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded': + return True + return False # Chrome session directory (relative to extractor output dir) @@ -84,12 +101,17 @@ def has_staticfile_output() -> bool: CHROME_SESSION_DIR = '../chrome' -def get_cdp_url() -> str | None: +def get_cdp_url(wait_seconds: float = 0.0) -> str | None: """Get CDP URL from chrome plugin if available.""" cdp_file = Path(CHROME_SESSION_DIR) / 'cdp_url.txt' - if cdp_file.exists(): - return cdp_file.read_text().strip() - return None + deadline = time.time() + max(wait_seconds, 0.0) + while True: + if cdp_file.exists(): + cdp_url = cdp_file.read_text().strip() + return cdp_url or None + if time.time() >= deadline: + return None + time.sleep(0.2) def get_port_from_cdp_url(cdp_url: str) -> str | None: @@ -101,6 +123,14 @@ def get_port_from_cdp_url(cdp_url: str) -> str | None: return None +def is_cdp_server_available(cdp_remote_url: str) -> bool: + try: + with 
urlopen(f'{cdp_remote_url}/json/version', timeout=1) as resp: + return resp.status == 200 + except Exception: + return False + + def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]: """ Archive URL using SingleFile. @@ -122,19 +152,30 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]: cmd = [binary, *singlefile_args] - # Try to use existing Chrome session via CDP - cdp_url = get_cdp_url() + # Try to use existing Chrome session via CDP (prefer HTTP base URL) + cdp_wait = min(10, max(1, timeout // 10)) + cdp_url = get_cdp_url(wait_seconds=cdp_wait) + cdp_remote_url = None if cdp_url: - # SingleFile can connect to existing browser via WebSocket - # Extract port from CDP URL (ws://127.0.0.1:PORT/...) - port = get_port_from_cdp_url(cdp_url) - if port: - cmd.extend(['--browser-server', f'http://127.0.0.1:{port}']) + if cdp_url.startswith(('http://', 'https://')): + cdp_remote_url = cdp_url + else: + port = get_port_from_cdp_url(cdp_url) + if port: + cdp_remote_url = f'http://127.0.0.1:{port}' + else: + cdp_remote_url = cdp_url + + if cdp_remote_url and not is_cdp_server_available(cdp_remote_url): + cdp_remote_url = None + + if cdp_remote_url: + cmd.extend(['--browser-server', cdp_remote_url]) elif chrome: cmd.extend(['--browser-executable-path', chrome]) - # Pass Chrome arguments (includes user-data-dir and other launch options) - if chrome_args: + # Pass Chrome arguments (only when launching a new browser) + if chrome_args and not cdp_remote_url: # SingleFile expects --browser-args as a JSON array string cmd.extend(['--browser-args', json.dumps(chrome_args)]) @@ -143,7 +184,7 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]: cmd.append('--browser-ignore-insecure-certs') if user_agent: - cmd.extend(['--browser-user-agent', user_agent]) + cmd.extend(['--user-agent', user_agent]) if cookies_file and Path(cookies_file).is_file(): cmd.extend(['--browser-cookies-file', cookies_file]) @@ -165,11 
+206,21 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]: return True, str(output_path), '' else: stderr = result.stderr.decode('utf-8', errors='replace') + stdout = result.stdout.decode('utf-8', errors='replace') if 'ERR_NAME_NOT_RESOLVED' in stderr: return False, None, 'DNS resolution failed' if 'ERR_CONNECTION_REFUSED' in stderr: return False, None, 'Connection refused' - return False, None, f'SingleFile failed: {stderr[:200]}' + detail = (stderr or stdout).strip() + if len(detail) > 2000: + detail = detail[:2000] + cmd_preview = list(cmd) + if '--browser-args' in cmd_preview: + idx = cmd_preview.index('--browser-args') + if idx + 1 < len(cmd_preview): + cmd_preview[idx + 1] = '' + cmd_str = ' '.join(cmd_preview) + return False, None, f'SingleFile failed (cmd={cmd_str}): {detail}' except subprocess.TimeoutExpired: return False, None, f'Timed out after {timeout} seconds' diff --git a/archivebox/plugins/singlefile/templates/icon.html b/archivebox/plugins/singlefile/templates/icon.html index 31f4673e..cd055f8b 100644 --- a/archivebox/plugins/singlefile/templates/icon.html +++ b/archivebox/plugins/singlefile/templates/icon.html @@ -1 +1 @@ -📦 \ No newline at end of file + diff --git a/archivebox/plugins/singlefile/tests/test_singlefile.py b/archivebox/plugins/singlefile/tests/test_singlefile.py index a473f152..c5e8d3e7 100644 --- a/archivebox/plugins/singlefile/tests/test_singlefile.py +++ b/archivebox/plugins/singlefile/tests/test_singlefile.py @@ -13,6 +13,7 @@ Tests verify: import json import os import subprocess +import sys import tempfile from pathlib import Path @@ -66,7 +67,7 @@ def test_singlefile_cli_archives_example_com(): # Run singlefile snapshot hook result = subprocess.run( - ['python', str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'], + [sys.executable, str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'], cwd=tmpdir, capture_output=True, text=True, @@ -120,7 +121,7 @@ def 
test_singlefile_with_chrome_session(): # Run singlefile - it should find and use the existing Chrome session result = subprocess.run( - ['python', str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=singlefile-test-snap'], + [sys.executable, str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=singlefile-test-snap'], cwd=str(singlefile_output_dir), capture_output=True, text=True, @@ -150,7 +151,7 @@ def test_singlefile_disabled_skips(): env['SINGLEFILE_ENABLED'] = 'False' result = subprocess.run( - ['python', str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-disabled'], + [sys.executable, str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-disabled'], cwd=tmpdir, capture_output=True, text=True, diff --git a/archivebox/plugins/ssl/on_Snapshot__23_ssl.bg.js b/archivebox/plugins/ssl/on_Snapshot__23_ssl.bg.js index 5b98801b..59740e5c 100755 --- a/archivebox/plugins/ssl/on_Snapshot__23_ssl.bg.js +++ b/archivebox/plugins/ssl/on_Snapshot__23_ssl.bg.js @@ -32,6 +32,11 @@ const OUTPUT_DIR = '.'; const OUTPUT_FILE = 'ssl.jsonl'; const CHROME_SESSION_DIR = '../chrome'; +let browser = null; +let page = null; +let sslCaptured = false; +let shuttingDown = false; + async function setupListener(url) { const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE); const timeout = getEnvInt('SSL_TIMEOUT', 30) * 1000; @@ -94,6 +99,7 @@ async function setupListener(url) { // Write output directly to file fs.writeFileSync(outputPath, JSON.stringify(sslInfo, null, 2)); + sslCaptured = true; } catch (e) { // Ignore errors @@ -103,6 +109,29 @@ async function setupListener(url) { return { browser, page }; } +function emitResult(status = 'succeeded') { + if (shuttingDown) return; + shuttingDown = true; + + const outputStr = sslCaptured ? 
OUTPUT_FILE : OUTPUT_FILE; + console.log(JSON.stringify({ + type: 'ArchiveResult', + status, + output_str: outputStr, + })); +} + +async function handleShutdown(signal) { + console.error(`\nReceived ${signal}, emitting final results...`); + emitResult('succeeded'); + if (browser) { + try { + browser.disconnect(); + } catch (e) {} + } + process.exit(0); +} + async function main() { const args = parseArgs(); const url = args.url; @@ -119,23 +148,27 @@ async function main() { process.exit(0); } - const timeout = getEnvInt('SSL_TIMEOUT', 30) * 1000; - try { // Set up listener BEFORE navigation - await setupListener(url); + const connection = await setupListener(url); + browser = connection.browser; + page = connection.page; - // Wait for chrome_navigate to complete (BLOCKING) - await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4); + // Register signal handlers for graceful shutdown + process.on('SIGTERM', () => handleShutdown('SIGTERM')); + process.on('SIGINT', () => handleShutdown('SIGINT')); - // Output clean JSONL - console.log(JSON.stringify({ - type: 'ArchiveResult', - status: 'succeeded', - output_str: OUTPUT_FILE, - })); + // Wait for chrome_navigate to complete (non-fatal) + try { + const timeout = getEnvInt('SSL_TIMEOUT', 30) * 1000; + await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4); + } catch (e) { + console.error(`WARN: ${e.message}`); + } - process.exit(0); + // console.error('SSL listener active, waiting for cleanup signal...'); + await new Promise(() => {}); // Keep alive until SIGTERM + return; } catch (e) { const error = `${e.name}: ${e.message}`; diff --git a/archivebox/plugins/ssl/templates/icon.html b/archivebox/plugins/ssl/templates/icon.html index e69de29b..1707e8b9 100644 --- a/archivebox/plugins/ssl/templates/icon.html +++ b/archivebox/plugins/ssl/templates/icon.html @@ -0,0 +1 @@ + diff --git a/archivebox/plugins/ssl/tests/__init__.py b/archivebox/plugins/ssl/tests/__init__.py deleted file mode 100644 index 48a022d5..00000000 --- 
a/archivebox/plugins/ssl/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the SSL plugin.""" diff --git a/archivebox/plugins/ssl/tests/test_ssl.py b/archivebox/plugins/ssl/tests/test_ssl.py index 6261c26b..5dfa17df 100644 --- a/archivebox/plugins/ssl/tests/test_ssl.py +++ b/archivebox/plugins/ssl/tests/test_ssl.py @@ -10,6 +10,7 @@ import shutil import subprocess import sys import tempfile +import time from pathlib import Path import pytest @@ -19,7 +20,6 @@ from django.test import TestCase sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests')) from chrome_test_helpers import ( chrome_session, - get_test_env, get_plugin_dir, get_hook_script, ) @@ -76,17 +76,28 @@ class TestSSLWithChrome(TestCase): # Use the environment from chrome_session (already has CHROME_HEADLESS=true) - # Run SSL hook with the active Chrome session - result = subprocess.run( + # Run SSL hook with the active Chrome session (background hook) + result = subprocess.Popen( ['node', str(SSL_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'], - cwd=str(snapshot_chrome_dir, - env=get_test_env()), - capture_output=True, + cwd=str(snapshot_chrome_dir), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, text=True, - timeout=60, env=env ) + # Allow it to run briefly, then terminate (background hook) + time.sleep(3) + if result.poll() is None: + result.terminate() + try: + stdout, stderr = result.communicate(timeout=5) + except subprocess.TimeoutExpired: + result.kill() + stdout, stderr = result.communicate() + else: + stdout, stderr = result.communicate() + # Check for output file ssl_output = snapshot_chrome_dir / 'ssl.jsonl' @@ -106,7 +117,7 @@ class TestSSLWithChrome(TestCase): # Try parsing from stdout if not in file if not ssl_data: - for line in result.stdout.split('\n'): + for line in stdout.split('\n'): line = line.strip() if line.startswith('{'): try: @@ -118,9 +129,8 @@ class TestSSLWithChrome(TestCase): continue # Verify hook ran successfully - 
self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}") - self.assertNotIn('Traceback', result.stderr) - self.assertNotIn('Error:', result.stderr) + self.assertNotIn('Traceback', stderr) + self.assertNotIn('Error:', stderr) # example.com uses HTTPS, so we MUST get SSL certificate data self.assertIsNotNone(ssl_data, "No SSL data extracted from HTTPS URL") diff --git a/archivebox/plugins/staticfile/on_Snapshot__32_staticfile.bg.js b/archivebox/plugins/staticfile/on_Snapshot__26_staticfile.bg.js similarity index 95% rename from archivebox/plugins/staticfile/on_Snapshot__32_staticfile.bg.js rename to archivebox/plugins/staticfile/on_Snapshot__26_staticfile.bg.js index 1ae44977..33531d93 100644 --- a/archivebox/plugins/staticfile/on_Snapshot__32_staticfile.bg.js +++ b/archivebox/plugins/staticfile/on_Snapshot__26_staticfile.bg.js @@ -6,7 +6,7 @@ * Content-Type from the initial response. If it's a static file (PDF, image, etc.), * it downloads the content directly using CDP. * - * Usage: on_Snapshot__32_staticfile.bg.js --url= --snapshot-id= + * Usage: on_Snapshot__26_staticfile.bg.js --url= --snapshot-id= * Output: Downloads static file */ @@ -288,7 +288,7 @@ async function main() { const snapshotId = args.snapshot_id; if (!url || !snapshotId) { - console.error('Usage: on_Snapshot__32_staticfile.bg.js --url= --snapshot-id='); + console.error('Usage: on_Snapshot__26_staticfile.bg.js --url= --snapshot-id='); process.exit(1); } @@ -310,11 +310,15 @@ async function main() { // Set up static file listener BEFORE navigation await setupStaticFileListener(); - // Wait for chrome_navigate to complete (BLOCKING) - await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 500); + // Wait for chrome_navigate to complete (non-fatal) + try { + await waitForPageLoaded(CHROME_SESSION_DIR, timeout * 4, 500); + } catch (e) { + console.error(`WARN: ${e.message}`); + } // Keep process alive until killed by cleanup - console.error('Static file detection complete, waiting for 
cleanup signal...'); + // console.error('Static file detection complete, waiting for cleanup signal...'); // Keep the process alive indefinitely await new Promise(() => {}); // Never resolves diff --git a/archivebox/plugins/staticfile/templates/icon.html b/archivebox/plugins/staticfile/templates/icon.html index 1c681685..bc71e426 100644 --- a/archivebox/plugins/staticfile/templates/icon.html +++ b/archivebox/plugins/staticfile/templates/icon.html @@ -1 +1 @@ -📎 + diff --git a/archivebox/plugins/staticfile/tests/__init__.py b/archivebox/plugins/staticfile/tests/__init__.py deleted file mode 100644 index d60e588b..00000000 --- a/archivebox/plugins/staticfile/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the staticfile plugin.""" diff --git a/archivebox/plugins/staticfile/tests/test_staticfile.py b/archivebox/plugins/staticfile/tests/test_staticfile.py index 3f4412ae..b99be87c 100644 --- a/archivebox/plugins/staticfile/tests/test_staticfile.py +++ b/archivebox/plugins/staticfile/tests/test_staticfile.py @@ -10,6 +10,7 @@ import shutil import subprocess import sys import tempfile +import time from pathlib import Path import pytest @@ -76,21 +77,33 @@ class TestStaticfileWithChrome(TestCase): # Use the environment from chrome_session (already has CHROME_HEADLESS=true) - # Run staticfile hook with the active Chrome session - result = subprocess.run( + # Run staticfile hook with the active Chrome session (background hook) + result = subprocess.Popen( ['node', str(STATICFILE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'], cwd=str(snapshot_chrome_dir), - capture_output=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, text=True, - timeout=120, # Longer timeout as it waits for navigation env=env ) + # Allow it to run briefly, then terminate (background hook) + time.sleep(3) + if result.poll() is None: + result.terminate() + try: + stdout, stderr = result.communicate(timeout=5) + except subprocess.TimeoutExpired: + result.kill() + stdout, 
stderr = result.communicate() + else: + stdout, stderr = result.communicate() + # Verify hook ran without crash - self.assertNotIn('Traceback', result.stderr) + self.assertNotIn('Traceback', stderr) # Parse JSONL output to verify it recognized HTML as non-static - for line in result.stdout.split('\n'): + for line in stdout.split('\n'): line = line.strip() if line.startswith('{'): try: diff --git a/archivebox/plugins/title/templates/icon.html b/archivebox/plugins/title/templates/icon.html index 5a051312..0cc05a17 100644 --- a/archivebox/plugins/title/templates/icon.html +++ b/archivebox/plugins/title/templates/icon.html @@ -1 +1 @@ -📝 \ No newline at end of file + diff --git a/archivebox/plugins/twocaptcha/on_Crawl__05_twocaptcha_install.js b/archivebox/plugins/twocaptcha/on_Crawl__83_twocaptcha_install.js similarity index 93% rename from archivebox/plugins/twocaptcha/on_Crawl__05_twocaptcha_install.js rename to archivebox/plugins/twocaptcha/on_Crawl__83_twocaptcha_install.js index 5b2cb4e5..23a1b3f2 100755 --- a/archivebox/plugins/twocaptcha/on_Crawl__05_twocaptcha_install.js +++ b/archivebox/plugins/twocaptcha/on_Crawl__83_twocaptcha_install.js @@ -8,7 +8,7 @@ * Extension: https://chromewebstore.google.com/detail/ifibfemgeogfhoebkmokieepdoobkbpo * Documentation: https://2captcha.com/blog/how-to-use-2captcha-solver-extension-in-puppeteer * - * Priority: 01 (early) - Must install before Chrome session starts at Crawl level + * Priority: 83 - Must install before Chrome session starts at Crawl level * Hook: on_Crawl (runs once per crawl, not per snapshot) * * Requirements: @@ -28,7 +28,7 @@ const EXTENSION = { /** * Main entry point - install extension before archiving * - * Note: 2captcha configuration is handled by on_Crawl__25_twocaptcha_config.js + * Note: 2captcha configuration is handled by on_Crawl__95_twocaptcha_config.js * during first-time browser setup to avoid repeated configuration on every snapshot. 
* The API key is injected via chrome.storage API once per browser session. */ diff --git a/archivebox/plugins/twocaptcha/on_Crawl__25_twocaptcha_config.js b/archivebox/plugins/twocaptcha/on_Crawl__95_twocaptcha_config.js similarity index 99% rename from archivebox/plugins/twocaptcha/on_Crawl__25_twocaptcha_config.js rename to archivebox/plugins/twocaptcha/on_Crawl__95_twocaptcha_config.js index 282b0404..3fe8a10a 100755 --- a/archivebox/plugins/twocaptcha/on_Crawl__25_twocaptcha_config.js +++ b/archivebox/plugins/twocaptcha/on_Crawl__95_twocaptcha_config.js @@ -5,7 +5,7 @@ * Configures the 2captcha extension with API key and settings after Crawl-level Chrome session starts. * Runs once per crawl to inject configuration into extension storage. * - * Priority: 25 (after chrome_launch at 20, before snapshots start) + * Priority: 95 (after chrome_launch at 90, before snapshots start) * Hook: on_Crawl (runs once per crawl, not per snapshot) * * Config Options (from config.json / environment): @@ -346,7 +346,7 @@ async function main() { const snapshotId = args.snapshot_id; if (!url || !snapshotId) { - console.error('Usage: on_Crawl__25_twocaptcha_config.js --url= --snapshot-id='); + console.error('Usage: on_Crawl__95_twocaptcha_config.js --url= --snapshot-id='); process.exit(1); } diff --git a/archivebox/plugins/twocaptcha/tests/test_twocaptcha.py b/archivebox/plugins/twocaptcha/tests/test_twocaptcha.py index d8e65a37..5738cc05 100644 --- a/archivebox/plugins/twocaptcha/tests/test_twocaptcha.py +++ b/archivebox/plugins/twocaptcha/tests/test_twocaptcha.py @@ -26,8 +26,8 @@ from archivebox.plugins.chrome.tests.chrome_test_helpers import ( PLUGIN_DIR = Path(__file__).parent.parent -INSTALL_SCRIPT = PLUGIN_DIR / 'on_Crawl__05_twocaptcha_install.js' -CONFIG_SCRIPT = PLUGIN_DIR / 'on_Crawl__25_twocaptcha_config.js' +INSTALL_SCRIPT = PLUGIN_DIR / 'on_Crawl__83_twocaptcha_install.js' +CONFIG_SCRIPT = PLUGIN_DIR / 'on_Crawl__95_twocaptcha_config.js' TEST_URL = 
'https://2captcha.com/demo/cloudflare-turnstile' diff --git a/archivebox/plugins/ublock/on_Crawl__03_ublock_install.js b/archivebox/plugins/ublock/on_Crawl__80_install_ublock_extension.js similarity index 95% rename from archivebox/plugins/ublock/on_Crawl__03_ublock_install.js rename to archivebox/plugins/ublock/on_Crawl__80_install_ublock_extension.js index deb1ada7..ea5fd474 100755 --- a/archivebox/plugins/ublock/on_Crawl__03_ublock_install.js +++ b/archivebox/plugins/ublock/on_Crawl__80_install_ublock_extension.js @@ -7,7 +7,7 @@ * * Extension: https://chromewebstore.google.com/detail/cjpalhdlnbpafiamejdnhcphjbkeiagm * - * Priority: 03 (early) - Must install before Chrome session starts at Crawl level + * Priority: 80 - Must install before Chrome session starts at Crawl level * Hook: on_Crawl (runs once per crawl, not per snapshot) * * This extension automatically: diff --git a/archivebox/plugins/wget/binaries.jsonl b/archivebox/plugins/wget/binaries.jsonl deleted file mode 100644 index 96965691..00000000 --- a/archivebox/plugins/wget/binaries.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"type": "Binary", "name": "wget", "binproviders": "apt,brew,pip,env"} diff --git a/archivebox/plugins/wget/on_Crawl__06_wget_install.py b/archivebox/plugins/wget/on_Crawl__06_wget_install.py deleted file mode 100755 index 3e21596f..00000000 --- a/archivebox/plugins/wget/on_Crawl__06_wget_install.py +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env python3 -""" -Validate and compute derived wget config values. - -This hook runs early in the Crawl lifecycle to: -1. Validate config values with warnings (not hard errors) -2. Compute derived values (USE_WGET from WGET_ENABLED) -3. 
Check binary availability and version - -Output: - - COMPUTED:KEY=VALUE lines that hooks.py parses and adds to env - - Binary JSONL records to stdout when binaries are found -""" - -import json -import os -import shutil -import subprocess -import sys - -from abx_pkg import Binary, EnvProvider - - -# Read config from environment (already validated by JSONSchema) -def get_env(name: str, default: str = '') -> str: - return os.environ.get(name, default).strip() - -def get_env_bool(name: str, default: bool = False) -> bool: - val = get_env(name, '').lower() - if val in ('true', '1', 'yes', 'on'): - return True - if val in ('false', '0', 'no', 'off'): - return False - return default - -def get_env_int(name: str, default: int = 0) -> int: - try: - return int(get_env(name, str(default))) - except ValueError: - return default - - -def output_binary_found(binary: Binary, name: str): - """Output Binary JSONL record for an installed binary.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'abspath': str(binary.abspath), - 'version': str(binary.version) if binary.version else '', - 'sha256': binary.sha256 or '', - 'binprovider': 'env', # Already installed - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def output_binary_missing(name: str, binproviders: str): - """Output Binary JSONL record for a missing binary that needs installation.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'binproviders': binproviders, # Providers that can install it - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def main(): - warnings = [] - errors = [] - computed = {} - - # Get config values - wget_enabled = get_env_bool('WGET_ENABLED', True) - wget_save_warc = get_env_bool('WGET_SAVE_WARC', True) - wget_timeout = get_env_int('WGET_TIMEOUT') or get_env_int('TIMEOUT', 60) - wget_binary = get_env('WGET_BINARY', 'wget') - - # Compute derived values (USE_WGET 
for backward compatibility) - use_wget = wget_enabled - computed['USE_WGET'] = str(use_wget).lower() - - # Validate timeout with warning (not error) - if use_wget and wget_timeout < 20: - warnings.append( - f"WGET_TIMEOUT={wget_timeout} is very low. " - "wget may fail to archive sites if set to less than ~20 seconds. " - "Consider setting WGET_TIMEOUT=60 or higher." - ) - - # Check binary availability using abx-pkg - provider = EnvProvider() - try: - binary = Binary(name=wget_binary, binproviders=[provider]).load() - binary_path = str(binary.abspath) if binary.abspath else '' - except Exception: - binary = None - binary_path = '' - - if not binary_path: - # Binary not found - computed['WGET_BINARY'] = '' - if use_wget: - # Emit Binary record for installation - output_binary_missing(name='wget', binproviders='apt,brew') - else: - # Binary found - computed['WGET_BINARY'] = binary_path - wget_version = str(binary.version) if binary.version else 'unknown' - computed['WGET_VERSION'] = wget_version - - # Output Binary JSONL record for installed binary - output_binary_found(binary, name='wget') - - # Check for compression support - if computed.get('WGET_BINARY'): - try: - result = subprocess.run( - [computed['WGET_BINARY'], '--compression=auto', '--help'], - capture_output=True, timeout=5 - ) - computed['WGET_AUTO_COMPRESSION'] = 'true' if result.returncode == 0 else 'false' - except Exception: - computed['WGET_AUTO_COMPRESSION'] = 'false' - - # Output results - # Format: KEY=VALUE lines that hooks.py will parse and add to env - for key, value in computed.items(): - print(f"COMPUTED:{key}={value}") - - for warning in warnings: - print(f"WARNING:{warning}", file=sys.stderr) - - for error in errors: - print(f"ERROR:{error}", file=sys.stderr) - - # Exit with error if any hard errors - sys.exit(1 if errors else 0) - - -if __name__ == '__main__': - main() diff --git a/archivebox/plugins/wget/on_Crawl__10_wget_install.py b/archivebox/plugins/wget/on_Crawl__10_wget_install.py 
new file mode 100755 index 00000000..16d95332 --- /dev/null +++ b/archivebox/plugins/wget/on_Crawl__10_wget_install.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +""" +Emit wget Binary dependency for the crawl. +""" + +import json +import os +import sys + + +# Read config from environment (already validated by JSONSchema) +def get_env(name: str, default: str = '') -> str: + return os.environ.get(name, default).strip() + +def get_env_bool(name: str, default: bool = False) -> bool: + val = get_env(name, '').lower() + if val in ('true', '1', 'yes', 'on'): + return True + if val in ('false', '0', 'no', 'off'): + return False + return default + +def get_env_int(name: str, default: int = 0) -> int: + try: + return int(get_env(name, str(default))) + except ValueError: + return default + + +def output_binary(name: str, binproviders: str): + """Output Binary JSONL record for a dependency.""" + machine_id = os.environ.get('MACHINE_ID', '') + + record = { + 'type': 'Binary', + 'name': name, + 'binproviders': binproviders, + 'machine_id': machine_id, + } + print(json.dumps(record)) + + +def output_machine_config(config: dict): + """Output Machine config JSONL patch.""" + if not config: + return + record = { + 'type': 'Machine', + 'config': config, + } + print(json.dumps(record)) + + +def main(): + warnings = [] + errors = [] + + # Get config values + wget_enabled = get_env_bool('WGET_ENABLED', True) + wget_save_warc = get_env_bool('WGET_SAVE_WARC', True) + wget_timeout = get_env_int('WGET_TIMEOUT') or get_env_int('TIMEOUT', 60) + wget_binary = get_env('WGET_BINARY', 'wget') + + # Compute derived values (USE_WGET for backward compatibility) + use_wget = wget_enabled + + # Validate timeout with warning (not error) + if use_wget and wget_timeout < 20: + warnings.append( + f"WGET_TIMEOUT={wget_timeout} is very low. " + "wget may fail to archive sites if set to less than ~20 seconds. " + "Consider setting WGET_TIMEOUT=60 or higher." 
+ ) + + if use_wget: + output_binary(name='wget', binproviders='apt,brew,pip,env') + + # Output computed config patch as JSONL + output_machine_config({ + 'USE_WGET': use_wget, + 'WGET_BINARY': wget_binary, + }) + + for warning in warnings: + print(f"WARNING:{warning}", file=sys.stderr) + + for error in errors: + print(f"ERROR:{error}", file=sys.stderr) + + # Exit with error if any hard errors + sys.exit(1 if errors else 0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/wget/on_Snapshot__61_wget.py b/archivebox/plugins/wget/on_Snapshot__06_wget.bg.py similarity index 92% rename from archivebox/plugins/wget/on_Snapshot__61_wget.py rename to archivebox/plugins/wget/on_Snapshot__06_wget.bg.py index 8d4372d5..bf60ea58 100644 --- a/archivebox/plugins/wget/on_Snapshot__61_wget.py +++ b/archivebox/plugins/wget/on_Snapshot__06_wget.bg.py @@ -2,7 +2,7 @@ """ Archive a URL using wget. -Usage: on_Snapshot__wget.py --url= --snapshot-id= +Usage: on_Snapshot__06_wget.bg.py --url= --snapshot-id= Output: Downloads files to $PWD Environment variables: @@ -74,7 +74,22 @@ STATICFILE_DIR = '../staticfile' def has_staticfile_output() -> bool: """Check if staticfile extractor already downloaded this URL.""" staticfile_dir = Path(STATICFILE_DIR) - return staticfile_dir.exists() and any(staticfile_dir.iterdir()) + if not staticfile_dir.exists(): + return False + stdout_log = staticfile_dir / 'stdout.log' + if not stdout_log.exists(): + return False + for line in stdout_log.read_text(errors='ignore').splitlines(): + line = line.strip() + if not line.startswith('{'): + continue + try: + record = json.loads(line) + except json.JSONDecodeError: + continue + if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded': + return True + return False diff --git a/archivebox/plugins/wget/templates/icon.html b/archivebox/plugins/wget/templates/icon.html index fdf8df21..430432cf 100644 --- a/archivebox/plugins/wget/templates/icon.html +++ 
b/archivebox/plugins/wget/templates/icon.html @@ -1 +1 @@ -📥 \ No newline at end of file + diff --git a/archivebox/plugins/wget/tests/test_wget.py b/archivebox/plugins/wget/tests/test_wget.py index 4d891904..52c1fc55 100644 --- a/archivebox/plugins/wget/tests/test_wget.py +++ b/archivebox/plugins/wget/tests/test_wget.py @@ -300,7 +300,7 @@ def test_staticfile_present_skips(): # wget/ <- wget extractor runs here, looks for ../staticfile staticfile_dir = tmpdir / 'staticfile' staticfile_dir.mkdir() - (staticfile_dir / 'index.html').write_text('test') + (staticfile_dir / 'stdout.log').write_text('{"type":"ArchiveResult","status":"succeeded","output_str":"index.html"}\n') wget_dir = tmpdir / 'wget' wget_dir.mkdir() diff --git a/archivebox/plugins/ytdlp/binaries.jsonl b/archivebox/plugins/ytdlp/binaries.jsonl deleted file mode 100644 index 05240fd2..00000000 --- a/archivebox/plugins/ytdlp/binaries.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"type": "Binary", "name": "yt-dlp", "binproviders": "pip,brew,apt,env", "overrides": {"pip": {"packages": "yt-dlp[default]"}}} -{"type": "Binary", "name": "node", "binproviders": "apt,brew,env", "overrides": {"apt": {"packages": ["nodejs"]}}} -{"type": "Binary", "name": "ffmpeg", "binproviders": "apt,brew,env"} diff --git a/archivebox/plugins/ytdlp/on_Crawl__07_ytdlp_install.py b/archivebox/plugins/ytdlp/on_Crawl__07_ytdlp_install.py deleted file mode 100755 index 212d21bb..00000000 --- a/archivebox/plugins/ytdlp/on_Crawl__07_ytdlp_install.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -""" -Detect yt-dlp binary and emit Binary JSONL record. 
- -Output: Binary JSONL record to stdout if yt-dlp is found -""" - -import json -import os -import sys - -from abx_pkg import Binary, EnvProvider - - -def get_env(name: str, default: str = '') -> str: - return os.environ.get(name, default).strip() - -def get_env_bool(name: str, default: bool = False) -> bool: - val = get_env(name, '').lower() - if val in ('true', '1', 'yes', 'on'): - return True - if val in ('false', '0', 'no', 'off'): - return False - return default - - -def output_binary_found(binary: Binary, name: str): - """Output Binary JSONL record for an installed binary.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'abspath': str(binary.abspath), - 'version': str(binary.version) if binary.version else '', - 'sha256': binary.sha256 or '', - 'binprovider': 'env', # Already installed - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def output_binary_missing(name: str, binproviders: str): - """Output Binary JSONL record for a missing binary that needs installation.""" - machine_id = os.environ.get('MACHINE_ID', '') - - record = { - 'type': 'Binary', - 'name': name, - 'binproviders': binproviders, # Providers that can install it - 'machine_id': machine_id, - } - print(json.dumps(record)) - - -def main(): - ytdlp_enabled = get_env_bool('YTDLP_ENABLED', True) - ytdlp_binary = get_env('YTDLP_BINARY', 'yt-dlp') - - if not ytdlp_enabled: - sys.exit(0) - - provider = EnvProvider() - try: - binary = Binary(name=ytdlp_binary, binproviders=[provider]).load() - if binary.abspath: - # Binary found - output_binary_found(binary, name='yt-dlp') - else: - # Binary not found - output_binary_missing(name='yt-dlp', binproviders='pip,brew,apt') - except Exception: - # Binary not found - output_binary_missing(name='yt-dlp', binproviders='pip,brew,apt') - - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/archivebox/plugins/ytdlp/on_Crawl__15_ytdlp_install.py 
b/archivebox/plugins/ytdlp/on_Crawl__15_ytdlp_install.py new file mode 100755 index 00000000..7b81b5d9 --- /dev/null +++ b/archivebox/plugins/ytdlp/on_Crawl__15_ytdlp_install.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +""" +Emit yt-dlp (and related) Binary dependencies for the crawl. +""" + +import json +import os +import sys + + +def get_env(name: str, default: str = '') -> str: + return os.environ.get(name, default).strip() + +def get_env_bool(name: str, default: bool = False) -> bool: + val = get_env(name, '').lower() + if val in ('true', '1', 'yes', 'on'): + return True + if val in ('false', '0', 'no', 'off'): + return False + return default + + +def output_binary(name: str, binproviders: str, overrides: dict | None = None): + """Output Binary JSONL record for a dependency.""" + machine_id = os.environ.get('MACHINE_ID', '') + + record = { + 'type': 'Binary', + 'name': name, + 'binproviders': binproviders, + 'machine_id': machine_id, + } + if overrides: + record['overrides'] = overrides + print(json.dumps(record)) + + +def main(): + ytdlp_enabled = get_env_bool('YTDLP_ENABLED', True) + + if not ytdlp_enabled: + sys.exit(0) + + output_binary( + name='yt-dlp', + binproviders='pip,brew,apt,env', + overrides={'pip': {'packages': ['yt-dlp[default]']}}, + ) + + # Node.js (required by several JS-based extractors, declared here per legacy binaries.jsonl) + output_binary( + name='node', + binproviders='apt,brew,env', + overrides={'apt': {'packages': ['nodejs']}}, + ) + + # ffmpeg (used by media extraction) + output_binary(name='ffmpeg', binproviders='apt,brew,env') + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/archivebox/plugins/ytdlp/on_Snapshot__63_ytdlp.bg.py b/archivebox/plugins/ytdlp/on_Snapshot__02_ytdlp.bg.py similarity index 81% rename from archivebox/plugins/ytdlp/on_Snapshot__63_ytdlp.bg.py rename to archivebox/plugins/ytdlp/on_Snapshot__02_ytdlp.bg.py index d8faae21..633765ef 100644 --- 
a/archivebox/plugins/ytdlp/on_Snapshot__63_ytdlp.bg.py +++ b/archivebox/plugins/ytdlp/on_Snapshot__02_ytdlp.bg.py @@ -2,7 +2,7 @@ """ Download video/audio from a URL using yt-dlp. -Usage: on_Snapshot__ytdlp.py --url= --snapshot-id= +Usage: on_Snapshot__02_ytdlp.bg.py --url= --snapshot-id= Output: Downloads video/audio files to $PWD Environment variables: @@ -21,6 +21,7 @@ import json import os import subprocess import sys +import threading from pathlib import Path import rich_click as click @@ -67,7 +68,22 @@ STATICFILE_DIR = '../staticfile' def has_staticfile_output() -> bool: """Check if staticfile extractor already downloaded this URL.""" staticfile_dir = Path(STATICFILE_DIR) - return staticfile_dir.exists() and any(staticfile_dir.iterdir()) + if not staticfile_dir.exists(): + return False + stdout_log = staticfile_dir / 'stdout.log' + if not stdout_log.exists(): + return False + for line in stdout_log.read_text(errors='ignore').splitlines(): + line = line.strip() + if not line.startswith('{'): + continue + try: + record = json.loads(line) + except json.JSONDecodeError: + continue + if record.get('type') == 'ArchiveResult' and record.get('status') == 'succeeded': + return True + return False def save_ytdlp(url: str, binary: str) -> tuple[bool, str | None, str]: @@ -106,10 +122,42 @@ def save_ytdlp(url: str, binary: str) -> tuple[bool, str | None, str]: if ytdlp_args_extra: cmd.extend(ytdlp_args_extra) + if '--newline' not in cmd: + cmd.append('--newline') + cmd.append(url) try: - result = subprocess.run(cmd, capture_output=True, timeout=timeout, text=True) + print(f'[ytdlp] Starting download (timeout={timeout}s)', file=sys.stderr) + + output_lines: list[str] = [] + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + ) + + def _read_output() -> None: + if not process.stdout: + return + for line in process.stdout: + output_lines.append(line) + sys.stderr.write(line) + + reader = 
threading.Thread(target=_read_output, daemon=True) + reader.start() + + try: + process.wait(timeout=timeout) + except subprocess.TimeoutExpired: + process.kill() + reader.join(timeout=1) + return False, None, f'Timed out after {timeout} seconds' + + reader.join(timeout=1) + combined_output = ''.join(output_lines) # Check if any media files were downloaded media_extensions = ( @@ -134,7 +182,7 @@ def save_ytdlp(url: str, binary: str) -> tuple[bool, str | None, str]: output = str(video_audio[0]) if video_audio else str(downloaded_files[0]) return True, output, '' else: - stderr = result.stderr + stderr = combined_output # These are NOT errors - page simply has no downloadable media # Return success with no output (legitimate "nothing to download") @@ -142,7 +190,7 @@ def save_ytdlp(url: str, binary: str) -> tuple[bool, str | None, str]: return True, None, '' # Not a media site - success, no output if 'URL could be a direct video link' in stderr: return True, None, '' # Not a supported media URL - success, no output - if result.returncode == 0: + if process.returncode == 0: return True, None, '' # yt-dlp exited cleanly, just no media - success # These ARE errors - something went wrong diff --git a/archivebox/plugins/ytdlp/templates/icon.html b/archivebox/plugins/ytdlp/templates/icon.html index b17d15b8..bf0e4ee4 100644 --- a/archivebox/plugins/ytdlp/templates/icon.html +++ b/archivebox/plugins/ytdlp/templates/icon.html @@ -1 +1 @@ -🎬 \ No newline at end of file + diff --git a/archivebox/templates/core/snapshot_live.html b/archivebox/templates/core/snapshot_live.html index ccecaef4..1de0c2db 100644 --- a/archivebox/templates/core/snapshot_live.html +++ b/archivebox/templates/core/snapshot_live.html @@ -439,13 +439,13 @@
{% if result.result %} {# Use plugin-specific thumbnail template when ArchiveResult is available #}
- {% extractor_thumbnail result.result %} + {% plugin_thumbnail result.result %}
{% else %} {# Fall back to generic iframe for filesystem-discovered files #} @@ -476,7 +476,7 @@ {% if best_result.result %} {# Use plugin-specific fullscreen template when ArchiveResult is available #}
- {% extractor_fullscreen best_result.result %} + {% plugin_fullscreen best_result.result %}
{% else %} {# Fall back to generic iframe #} diff --git a/archivebox/templates/static/admin.css b/archivebox/templates/static/admin.css index 63bf87b2..0afdfe72 100755 --- a/archivebox/templates/static/admin.css +++ b/archivebox/templates/static/admin.css @@ -403,6 +403,38 @@ body.model-snapshot.change-list #content .object-tools { margin-top: 1px; } +.files-icons { + display: inline-flex; + flex-wrap: wrap; + gap: 4px; + vertical-align: middle; +} + +.files-icons a { + display: inline-flex; + align-items: center; + justify-content: center; + text-decoration: none; +} + +.files-icons .abx-output-icon { + width: 18px; + height: 18px; + display: inline-flex; + align-items: center; + justify-content: center; + border-radius: 4px; + color: #1f2937; + background: rgba(15, 23, 42, 0.08); + box-shadow: inset 0 0 0 1px rgba(15, 23, 42, 0.08); +} + +.files-icons .abx-output-icon svg { + width: 14px; + height: 14px; + display: block; +} + .exists-False { opacity: 0.1; filter: grayscale(100%); diff --git a/archivebox/tests/conftest.py b/archivebox/tests/conftest.py index ff6f1875..ed2e5316 100644 --- a/archivebox/tests/conftest.py +++ b/archivebox/tests/conftest.py @@ -2,7 +2,6 @@ import os import sys -import json import subprocess from pathlib import Path from typing import List, Dict, Any, Optional, Tuple @@ -110,16 +109,9 @@ def initialized_archive(isolated_data_dir): # ============================================================================= def parse_jsonl_output(stdout: str) -> List[Dict[str, Any]]: - """Parse JSONL output into list of dicts.""" - records = [] - for line in stdout.strip().split('\n'): - line = line.strip() - if line and line.startswith('{'): - try: - records.append(json.loads(line)) - except json.JSONDecodeError: - pass - return records + """Parse JSONL output into list of dicts via Process parser.""" + from archivebox.machine.models import Process + return Process.parse_records_from_text(stdout or '') def assert_jsonl_contains_type(stdout: str, 
record_type: str, min_count: int = 1): diff --git a/archivebox/tests/test_cli_add_interrupt.py b/archivebox/tests/test_cli_add_interrupt.py new file mode 100644 index 00000000..a9343391 --- /dev/null +++ b/archivebox/tests/test_cli_add_interrupt.py @@ -0,0 +1,133 @@ +import os +import signal +import sqlite3 +import subprocess +import sys +import time +from pathlib import Path + + +def _run(cmd, data_dir: Path, env: dict, timeout: int = 120): + return subprocess.run( + cmd, + cwd=data_dir, + env=env, + capture_output=True, + text=True, + timeout=timeout, + ) + + +def _make_env(data_dir: Path) -> dict: + env = os.environ.copy() + env["DATA_DIR"] = str(data_dir) + env["USE_COLOR"] = "False" + env["SHOW_PROGRESS"] = "False" + env["ARCHIVEBOX_ALLOW_NO_UNIX_SOCKETS"] = "true" + env["PLUGINS"] = "title,favicon" + # Keep it fast but still real hooks + env["SAVE_TITLE"] = "True" + env["SAVE_FAVICON"] = "True" + env["SAVE_WGET"] = "False" + env["SAVE_WARC"] = "False" + env["SAVE_PDF"] = "False" + env["SAVE_SCREENSHOT"] = "False" + env["SAVE_DOM"] = "False" + env["SAVE_SINGLEFILE"] = "False" + env["SAVE_READABILITY"] = "False" + env["SAVE_MERCURY"] = "False" + env["SAVE_GIT"] = "False" + env["SAVE_YTDLP"] = "False" + env["SAVE_HEADERS"] = "False" + env["SAVE_HTMLTOTEXT"] = "False" + return env + + +def _count_running_processes(db_path: Path, where: str) -> int: + for _ in range(50): + try: + conn = sqlite3.connect(db_path, timeout=1) + cur = conn.cursor() + count = cur.execute( + f"SELECT COUNT(*) FROM machine_process WHERE status = 'running' AND {where}" + ).fetchone()[0] + conn.close() + return count + except sqlite3.OperationalError: + time.sleep(0.1) + return 0 + + +def _wait_for_count(db_path: Path, where: str, target: int, timeout: int = 20) -> bool: + start = time.time() + while time.time() - start < timeout: + if _count_running_processes(db_path, where) >= target: + return True + time.sleep(0.1) + return False + + +def 
test_add_parents_workers_to_orchestrator(tmp_path): + data_dir = tmp_path / "data" + data_dir.mkdir() + env = _make_env(data_dir) + + init = _run([sys.executable, "-m", "archivebox", "init", "--quick"], data_dir, env) + assert init.returncode == 0, init.stderr + + add = _run([sys.executable, "-m", "archivebox", "add", "https://example.com"], data_dir, env, timeout=120) + assert add.returncode == 0, add.stderr + + conn = sqlite3.connect(data_dir / "index.sqlite3") + cur = conn.cursor() + orchestrator = cur.execute( + "SELECT id FROM machine_process WHERE process_type = 'orchestrator' ORDER BY created_at DESC LIMIT 1" + ).fetchone() + assert orchestrator is not None + orchestrator_id = orchestrator[0] + + worker_count = cur.execute( + "SELECT COUNT(*) FROM machine_process WHERE process_type = 'worker' AND worker_type = 'crawl' " + "AND parent_id = ?", + (orchestrator_id,), + ).fetchone()[0] + conn.close() + + assert worker_count >= 1, "Expected crawl worker to be parented to orchestrator" + + +def test_add_interrupt_cleans_orphaned_processes(tmp_path): + data_dir = tmp_path / "data" + data_dir.mkdir() + env = _make_env(data_dir) + + init = _run([sys.executable, "-m", "archivebox", "init", "--quick"], data_dir, env) + assert init.returncode == 0, init.stderr + + proc = subprocess.Popen( + [sys.executable, "-m", "archivebox", "add", "https://example.com"], + cwd=data_dir, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + db_path = data_dir / "index.sqlite3" + saw_worker = _wait_for_count(db_path, "process_type = 'worker'", 1, timeout=20) + assert saw_worker, "Expected at least one worker to start before interrupt" + + proc.send_signal(signal.SIGINT) + proc.wait(timeout=30) + + # Wait for workers/hooks to be cleaned up + start = time.time() + while time.time() - start < 30: + running = _count_running_processes(db_path, "process_type IN ('worker','hook')") + if running == 0: + break + time.sleep(0.2) + + assert 
_count_running_processes(db_path, "process_type IN ('worker','hook')") == 0, ( + "Expected no running worker/hook processes after interrupt" + ) diff --git a/archivebox/tests/test_hooks.py b/archivebox/tests/test_hooks.py index 54ac210a..308633ba 100755 --- a/archivebox/tests/test_hooks.py +++ b/archivebox/tests/test_hooks.py @@ -68,17 +68,8 @@ class TestJSONLParsing(unittest.TestCase): def test_parse_clean_jsonl(self): """Clean JSONL format should be parsed correctly.""" stdout = '{"type": "ArchiveResult", "status": "succeeded", "output_str": "Done"}' - records = [] - for line in stdout.splitlines(): - line = line.strip() - if not line or not line.startswith('{'): - continue - try: - data = json.loads(line) - if 'type' in data: - records.append(data) - except json.JSONDecodeError: - pass + from archivebox.machine.models import Process + records = Process.parse_records_from_text(stdout) self.assertEqual(len(records), 1) self.assertEqual(records[0]['type'], 'ArchiveResult') @@ -89,17 +80,8 @@ class TestJSONLParsing(unittest.TestCase): """Multiple JSONL records should all be parsed.""" stdout = '''{"type": "ArchiveResult", "status": "succeeded", "output_str": "Done"} {"type": "Binary", "name": "wget", "abspath": "/usr/bin/wget"}''' - records = [] - for line in stdout.splitlines(): - line = line.strip() - if not line or not line.startswith('{'): - continue - try: - data = json.loads(line) - if 'type' in data: - records.append(data) - except json.JSONDecodeError: - pass + from archivebox.machine.models import Process + records = Process.parse_records_from_text(stdout) self.assertEqual(len(records), 2) self.assertEqual(records[0]['type'], 'ArchiveResult') @@ -111,59 +93,20 @@ class TestJSONLParsing(unittest.TestCase): Processing URL: https://example.com {"type": "ArchiveResult", "status": "succeeded", "output_str": "Downloaded"} Hook completed successfully''' - records = [] - for line in stdout.splitlines(): - line = line.strip() - if not line or not 
line.startswith('{'): - continue - try: - data = json.loads(line) - if 'type' in data: - records.append(data) - except json.JSONDecodeError: - pass + from archivebox.machine.models import Process + records = Process.parse_records_from_text(stdout) self.assertEqual(len(records), 1) self.assertEqual(records[0]['status'], 'succeeded') - def test_parse_legacy_result_json_format(self): - """Legacy RESULT_JSON= format should be parsed for backwards compat.""" - stdout = 'RESULT_JSON={"status": "succeeded", "output": "Done"}' - output_json = None - records = [] - for line in stdout.splitlines(): - line = line.strip() - if line.startswith('RESULT_JSON='): - try: - data = json.loads(line[len('RESULT_JSON='):]) - if output_json is None: - output_json = data - data['type'] = 'ArchiveResult' - records.append(data) - except json.JSONDecodeError: - pass - - self.assertEqual(len(records), 1) - self.assertEqual(records[0]['type'], 'ArchiveResult') - self.assertEqual(records[0]['status'], 'succeeded') - def test_ignore_invalid_json(self): """Invalid JSON should be silently ignored.""" stdout = '''{"type": "ArchiveResult", "status": "succeeded"} {invalid json here} not json at all {"type": "Binary", "name": "wget"}''' - records = [] - for line in stdout.splitlines(): - line = line.strip() - if not line or not line.startswith('{'): - continue - try: - data = json.loads(line) - if 'type' in data: - records.append(data) - except json.JSONDecodeError: - pass + from archivebox.machine.models import Process + records = Process.parse_records_from_text(stdout) self.assertEqual(len(records), 2) @@ -171,17 +114,8 @@ not json at all """JSON objects without 'type' field should be ignored.""" stdout = '''{"status": "succeeded", "output_str": "Done"} {"type": "ArchiveResult", "status": "succeeded"}''' - records = [] - for line in stdout.splitlines(): - line = line.strip() - if not line or not line.startswith('{'): - continue - try: - data = json.loads(line) - if 'type' in data: - 
records.append(data) - except json.JSONDecodeError: - pass + from archivebox.machine.models import Process + records = Process.parse_records_from_text(stdout) self.assertEqual(len(records), 1) self.assertEqual(records[0]['type'], 'ArchiveResult') @@ -250,9 +184,9 @@ class TestHookDiscovery(unittest.TestCase): (wget_dir / 'on_Snapshot__50_wget.py').write_text('# test hook') (wget_dir / 'on_Crawl__00_install_wget.py').write_text('# install hook') - chrome_dir = self.plugins_dir / 'chrome_session' + chrome_dir = self.plugins_dir / 'chrome' chrome_dir.mkdir() - (chrome_dir / 'on_Snapshot__20_chrome_session.bg.js').write_text('// background hook') + (chrome_dir / 'on_Snapshot__20_chrome_tab.bg.js').write_text('// background hook') consolelog_dir = self.plugins_dir / 'consolelog' consolelog_dir.mkdir() @@ -274,7 +208,7 @@ class TestHookDiscovery(unittest.TestCase): self.assertEqual(len(hooks), 3) hook_names = [h.name for h in hooks] - self.assertIn('on_Snapshot__20_chrome_session.bg.js', hook_names) + self.assertIn('on_Snapshot__20_chrome_tab.bg.js', hook_names) self.assertIn('on_Snapshot__21_consolelog.bg.js', hook_names) self.assertIn('on_Snapshot__50_wget.py', hook_names) @@ -288,7 +222,7 @@ class TestHookDiscovery(unittest.TestCase): hooks = sorted(set(hooks), key=lambda p: p.name) # Check numeric ordering - self.assertEqual(hooks[0].name, 'on_Snapshot__20_chrome_session.js') + self.assertEqual(hooks[0].name, 'on_Snapshot__20_chrome_tab.bg.js') self.assertEqual(hooks[1].name, 'on_Snapshot__21_consolelog.bg.js') self.assertEqual(hooks[2].name, 'on_Snapshot__50_wget.py') @@ -348,9 +282,11 @@ print(json.dumps({"type": "ArchiveResult", "status": "succeeded", "output_str": ) self.assertEqual(result.returncode, 0) - output = json.loads(result.stdout.strip()) - self.assertEqual(output['type'], 'ArchiveResult') - self.assertEqual(output['status'], 'succeeded') + from archivebox.machine.models import Process + records = Process.parse_records_from_text(result.stdout) + 
self.assertTrue(records) + self.assertEqual(records[0]['type'], 'ArchiveResult') + self.assertEqual(records[0]['status'], 'succeeded') def test_js_hook_execution(self): """JavaScript hook should execute and output JSONL.""" @@ -371,9 +307,11 @@ console.log(JSON.stringify({type: 'ArchiveResult', status: 'succeeded', output_s ) self.assertEqual(result.returncode, 0) - output = json.loads(result.stdout.strip()) - self.assertEqual(output['type'], 'ArchiveResult') - self.assertEqual(output['status'], 'succeeded') + from archivebox.machine.models import Process + records = Process.parse_records_from_text(result.stdout) + self.assertTrue(records) + self.assertEqual(records[0]['type'], 'ArchiveResult') + self.assertEqual(records[0]['status'], 'succeeded') def test_hook_receives_cli_args(self): """Hook should receive CLI arguments.""" @@ -398,8 +336,10 @@ print(json.dumps({"type": "ArchiveResult", "status": "succeeded", "url": args.ge ) self.assertEqual(result.returncode, 0) - output = json.loads(result.stdout.strip()) - self.assertEqual(output['url'], 'https://example.com') + from archivebox.machine.models import Process + records = Process.parse_records_from_text(result.stdout) + self.assertTrue(records) + self.assertEqual(records[0]['url'], 'https://example.com') class TestInstallHookOutput(unittest.TestCase): @@ -424,7 +364,8 @@ class TestInstallHookOutput(unittest.TestCase): 'binprovider': 'apt', }) - data = json.loads(hook_output) + from archivebox.machine.models import Process + data = Process.parse_records_from_text(hook_output)[0] self.assertEqual(data['type'], 'Binary') self.assertEqual(data['name'], 'wget') self.assertTrue(data['abspath'].startswith('/')) @@ -433,15 +374,16 @@ class TestInstallHookOutput(unittest.TestCase): """Install hook should output Machine config update JSONL.""" hook_output = json.dumps({ 'type': 'Machine', - '_method': 'update', - 'key': 'config/WGET_BINARY', - 'value': '/usr/bin/wget', + 'config': { + 'WGET_BINARY': '/usr/bin/wget', + }, 
}) - data = json.loads(hook_output) + from archivebox.machine.models import Process + data = Process.parse_records_from_text(hook_output)[0] self.assertEqual(data['type'], 'Machine') - self.assertEqual(data['_method'], 'update') - self.assertEqual(data['key'], 'config/WGET_BINARY') + self.assertIn('config', data) + self.assertEqual(data['config']['WGET_BINARY'], '/usr/bin/wget') class TestSnapshotHookOutput(unittest.TestCase): @@ -455,7 +397,8 @@ class TestSnapshotHookOutput(unittest.TestCase): 'output_str': 'Downloaded 5 files', }) - data = json.loads(hook_output) + from archivebox.machine.models import Process + data = Process.parse_records_from_text(hook_output)[0] self.assertEqual(data['type'], 'ArchiveResult') self.assertEqual(data['status'], 'succeeded') self.assertIn('output_str', data) @@ -469,7 +412,8 @@ class TestSnapshotHookOutput(unittest.TestCase): 'cmd': ['/usr/bin/wget', '-p', '-k', 'https://example.com'], }) - data = json.loads(hook_output) + from archivebox.machine.models import Process + data = Process.parse_records_from_text(hook_output)[0] self.assertEqual(data['type'], 'ArchiveResult') self.assertIsInstance(data['cmd'], list) self.assertEqual(data['cmd'][0], '/usr/bin/wget') @@ -487,7 +431,8 @@ class TestSnapshotHookOutput(unittest.TestCase): }, }) - data = json.loads(hook_output) + from archivebox.machine.models import Process + data = Process.parse_records_from_text(hook_output)[0] self.assertEqual(data['type'], 'ArchiveResult') self.assertIsInstance(data['output_json'], dict) self.assertEqual(data['output_json']['status-code'], 200) @@ -500,7 +445,8 @@ class TestSnapshotHookOutput(unittest.TestCase): 'output_str': 'SAVE_WGET=False', }) - data = json.loads(hook_output) + from archivebox.machine.models import Process + data = Process.parse_records_from_text(hook_output)[0] self.assertEqual(data['status'], 'skipped') def test_snapshot_hook_failed_status(self): @@ -511,7 +457,8 @@ class TestSnapshotHookOutput(unittest.TestCase): 'output_str': 
'404 Not Found', }) - data = json.loads(hook_output) + from archivebox.machine.models import Process + data = Process.parse_records_from_text(hook_output)[0] self.assertEqual(data['status'], 'failed') diff --git a/archivebox/tests/test_list.py b/archivebox/tests/test_list.py index b46596fa..d527fa5d 100644 --- a/archivebox/tests/test_list.py +++ b/archivebox/tests/test_list.py @@ -18,11 +18,10 @@ def test_search_json(process, disable_extractors_dict): clean_str = re.sub(r'\x1b\[[0-9;]*m', '', output_str) clean_str = re.sub(r'[\x00-\x1f\x7f]', lambda m: ' ' if m.group(0) in '\t\n\r' else '', clean_str) output_json = json.loads(clean_str) - # With --index-only, only source file snapshots are created (file:// URLs) # Verify we get at least one snapshot back assert len(output_json) >= 1 - # The snapshot should be a file:// URL pointing to sources - assert any("sources" in entry.get("url", "") for entry in output_json) + # Should include the requested URL + assert any("example.com" in entry.get("url", "") for entry in output_json) def test_search_json_headers(process, disable_extractors_dict): @@ -65,16 +64,17 @@ def test_search_csv(process, disable_extractors_dict): capture_output=True, env=disable_extractors_dict) search_process = subprocess.run(["archivebox", "search", "--csv", "url"], capture_output=True) output_csv = search_process.stdout.decode("utf-8") - # Should contain the source file URL - assert "file://" in output_csv or "sources" in output_csv + # Should contain the requested URL + assert "example.com" in output_csv def test_search_csv_headers(process, disable_extractors_dict): subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"], capture_output=True, env=disable_extractors_dict) search_process = subprocess.run(["archivebox", "search", "--csv", "url", "--with-headers"], capture_output=True) output_csv = search_process.stdout.decode("utf-8") - # Should have url header and source file content + # Should have url header and 
requested URL assert "url" in output_csv + assert "example.com" in output_csv def test_search_with_headers_requires_format(process): search_process = subprocess.run(["archivebox", "search", "--with-headers"], capture_output=True) diff --git a/archivebox/tests/test_real_world_add.py b/archivebox/tests/test_real_world_add.py new file mode 100644 index 00000000..3c72e622 --- /dev/null +++ b/archivebox/tests/test_real_world_add.py @@ -0,0 +1,133 @@ +import os +import sqlite3 +import subprocess +from pathlib import Path + + +def _find_snapshot_dir(data_dir: Path, snapshot_id: str) -> Path | None: + candidates = {snapshot_id} + if len(snapshot_id) == 32: + hyphenated = f"{snapshot_id[:8]}-{snapshot_id[8:12]}-{snapshot_id[12:16]}-{snapshot_id[16:20]}-{snapshot_id[20:]}" + candidates.add(hyphenated) + elif len(snapshot_id) == 36 and '-' in snapshot_id: + candidates.add(snapshot_id.replace('-', '')) + + for needle in candidates: + for path in data_dir.rglob(needle): + if path.is_dir(): + return path + return None + + +def _find_html_with_text(root: Path, needle: str) -> list[Path]: + hits: list[Path] = [] + for path in root.rglob("*.htm*"): + if not path.is_file(): + continue + try: + if needle in path.read_text(errors="ignore"): + hits.append(path) + except Exception: + continue + return hits + + +def test_add_real_world_example_domain(tmp_path): + os.chdir(tmp_path) + tmp_short = Path("/tmp") / f"abx-{tmp_path.name}" + tmp_short.mkdir(parents=True, exist_ok=True) + env = os.environ.copy() + env["TMP_DIR"] = str(tmp_short) + env["ARCHIVEBOX_ALLOW_NO_UNIX_SOCKETS"] = "true" + + init = subprocess.run( + ["archivebox", "init"], + capture_output=True, + text=True, + timeout=120, + env=env, + ) + assert init.returncode == 0, f"archivebox init failed: {init.stderr}" + + result = subprocess.run( + ["archivebox", "add", "https://example.com"], + capture_output=True, + text=True, + timeout=900, + env=env, + ) + assert result.returncode == 0, ( + "archivebox add failed.\n" + 
f"stdout:\n{result.stdout}\n" + f"stderr:\n{result.stderr}" + ) + + conn = sqlite3.connect(tmp_path / "index.sqlite3") + c = conn.cursor() + snapshot_row = c.execute( + "SELECT id, url, title FROM core_snapshot WHERE url = ?", + ("https://example.com",), + ).fetchone() + assert snapshot_row is not None, "Snapshot for https://example.com not found in DB" + snapshot_id, snapshot_url, snapshot_title = snapshot_row + assert snapshot_title and "Example Domain" in snapshot_title, ( + f"Expected title to contain Example Domain, got: {snapshot_title}" + ) + + failed_results = c.execute( + "SELECT COUNT(*) FROM core_archiveresult WHERE snapshot_id = ? AND status = 'failed'", + (snapshot_id,), + ).fetchone()[0] + assert failed_results == 0, "Some archive results failed for example.com snapshot" + + binary_workers = c.execute( + "SELECT COUNT(*) FROM machine_process WHERE process_type = 'worker' AND worker_type = 'binary'" + ).fetchone()[0] + assert binary_workers > 0, "Expected BinaryWorker to run installs via BinaryMachine" + + failed_binary_workers = c.execute( + "SELECT COUNT(*) FROM machine_process WHERE process_type = 'worker' AND worker_type = 'binary' " + "AND exit_code IS NOT NULL AND exit_code != 0" + ).fetchone()[0] + assert failed_binary_workers == 0, "BinaryWorker reported non-zero exit codes" + + queued_binaries = c.execute( + "SELECT name FROM machine_binary WHERE status != 'installed'" + ).fetchall() + assert not queued_binaries, f"Some binaries did not install: {queued_binaries}" + conn.close() + + snapshot_dir = _find_snapshot_dir(tmp_path, str(snapshot_id)) + assert snapshot_dir is not None, "Snapshot output directory not found" + + title_path = snapshot_dir / "title" / "title.txt" + assert title_path.exists(), f"Missing title output: {title_path}" + assert "Example Domain" in title_path.read_text(errors="ignore") + + html_sources = [] + for candidate in ("wget", "singlefile", "dom"): + for candidate_dir in (snapshot_dir / candidate, 
*snapshot_dir.glob(f"*_{candidate}")): + if candidate_dir.exists(): + html_sources.extend(_find_html_with_text(candidate_dir, "Example Domain")) + assert len(html_sources) >= 2, ( + "Expected HTML outputs from multiple extractors to contain Example Domain " + f"(found {len(html_sources)})." + ) + + text_hits = 0 + for path in ( + *snapshot_dir.glob("*_readability/content.txt"), + snapshot_dir / "readability" / "content.txt", + ): + if path.exists() and "Example Domain" in path.read_text(errors="ignore"): + text_hits += 1 + for path in ( + *snapshot_dir.glob("*_htmltotext/htmltotext.txt"), + snapshot_dir / "htmltotext" / "htmltotext.txt", + ): + if path.exists() and "Example Domain" in path.read_text(errors="ignore"): + text_hits += 1 + assert text_hits >= 2, ( + "Expected multiple text extractors to contain Example Domain " + f"(readability/htmltotext hits={text_hits})." + ) diff --git a/archivebox/tests/test_settings_signal_webhooks.py b/archivebox/tests/test_settings_signal_webhooks.py new file mode 100644 index 00000000..acb6367d --- /dev/null +++ b/archivebox/tests/test_settings_signal_webhooks.py @@ -0,0 +1,8 @@ +from django.test import TestCase + + +class TestSignalWebhooksSettings(TestCase): + def test_task_handler_is_sync_in_tests(self): + from signal_webhooks.settings import webhook_settings + + assert webhook_settings.TASK_HANDLER.__name__ == "sync_task_handler" diff --git a/archivebox/tests/test_snapshot.py b/archivebox/tests/test_snapshot.py index 7ca8e5c8..8d2fc3fc 100644 --- a/archivebox/tests/test_snapshot.py +++ b/archivebox/tests/test_snapshot.py @@ -4,7 +4,11 @@ import os import subprocess import sqlite3 -import json +from archivebox.machine.models import Process +from datetime import datetime +from pathlib import Path +from urllib.parse import urlparse +import uuid import pytest @@ -16,19 +20,51 @@ def test_snapshot_creates_snapshot_with_correct_url(tmp_path, process, disable_e os.chdir(tmp_path) subprocess.run( - ['archivebox', 'snapshot', 
'https://example.com'], + ['archivebox', 'snapshot', 'create', 'https://example.com'], capture_output=True, - env=disable_extractors_dict, + env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)}, ) conn = sqlite3.connect('index.sqlite3') c = conn.cursor() - result = c.execute("SELECT url FROM core_snapshot WHERE url = ?", - ('https://example.com',)).fetchone() + snapshot_row = c.execute( + "SELECT id, created_at, url, crawl_id FROM core_snapshot WHERE url = ?", + ('https://example.com',) + ).fetchone() + assert snapshot_row is not None + crawl_row = c.execute( + "SELECT id, created_at, urls, created_by_id FROM crawls_crawl WHERE id = ?", + (snapshot_row[3],) + ).fetchone() + assert crawl_row is not None + user_row = c.execute( + "SELECT username FROM auth_user WHERE id = ?", + (crawl_row[3],) + ).fetchone() + assert user_row is not None conn.close() - assert result is not None - assert result[0] == 'https://example.com' + snapshot_id_raw, snapshot_created_at, snapshot_url, crawl_id = snapshot_row + snapshot_id = str(uuid.UUID(snapshot_id_raw)) + crawl_id, crawl_created_at, crawl_urls, crawl_created_by_id = crawl_row + username = user_row[0] + crawl_date_str = datetime.fromisoformat(crawl_created_at).strftime('%Y%m%d') + snapshot_date_str = datetime.fromisoformat(snapshot_created_at).strftime('%Y%m%d') + domain = urlparse(snapshot_url).hostname or 'unknown' + + # Verify crawl symlink exists and is relative + target_path = tmp_path / 'users' / username / 'snapshots' / snapshot_date_str / domain / snapshot_id + symlinks = [ + p for p in tmp_path.rglob(str(snapshot_id)) + if p.is_symlink() + ] + assert symlinks, "Snapshot symlink should exist under crawl dir" + link_path = symlinks[0] + + assert link_path.is_symlink(), "Snapshot symlink should exist under crawl dir" + link_target = os.readlink(link_path) + assert not os.path.isabs(link_target), "Symlink should be relative" + assert link_path.resolve() == target_path.resolve() def 
test_snapshot_multiple_urls_creates_multiple_records(tmp_path, process, disable_extractors_dict): @@ -36,11 +72,11 @@ def test_snapshot_multiple_urls_creates_multiple_records(tmp_path, process, disa os.chdir(tmp_path) subprocess.run( - ['archivebox', 'snapshot', + ['archivebox', 'snapshot', 'create', 'https://example.com', 'https://iana.org'], capture_output=True, - env=disable_extractors_dict, + env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)}, ) conn = sqlite3.connect('index.sqlite3') @@ -59,10 +95,10 @@ def test_snapshot_tag_creates_tag_and_links_to_snapshot(tmp_path, process, disab os.chdir(tmp_path) subprocess.run( - ['archivebox', 'snapshot', '--tag=mytesttag', + ['archivebox', 'snapshot', 'create', '--tag=mytesttag', 'https://example.com'], capture_output=True, - env=disable_extractors_dict, + env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)}, ) conn = sqlite3.connect('index.sqlite3') @@ -95,22 +131,15 @@ def test_snapshot_jsonl_output_has_correct_structure(tmp_path, process, disable_ # Pass URL as argument instead of stdin for more reliable behavior result = subprocess.run( - ['archivebox', 'snapshot', 'https://example.com'], + ['archivebox', 'snapshot', 'create', 'https://example.com'], capture_output=True, text=True, - env=disable_extractors_dict, + env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)}, ) # Parse JSONL output lines - snapshot_records = [] - for line in result.stdout.strip().split('\n'): - if line: - try: - record = json.loads(line) - if record.get('type') == 'Snapshot': - snapshot_records.append(record) - except json.JSONDecodeError: - continue + records = Process.parse_records_from_text(result.stdout) + snapshot_records = [r for r in records if r.get('type') == 'Snapshot'] assert len(snapshot_records) >= 1, "Should output at least one Snapshot JSONL record" @@ -127,10 +156,10 @@ def test_snapshot_with_tag_stores_tag_name(tmp_path, process, disable_extractors # Use command line args instead of stdin subprocess.run( 
- ['archivebox', 'snapshot', '--tag=customtag', 'https://example.com'], + ['archivebox', 'snapshot', 'create', '--tag=customtag', 'https://example.com'], capture_output=True, text=True, - env=disable_extractors_dict, + env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)}, ) conn = sqlite3.connect('index.sqlite3') @@ -145,40 +174,40 @@ def test_snapshot_with_tag_stores_tag_name(tmp_path, process, disable_extractors assert tag[0] == 'customtag' -def test_snapshot_with_depth_creates_crawl_object(tmp_path, process, disable_extractors_dict): - """Test that --depth > 0 creates a Crawl object with correct max_depth.""" +def test_snapshot_with_depth_sets_snapshot_depth(tmp_path, process, disable_extractors_dict): + """Test that --depth sets snapshot depth when creating snapshots.""" os.chdir(tmp_path) subprocess.run( - ['archivebox', 'snapshot', '--depth=1', + ['archivebox', 'snapshot', 'create', '--depth=1', 'https://example.com'], capture_output=True, - env=disable_extractors_dict, + env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)}, ) conn = sqlite3.connect('index.sqlite3') c = conn.cursor() - crawl = c.execute("SELECT max_depth FROM crawls_crawl ORDER BY created_at DESC LIMIT 1").fetchone() + snapshot = c.execute("SELECT depth FROM core_snapshot ORDER BY created_at DESC LIMIT 1").fetchone() conn.close() - assert crawl is not None, "Crawl object should be created when depth > 0" - assert crawl[0] == 1, "Crawl max_depth should match --depth value" + assert snapshot is not None, "Snapshot should be created when depth is provided" + assert snapshot[0] == 1, "Snapshot depth should match --depth value" -def test_snapshot_deduplicates_urls(tmp_path, process, disable_extractors_dict): - """Test that adding the same URL twice doesn't create duplicate snapshots.""" +def test_snapshot_allows_duplicate_urls_across_crawls(tmp_path, process, disable_extractors_dict): + """Snapshot create auto-creates a crawl per run; same URL can appear multiple times.""" 
os.chdir(tmp_path) # Add same URL twice subprocess.run( - ['archivebox', 'snapshot', 'https://example.com'], + ['archivebox', 'snapshot', 'create', 'https://example.com'], capture_output=True, - env=disable_extractors_dict, + env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)}, ) subprocess.run( - ['archivebox', 'snapshot', 'https://example.com'], + ['archivebox', 'snapshot', 'create', 'https://example.com'], capture_output=True, - env=disable_extractors_dict, + env={**disable_extractors_dict, 'DATA_DIR': str(tmp_path)}, ) conn = sqlite3.connect('index.sqlite3') @@ -187,7 +216,7 @@ def test_snapshot_deduplicates_urls(tmp_path, process, disable_extractors_dict): ('https://example.com',)).fetchone()[0] conn.close() - assert count == 1, "Same URL should not create duplicate snapshots" + assert count == 2, "Same URL should create separate snapshots across different crawls" if __name__ == '__main__': diff --git a/archivebox/workers/orchestrator.py b/archivebox/workers/orchestrator.py index 4b8a2827..358c6ad9 100644 --- a/archivebox/workers/orchestrator.py +++ b/archivebox/workers/orchestrator.py @@ -83,6 +83,10 @@ class Orchestrator: # In foreground mode (exit_on_idle=True), limit to 1 CrawlWorker if self.exit_on_idle: self.MAX_CRAWL_WORKERS = 1 + # Faster UI updates for interactive runs + self.POLL_INTERVAL = 0.25 + # Exit quickly once idle in foreground mode + self.IDLE_TIMEOUT = 1 def __repr__(self) -> str: return f'[underline]Orchestrator[/underline]\\[pid={self.pid}]' @@ -111,8 +115,14 @@ class Orchestrator: # Clean up any stale Process records from previous runs stale_count = Process.cleanup_stale_running() - # Clean up orphaned Chrome processes from previous crashes - chrome_count = Process.cleanup_orphaned_chrome() + # Foreground runs should start fast; skip expensive orphan cleanup unless in daemon mode. 
+ chrome_count = 0 + orphaned_workers = 0 + if not self.exit_on_idle: + # Clean up orphaned Chrome processes from previous crashes + chrome_count = Process.cleanup_orphaned_chrome() + # Clean up orphaned workers from previous crashes + orphaned_workers = Process.cleanup_orphaned_workers() # Collect startup metadata metadata = { @@ -123,6 +133,8 @@ class Orchestrator: metadata['cleaned_stale_pids'] = stale_count if chrome_count: metadata['cleaned_orphaned_chrome'] = chrome_count + if orphaned_workers: + metadata['cleaned_orphaned_workers'] = orphaned_workers log_worker_event( worker_type='Orchestrator', @@ -135,30 +147,26 @@ class Orchestrator: def terminate_all_workers(self) -> None: """Terminate all running worker processes.""" from archivebox.machine.models import Process - import signal - - # Get all running worker processes - running_workers = Process.objects.filter( - process_type=Process.TypeChoices.WORKER, - status__in=['running', 'started'] - ) + # Get running worker processes scoped to this orchestrator when possible + if getattr(self, 'db_process', None): + running_workers = self._get_scoped_running_workers() + else: + running_workers = Process.objects.filter( + process_type=Process.TypeChoices.WORKER, + status=Process.StatusChoices.RUNNING, + ) for worker_process in running_workers: try: - # Send SIGTERM to gracefully terminate the worker - os.kill(worker_process.pid, signal.SIGTERM) - except ProcessLookupError: - # Process already dead - pass + # Gracefully terminate the worker and update Process status + worker_process.terminate(graceful_timeout=5.0) except Exception: - # Ignore other errors during shutdown pass def on_shutdown(self, error: BaseException | None = None) -> None: """Called when orchestrator shuts down.""" - # Terminate all worker processes in exit_on_idle mode - if self.exit_on_idle: - self.terminate_all_workers() + # Terminate all worker processes on shutdown + self.terminate_all_workers() # Update Process record status if hasattr(self, 
'db_process') and self.db_process: @@ -188,11 +196,26 @@ class Orchestrator: Process.cleanup_stale_running() self._last_cleanup_time = now + if self.crawl_id and getattr(self, 'db_process', None): + return self._get_scoped_running_workers().count() + return sum(len(W.get_running_workers()) for W in self.WORKER_TYPES) def get_running_workers_for_type(self, WorkerClass: Type[Worker]) -> int: """Get count of running workers for a specific worker type.""" + if self.crawl_id and getattr(self, 'db_process', None): + return self._get_scoped_running_workers().filter(worker_type=WorkerClass.name).count() return len(WorkerClass.get_running_workers()) + + def _get_scoped_running_workers(self): + """Get running workers scoped to this orchestrator process tree.""" + from archivebox.machine.models import Process + + descendants = self.db_process.get_descendants(include_self=False) + return descendants.filter( + process_type=Process.TypeChoices.WORKER, + status=Process.StatusChoices.RUNNING, + ) def should_spawn_worker(self, WorkerClass: Type[Worker], queue_count: int) -> bool: """Determine if we should spawn a new worker.""" @@ -208,8 +231,11 @@ class Orchestrator: max_workers = 1 # Default for unknown types # Check worker limit - running_workers = WorkerClass.get_running_workers() - running_count = len(running_workers) + if self.crawl_id and getattr(self, 'db_process', None) and WorkerClass.name != 'binary': + running_count = self._get_scoped_running_workers().filter(worker_type=WorkerClass.name).count() + else: + running_workers = WorkerClass.get_running_workers() + running_count = len(running_workers) if running_count >= max_workers: return False @@ -225,9 +251,13 @@ class Orchestrator: """Spawn a new worker process. 
Returns PID or None if spawn failed.""" try: print(f'[yellow]DEBUG: Spawning {WorkerClass.name} worker with crawl_id={self.crawl_id}...[/yellow]') - pid = WorkerClass.start(crawl_id=self.crawl_id) + pid = WorkerClass.start(parent=self.db_process, crawl_id=self.crawl_id) print(f'[yellow]DEBUG: Spawned {WorkerClass.name} worker with PID={pid}[/yellow]') + if self.exit_on_idle: + # Foreground runs have MAX_CRAWL_WORKERS=1; avoid blocking startup on registration. + return pid + # CRITICAL: Block until worker registers itself in Process table # This prevents race condition where orchestrator spawns multiple workers # before any of them finish on_startup() and register @@ -316,7 +346,7 @@ class Orchestrator: if binary_count > 0: running_binary_workers_list = BinaryWorker.get_running_workers() if len(running_binary_workers_list) == 0: - BinaryWorker.start() + BinaryWorker.start(parent=self.db_process) # Check if any BinaryWorkers are still running running_binary_workers = len(BinaryWorker.get_running_workers()) @@ -344,7 +374,7 @@ class Orchestrator: # Claim next crawl crawl = crawl_queue.first() if crawl and self._claim_crawl(crawl): - CrawlWorker.start(crawl_id=str(crawl.id)) + CrawlWorker.start(parent=self.db_process, crawl_id=str(crawl.id)) return queue_sizes @@ -463,7 +493,7 @@ class Orchestrator: with Live( progress_layout.get_layout(), - refresh_per_second=4, + refresh_per_second=8, screen=True, console=orchestrator_console, ): @@ -521,41 +551,147 @@ class Orchestrator: else: status = "Idle" + binary_workers_count = worker_counts.get('binary', 0) # Update orchestrator status progress_layout.update_orchestrator_status( status=status, crawl_queue_count=crawl_queue_count, crawl_workers_count=crawl_workers_count, + binary_queue_count=queue_sizes.get('binary', 0), + binary_workers_count=binary_workers_count, max_crawl_workers=self.MAX_CRAWL_WORKERS, ) - # Update CrawlWorker logs by tailing Process stdout/stderr - if crawl_workers_count > 0: - from 
archivebox.machine.models import Process - crawl_worker_process = Process.objects.filter( - process_type=Process.TypeChoices.WORKER, - worker_type='crawl', - status__in=['running', 'started'] - ).first() - if crawl_worker_process: - progress_layout.update_crawl_worker_logs(crawl_worker_process) + # Update crawl queue tree (active + recently completed) + from archivebox.crawls.models import Crawl + from archivebox.core.models import Snapshot, ArchiveResult + recent_cutoff = timezone.now() - timedelta(minutes=5) + pending_snapshot_candidates: list[Snapshot] = [] + hooks_by_snapshot: dict[str, list] = {} - # Log queue size changes - if queue_sizes != last_queue_sizes: - for worker_type, count in queue_sizes.items(): - old_count = last_queue_sizes.get(worker_type, 0) - if count != old_count: - if count > old_count: - progress_layout.log_event( - f"{worker_type.capitalize()} queue: {old_count} → {count}", - style="yellow" - ) - else: - progress_layout.log_event( - f"{worker_type.capitalize()} queue: {old_count} → {count}", - style="green" - ) - last_queue_sizes = queue_sizes.copy() + active_qs = Crawl.objects.exclude(status__in=Crawl.FINAL_STATES) + if self.crawl_id: + active_qs = active_qs.filter(id=self.crawl_id) + active_qs = active_qs.order_by('retry_at') + + recent_done_qs = Crawl.objects.filter( + status__in=Crawl.FINAL_STATES, + modified_at__gte=recent_cutoff, + ) + if self.crawl_id: + recent_done_qs = recent_done_qs.filter(id=self.crawl_id) + recent_done_qs = recent_done_qs.order_by('-modified_at') + + crawls = list(active_qs) + active_ids = {c.id for c in crawls} + for crawl in recent_done_qs: + if crawl.id not in active_ids: + crawls.append(crawl) + + def _abbrev(text: str, max_len: int = 80) -> str: + return text if len(text) <= max_len else f"{text[:max_len - 3]}..." 
+ + tree_data: list[dict] = [] + for crawl in crawls: + urls = crawl.get_urls_list() + url_count = len(urls) + label = f"{url_count} url" + ("s" if url_count != 1 else "") + label = _abbrev(label) + + snapshots = [] + snap_qs = Snapshot.objects.filter(crawl_id=crawl.id) + active_snaps = list( + snap_qs.filter(status__in=[Snapshot.StatusChoices.QUEUED, Snapshot.StatusChoices.STARTED]) + .order_by('created_at')[:16] + ) + recent_snaps = list( + snap_qs.filter(status__in=Snapshot.FINAL_STATES) + .order_by('-modified_at')[:8] + ) + snap_ids = {s.id for s in active_snaps} + for s in recent_snaps: + if s.id not in snap_ids: + active_snaps.append(s) + + for snap in active_snaps: + total = snap.archiveresult_set.count() + completed = snap.archiveresult_set.filter(status__in=[ + ArchiveResult.StatusChoices.SUCCEEDED, + ArchiveResult.StatusChoices.SKIPPED, + ArchiveResult.StatusChoices.FAILED, + ]).count() + running = snap.archiveresult_set.filter(status=ArchiveResult.StatusChoices.STARTED).count() + try: + from archivebox.config.configset import get_config + from archivebox.hooks import discover_hooks + hooks_list = discover_hooks('Snapshot', config=get_config(snapshot=snap)) + total_hooks = len(hooks_list) + hooks_by_snapshot[str(snap.id)] = hooks_list + except Exception: + total_hooks = total + pending = max(total_hooks - completed - running, 0) + snap_label = _abbrev(snap.url or str(snap.id), max_len=60) + snapshots.append({ + 'id': str(snap.id), + 'status': snap.status, + 'label': snap_label, + 'hooks': {'completed': completed, 'running': running, 'pending': pending} if total else {}, + }) + pending_snapshot_candidates.append(snap) + + tree_data.append({ + 'id': str(crawl.id), + 'status': crawl.status, + 'label': label, + 'snapshots': snapshots, + }) + + progress_layout.update_crawl_tree(tree_data) + + # Update running process panels (tail stdout/stderr for each running process) + from archivebox.machine.models import Process + if self.crawl_id and getattr(self, 
'db_process', None): + process_qs = self.db_process.get_descendants(include_self=False) + process_qs = process_qs.filter(status=Process.StatusChoices.RUNNING) + else: + process_qs = Process.objects.filter( + status=Process.StatusChoices.RUNNING, + ).exclude(process_type=Process.TypeChoices.ORCHESTRATOR) + + running_processes = [ + proc for proc in process_qs.order_by('process_type', 'worker_type', 'started_at') + if proc.is_running + ] + pending_processes = [] + try: + from types import SimpleNamespace + for snap in pending_snapshot_candidates: + hooks_list = hooks_by_snapshot.get(str(snap.id), []) + if not hooks_list: + continue + existing = set( + snap.archiveresult_set.exclude(hook_name='').values_list('hook_name', flat=True) + ) + for hook_path in hooks_list: + if hook_path.name in existing: + continue + pending_processes.append(SimpleNamespace( + process_type='hook', + worker_type='', + pid=None, + cmd=['', str(hook_path)], + url=snap.url, + status='queued', + started_at=None, + timeout=None, + pwd=None, + )) + except Exception: + pending_processes = [] + + progress_layout.update_process_panels(running_processes, pending=pending_processes) + + last_queue_sizes = queue_sizes.copy() # Update snapshot progress from archivebox.core.models import Snapshot @@ -641,11 +777,10 @@ class Orchestrator: # Hooks created but none started yet current_plugin = "waiting" - # Update snapshot worker (show even if no hooks yet) # Debug: Log first time we see this snapshot - if snapshot.id not in progress_layout.snapshot_to_worker: + if snapshot.id not in snapshot_progress: progress_layout.log_event( - f"Assigning to worker: {snapshot.url[:50]}", + f"Tracking snapshot: {snapshot.url[:50]}", style="grey53" ) @@ -656,17 +791,21 @@ class Orchestrator: if prev_progress != curr_progress: prev_total, prev_completed, prev_plugin = prev_progress - # Log hooks created - if total > prev_total: - progress_layout.log_event( - f"Hooks created: {total} for {snapshot.url[:40]}", - style="cyan" - 
) - # Log hook completion if completed > prev_completed: + completed_ar = snapshot.archiveresult_set.filter( + status__in=['succeeded', 'skipped', 'failed'] + ).order_by('-end_ts', '-modified_at').first() + hook_label = '' + if completed_ar: + hook_name = completed_ar.hook_name or completed_ar.plugin or '' + if hook_name: + hook_label = hook_name.split('__')[-1] if '__' in hook_name else hook_name + hook_label = hook_label.replace('.py', '').replace('.js', '').replace('.sh', '').replace('.bg', '') + if not hook_label: + hook_label = f"{completed}/{total}" progress_layout.log_event( - f"Hook completed: {completed}/{total} for {snapshot.url[:40]}", + f"Hook completed: {hook_label}", style="green" ) @@ -686,23 +825,15 @@ class Orchestrator: style="red" ) - progress_layout.update_snapshot_worker( - snapshot_id=snapshot.id, - url=snapshot.url, - total=max(total, 1), # Show at least 1 to avoid division by zero - completed=completed, - current_plugin=current_plugin, - ) + # No per-snapshot panels; logs only - # Remove snapshots that are no longer active - for snapshot_id in list(progress_layout.snapshot_to_worker.keys()): + # Cleanup progress tracking for completed snapshots + for snapshot_id in list(snapshot_progress.keys()): if snapshot_id not in active_ids: progress_layout.log_event( f"Snapshot completed/removed", style="blue" ) - progress_layout.remove_snapshot_worker(snapshot_id) - # Also clean up progress tracking if snapshot_id in snapshot_progress: del snapshot_progress[snapshot_id] @@ -734,6 +865,7 @@ class Orchestrator: if progress_layout: progress_layout.log_event("Interrupted by user", style="red") print() # Newline after ^C + self.on_shutdown(error=KeyboardInterrupt()) except BaseException as e: if progress_layout: progress_layout.log_event(f"Error: {e}", style="red") diff --git a/archivebox/workers/tests/test_orchestrator.py b/archivebox/workers/tests/test_orchestrator.py index d54331ec..79d37f95 100644 --- a/archivebox/workers/tests/test_orchestrator.py +++ 
b/archivebox/workers/tests/test_orchestrator.py @@ -215,6 +215,46 @@ class TestOrchestratorWithProcess(TestCase): mock_count.assert_called() self.assertTrue(result) + def test_orchestrator_scoped_worker_count(self): + """Orchestrator with crawl_id should count only descendant workers.""" + import time + from archivebox.machine.models import Process, Machine + + machine = Machine.current() + orchestrator = Orchestrator(exit_on_idle=True, crawl_id='test-crawl') + + orchestrator.db_process = Process.objects.create( + machine=machine, + process_type=Process.TypeChoices.ORCHESTRATOR, + status=Process.StatusChoices.RUNNING, + pid=12345, + started_at=timezone.now(), + ) + + # Prevent cleanup from marking fake PIDs as exited + orchestrator._last_cleanup_time = time.time() + + Process.objects.create( + machine=machine, + process_type=Process.TypeChoices.WORKER, + worker_type='crawl', + status=Process.StatusChoices.RUNNING, + pid=12346, + parent=orchestrator.db_process, + started_at=timezone.now(), + ) + + Process.objects.create( + machine=machine, + process_type=Process.TypeChoices.WORKER, + worker_type='crawl', + status=Process.StatusChoices.RUNNING, + pid=12347, + started_at=timezone.now(), + ) + + self.assertEqual(orchestrator.get_total_worker_count(), 1) + class TestProcessBasedWorkerTracking(TestCase): """Test Process model methods that replace pid_utils functionality.""" diff --git a/archivebox/workers/worker.py b/archivebox/workers/worker.py index 7546a02a..38f5361b 100644 --- a/archivebox/workers/worker.py +++ b/archivebox/workers/worker.py @@ -23,6 +23,7 @@ from django.db.models import QuerySet from django.utils import timezone from django.conf import settings +from statemachine.exceptions import TransitionNotAllowed from rich import print from archivebox.misc.logging_util import log_worker_event @@ -450,13 +451,34 @@ class CrawlWorker(Worker): def runloop(self) -> None: """Run crawl state machine, spawn SnapshotWorkers.""" import sys + from 
archivebox.crawls.models import Crawl self.on_startup() try: print(f'🔄 CrawlWorker starting for crawl {self.crawl_id}', file=sys.stderr) + if self.crawl.status == Crawl.StatusChoices.SEALED: + print( + '✅ This crawl has already completed and there are no tasks remaining.\n' + ' To re-crawl it, create a new crawl with the same URLs, e.g.\n' + ' archivebox crawl create | archivebox run', + file=sys.stderr, + ) + return + # Advance state machine: QUEUED → STARTED (triggers run() via @started.enter) - self.crawl.sm.tick() + try: + self.crawl.sm.tick() + except TransitionNotAllowed: + if self.crawl.status == Crawl.StatusChoices.SEALED: + print( + '✅ This crawl has already completed and there are no tasks remaining.\n' + ' To re-crawl it, create a new crawl with the same URLs, e.g.\n' + ' archivebox crawl create | archivebox run', + file=sys.stderr, + ) + return + raise self.crawl.refresh_from_db() print(f'🔄 tick() complete, crawl status={self.crawl.status}', file=sys.stderr) @@ -509,13 +531,20 @@ class CrawlWorker(Worker): status__in=['running', 'started'], ) - # Extract snapshot IDs from their pwd (contains snapshot ID at the end) + # Extract snapshot IDs from worker cmd args (more reliable than pwd paths) running_snapshot_ids = [] for proc in running_processes: - if proc.pwd: - # pwd is like: /path/to/archive/{timestamp} - # We need to match this against snapshot.output_dir - running_snapshot_ids.append(proc.pwd) + cmd = proc.cmd or [] + snapshot_id = None + for i, part in enumerate(cmd): + if part == '--snapshot-id' and i + 1 < len(cmd): + snapshot_id = cmd[i + 1] + break + if part.startswith('--snapshot-id='): + snapshot_id = part.split('=', 1)[1] + break + if snapshot_id: + running_snapshot_ids.append(snapshot_id) # Find snapshots that don't have a running worker all_snapshots = Snapshot.objects.filter( @@ -526,7 +555,7 @@ class CrawlWorker(Worker): # Filter out snapshots that already have workers pending_snapshots = [ snap for snap in all_snapshots - if 
snap.output_dir not in running_snapshot_ids + if str(snap.id) not in running_snapshot_ids ][:self.MAX_SNAPSHOT_WORKERS - running_count] with open(debug_log, 'a') as f: @@ -631,7 +660,6 @@ class SnapshotWorker(Worker): b. If foreground: wait for completion c. If background: track but continue to next hook d. Update ArchiveResult status - e. Advance current_step when all step's hooks complete 4. When all hooks done: seal snapshot 5. On shutdown: SIGTERM all background hooks """ @@ -662,7 +690,7 @@ class SnapshotWorker(Worker): def runloop(self) -> None: """Execute all hooks sequentially.""" - from archivebox.hooks import discover_hooks, is_background_hook, extract_step + from archivebox.hooks import discover_hooks, is_background_hook from archivebox.core.models import ArchiveResult from archivebox.config.configset import get_config @@ -679,8 +707,7 @@ class SnapshotWorker(Worker): # Execute each hook sequentially for hook_path in hooks: hook_name = hook_path.name - plugin = self._extract_plugin_name(hook_name) - hook_step = extract_step(hook_name) + plugin = self._extract_plugin_name(hook_path, hook_name) is_background = is_background_hook(hook_name) # Create ArchiveResult for THIS HOOK (not per plugin) @@ -724,16 +751,18 @@ class SnapshotWorker(Worker): pid=self.pid, ) - # Check if we can advance to next step - self._try_advance_step() + # Reap any background hooks that finished while we worked + self._reap_background_hooks() - # All hooks launched (or completed) - seal using state machine + # All hooks launched (or completed) - terminate bg hooks and seal + self._finalize_background_hooks() # This triggers enter_sealed() which calls cleanup() and checks parent crawl sealing self.snapshot.sm.seal() self.snapshot.refresh_from_db() except Exception as e: # Mark snapshot as sealed even on error (still triggers cleanup) + self._finalize_background_hooks() self.snapshot.sm.seal() self.snapshot.refresh_from_db() raise @@ -753,7 +782,6 @@ class SnapshotWorker(Worker): 
script=hook_path, output_dir=output_dir, config=config, - timeout=120, parent=self.db_process, url=str(self.snapshot.url), snapshot_id=str(self.snapshot.id), @@ -773,12 +801,22 @@ class SnapshotWorker(Worker): except TimeoutError: # Hook exceeded timeout - kill it process.kill(signal_num=9) - exit_code = -1 + exit_code = process.exit_code or 137 # Update ArchiveResult from hook output ar.update_from_output() ar.end_ts = timezone.now() + # Apply hook-emitted JSONL records regardless of exit code + from archivebox.hooks import extract_records_from_process, process_hook_records + + records = extract_records_from_process(process) + if records: + process_hook_records( + records, + overrides={'snapshot': self.snapshot, 'crawl': self.snapshot.crawl}, + ) + # Determine final status from hook exit code if exit_code == 0: ar.status = ar.StatusChoices.SUCCEEDED @@ -787,34 +825,53 @@ class SnapshotWorker(Worker): ar.save(update_fields=['status', 'end_ts', 'modified_at']) - def _try_advance_step(self) -> None: - """Advance current_step if all foreground hooks in current step are done.""" - from django.db.models import Q + def _finalize_background_hooks(self) -> None: + """Gracefully terminate background hooks and update their ArchiveResults.""" + if getattr(self, '_background_hooks_finalized', False): + return + + self._background_hooks_finalized = True + + # Send SIGTERM and wait up to each hook's remaining timeout + self._terminate_background_hooks( + background_processes=self.background_processes, + worker_type='SnapshotWorker', + indent_level=2, + ) + + # Clear to avoid double-termination during on_shutdown + self.background_processes = {} + + # Update STARTED background results now that hooks are done from archivebox.core.models import ArchiveResult - current_step = self.snapshot.current_step - - # Single query: foreground hooks in current step that aren't finished - # Foreground hooks: hook_name doesn't contain '.bg.' 
- pending_foreground = self.snapshot.archiveresult_set.filter( - Q(hook_name__contains=f'__{current_step}_') & # Current step - ~Q(hook_name__contains='.bg.') & # Not background - ~Q(status__in=ArchiveResult.FINAL_STATES) # Not finished - ).exists() - - if pending_foreground: - return # Still waiting for hooks - - # All foreground hooks done - advance! - self.snapshot.current_step += 1 - self.snapshot.save(update_fields=['current_step', 'modified_at']) - - log_worker_event( - worker_type='SnapshotWorker', - event=f'Advanced to step {self.snapshot.current_step}', - indent_level=2, - pid=self.pid, + started_bg = self.snapshot.archiveresult_set.filter( + status=ArchiveResult.StatusChoices.STARTED, + hook_name__contains='.bg.', ) + for ar in started_bg: + ar.update_from_output() + + def _reap_background_hooks(self) -> None: + """Update ArchiveResults for background hooks that already exited.""" + if getattr(self, '_background_hooks_finalized', False): + return + if not self.background_processes: + return + + from archivebox.core.models import ArchiveResult + + for hook_name, process in list(self.background_processes.items()): + exit_code = process.poll() + if exit_code is None: + continue + + ar = self.snapshot.archiveresult_set.filter(hook_name=hook_name).first() + if ar and ar.status == ArchiveResult.StatusChoices.STARTED: + ar.update_from_output() + + # Remove completed hook from tracking + self.background_processes.pop(hook_name, None) def on_shutdown(self, error: BaseException | None = None) -> None: """ @@ -834,12 +891,15 @@ class SnapshotWorker(Worker): super().on_shutdown(error) @staticmethod - def _extract_plugin_name(hook_name: str) -> str: - """Extract plugin name from hook filename.""" - # on_Snapshot__50_wget.py -> wget - name = hook_name.split('__')[-1] # Get part after last __ + def _extract_plugin_name(hook_path: Path, hook_name: str) -> str: + """Extract plugin name from hook path (fallback to filename).""" + plugin_dir = hook_path.parent.name + if 
plugin_dir not in ('plugins', '.'): + return plugin_dir + # Fallback: on_Snapshot__50_wget.py -> wget + name = hook_name.split('__')[-1] name = name.replace('.py', '').replace('.js', '').replace('.sh', '') - name = name.replace('.bg', '') # Remove .bg suffix + name = name.replace('.bg', '') return name @@ -888,7 +948,7 @@ class BinaryWorker(Worker): machine=machine, status=Binary.StatusChoices.QUEUED, retry_at__lte=timezone.now() - ).order_by('retry_at') + ).order_by('retry_at', 'created_at', 'name') def runloop(self) -> None: """Install binary(ies)."""