wip 2

2026-04-05 23:37:58 +10:00 · 2025-12-24 21:46:14 -08:00
parent 1915333b81
commit 6c769d831c
69 changed files with 3586 additions and 4216 deletions
--- a/archivebox/plugins/archive_org/tests/test_archive_org.py
+++ b/archivebox/plugins/archive_org/tests/test_archive_org.py
@@ -0,0 +1,61 @@
+"""
+Integration tests for archive_org plugin
+
+Tests verify standalone archive.org extractor execution.
+"""
+
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+import pytest
+
+PLUGIN_DIR = Path(__file__).parent.parent
+ARCHIVE_ORG_HOOK = PLUGIN_DIR / 'on_Snapshot__13_archive_org.py'
+TEST_URL = 'https://example.com'
+
+def test_hook_script_exists():
+    assert ARCHIVE_ORG_HOOK.exists()
+
+def test_submits_to_archive_org():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        result = subprocess.run(
+            [sys.executable, str(ARCHIVE_ORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
+            cwd=tmpdir, capture_output=True, text=True, timeout=60
+        )
+        
+        assert result.returncode in (0, 1)
+        assert 'RESULT_JSON=' in result.stdout
+        
+        # Should either succeed or fail gracefully
+        assert 'STATUS=' in result.stdout
+
+def test_config_save_archive_org_false_skips():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        import os
+        env = os.environ.copy()
+        env['SAVE_ARCHIVE_DOT_ORG'] = 'False'
+        
+        result = subprocess.run(
+            [sys.executable, str(ARCHIVE_ORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30
+        )
+        
+        if result.returncode == 0:
+            assert 'STATUS=skipped' in result.stdout or 'STATUS=succeeded' in result.stdout
+
+def test_handles_timeout():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        import os
+        env = os.environ.copy()
+        env['TIMEOUT'] = '1'
+        
+        result = subprocess.run(
+            [sys.executable, str(ARCHIVE_ORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'testtimeout'],
+            cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30
+        )
+        
+        assert result.returncode in (0, 1)
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
--- a/archivebox/plugins/chrome_session/on_Crawl__00_install_chrome.py
+++ b/archivebox/plugins/chrome_session/on_Crawl__00_install_chrome.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+"""
+Install Chrome/Chromium if not already available.
+
+Runs at crawl start to ensure Chrome is installed.
+Uses playwright to install chromium if no system Chrome found.
+Outputs JSONL for InstalledBinary.
+"""
+
+import json
+import sys
+import os
+import shutil
+from pathlib import Path
+
+
+def find_chrome():
+    """Try to find system Chrome/Chromium."""
+    # Comprehensive list of Chrome/Chromium binary names and paths
+    chromium_names_linux = [
+        'chromium',
+        'chromium-browser',
+        'chromium-browser-beta',
+        'chromium-browser-unstable',
+        'chromium-browser-canary',
+        'chromium-browser-dev',
+    ]
+
+    chrome_names_linux = [
+        'google-chrome',
+        'google-chrome-stable',
+        'google-chrome-beta',
+        'google-chrome-canary',
+        'google-chrome-unstable',
+        'google-chrome-dev',
+        'chrome',
+    ]
+
+    chrome_paths_macos = [
+        '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+        '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
+        '/Applications/Chromium.app/Contents/MacOS/Chromium',
+    ]
+
+    chrome_paths_linux = [
+        '/usr/bin/google-chrome',
+        '/usr/bin/google-chrome-stable',
+        '/usr/bin/chromium',
+        '/usr/bin/chromium-browser',
+        '/snap/bin/chromium',
+        '/opt/google/chrome/chrome',
+    ]
+
+    all_chrome_names = chrome_names_linux + chromium_names_linux
+    all_chrome_paths = chrome_paths_macos + chrome_paths_linux
+
+    # Check env var first
+    env_path = os.environ.get('CHROME_BINARY', '')
+    if env_path and Path(env_path).is_file():
+        return env_path
+
+    # Try shutil.which for various names
+    for name in all_chrome_names:
+        abspath = shutil.which(name)
+        if abspath:
+            return abspath
+
+    # Check common paths
+    for path in all_chrome_paths:
+        if Path(path).is_file():
+            return path
+
+    return None
+
+
+def main():
+    try:
+        # First try to find system Chrome
+        system_chrome = find_chrome()
+        if system_chrome:
+            print(json.dumps({
+                'type': 'InstalledBinary',
+                'name': 'chrome',
+                'abspath': str(system_chrome),
+                'version': None,
+                'sha256': None,
+                'binprovider': 'env',
+            }))
+            sys.exit(0)
+
+        # If not found in system, try to install chromium via apt/brew
+        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
+
+        AptProvider.model_rebuild()
+        BrewProvider.model_rebuild()
+        EnvProvider.model_rebuild()
+
+        # Try chromium-browser or chromium via system package managers
+        for binary_name in ['chromium', 'chromium-browser', 'google-chrome']:
+            try:
+                chrome_binary = Binary(
+                    name=binary_name,
+                    binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
+                )
+
+                # Try to load, install if not found
+                try:
+                    loaded = chrome_binary.load()
+                    if not loaded or not loaded.abspath:
+                        raise Exception("Not loaded")
+                except Exception:
+                    # Install via system package manager
+                    loaded = chrome_binary.install()
+
+                if loaded and loaded.abspath:
+                    # Output InstalledBinary JSONL
+                    print(json.dumps({
+                        'type': 'InstalledBinary',
+                        'name': 'chrome',
+                        'abspath': str(loaded.abspath),
+                        'version': str(loaded.version) if loaded.version else None,
+                        'sha256': loaded.sha256,
+                        'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
+                    }))
+                    sys.exit(0)
+            except Exception:
+                continue
+
+        # If all attempts failed
+        print(json.dumps({
+            'type': 'Dependency',
+            'bin_name': 'chrome',
+            'bin_providers': 'apt,brew,env',
+        }))
+        print("Failed to install Chrome/Chromium", file=sys.stderr)
+        sys.exit(1)
+
+    except Exception as e:
+        print(json.dumps({
+            'type': 'Dependency',
+            'bin_name': 'chrome',
+            'bin_providers': 'apt,brew,env',
+        }))
+        print(f"Error installing Chrome: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
--- a/archivebox/plugins/chrome_session/tests/init.py
+++ b/archivebox/plugins/chrome_session/tests/init.py
--- a/archivebox/plugins/chrome_session/tests/test_chrome_session.py
+++ b/archivebox/plugins/chrome_session/tests/test_chrome_session.py
@@ -0,0 +1,85 @@
+"""
+Integration tests for chrome_session plugin
+
+Tests verify:
+1. Install hook finds system Chrome or installs chromium
+2. Verify deps with abx-pkg
+3. Chrome session script exists
+"""
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+import pytest
+
+PLUGIN_DIR = Path(__file__).parent.parent
+CHROME_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_chrome.py'
+CHROME_SESSION_HOOK = PLUGIN_DIR / 'on_Snapshot__20_chrome_session.js'
+
+
+def test_hook_script_exists():
+    """Verify chrome session hook exists."""
+    assert CHROME_SESSION_HOOK.exists(), f"Hook not found: {CHROME_SESSION_HOOK}"
+
+
+def test_chrome_install_hook():
+    """Test chrome install hook to find or install Chrome/Chromium."""
+    result = subprocess.run(
+        [sys.executable, str(CHROME_INSTALL_HOOK)],
+        capture_output=True,
+        text=True,
+        timeout=600
+    )
+
+    assert result.returncode == 0, f"Install hook failed: {result.stderr}"
+
+    # Verify InstalledBinary JSONL output
+    found_binary = False
+    for line in result.stdout.strip().split('\n'):
+        if line.strip():
+            try:
+                record = json.loads(line)
+                if record.get('type') == 'InstalledBinary':
+                    assert record['name'] == 'chrome'
+                    assert record['abspath']
+                    assert Path(record['abspath']).exists(), f"Chrome binary should exist at {record['abspath']}"
+                    found_binary = True
+                    break
+            except json.JSONDecodeError:
+                pass
+
+    assert found_binary, "Should output InstalledBinary record"
+
+
+def test_verify_deps_with_abx_pkg():
+    """Verify chrome is available via abx-pkg after hook installation."""
+    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
+
+    AptProvider.model_rebuild()
+    BrewProvider.model_rebuild()
+    EnvProvider.model_rebuild()
+
+    # Try various chrome binary names
+    for binary_name in ['chromium', 'chromium-browser', 'google-chrome', 'chrome']:
+        try:
+            chrome_binary = Binary(
+                name=binary_name,
+                binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
+            )
+            chrome_loaded = chrome_binary.load()
+            if chrome_loaded and chrome_loaded.abspath:
+                # Found at least one chrome variant
+                assert Path(chrome_loaded.abspath).exists()
+                return
+        except Exception:
+            continue
+
+    # If we get here, chrome should still be available from system
+    import shutil
+    assert shutil.which('chromium') or shutil.which('chrome') or shutil.which('google-chrome'), \
+        "Chrome should be available after install hook"
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
--- a/archivebox/plugins/dom/tests/test_dom.py
+++ b/archivebox/plugins/dom/tests/test_dom.py
@@ -0,0 +1,205 @@
+"""
+Integration tests for dom plugin
+
+Tests verify:
+1. Hook script exists
+2. Dependencies installed via chrome_session validation hooks
+3. Verify deps with abx-pkg
+4. DOM extraction works on https://example.com
+5. JSONL output is correct
+6. Filesystem output contains actual page content
+7. Config options work
+"""
+
+import json
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+
+PLUGIN_DIR = Path(__file__).parent.parent
+PLUGINS_ROOT = PLUGIN_DIR.parent
+DOM_HOOK = PLUGIN_DIR / 'on_Snapshot__36_dom.js'
+CHROME_VALIDATE_HOOK = PLUGINS_ROOT / 'chrome_session' / 'on_Crawl__00_validate_chrome.py'
+NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Dependency__install_using_npm_provider.py'
+TEST_URL = 'https://example.com'
+
+
+def test_hook_script_exists():
+    """Verify on_Snapshot hook exists."""
+    assert DOM_HOOK.exists(), f"Hook not found: {DOM_HOOK}"
+
+
+def test_chrome_validation_and_install():
+    """Test chrome validation hook to install puppeteer-core if needed."""
+    # Run chrome validation hook (from chrome_session plugin)
+    result = subprocess.run(
+        [sys.executable, str(CHROME_VALIDATE_HOOK)],
+        capture_output=True,
+        text=True,
+        timeout=30
+    )
+
+    # If exit 1, binary not found - need to install
+    if result.returncode == 1:
+        # Parse Dependency request from JSONL
+        dependency_request = None
+        for line in result.stdout.strip().split('\n'):
+            if line.strip():
+                try:
+                    record = json.loads(line)
+                    if record.get('type') == 'Dependency':
+                        dependency_request = record
+                        break
+                except json.JSONDecodeError:
+                    pass
+
+        if dependency_request:
+            bin_name = dependency_request['bin_name']
+            bin_providers = dependency_request['bin_providers']
+
+            # Install via npm provider hook
+            install_result = subprocess.run(
+                [
+                    sys.executable,
+                    str(NPM_PROVIDER_HOOK),
+                    '--dependency-id', 'test-dep-001',
+                    '--bin-name', bin_name,
+                    '--bin-providers', bin_providers
+                ],
+                capture_output=True,
+                text=True,
+                timeout=600
+            )
+
+            assert install_result.returncode == 0, f"Install failed: {install_result.stderr}"
+
+            # Verify installation via JSONL output
+            for line in install_result.stdout.strip().split('\n'):
+                if line.strip():
+                    try:
+                        record = json.loads(line)
+                        if record.get('type') == 'InstalledBinary':
+                            assert record['name'] == bin_name
+                            assert record['abspath']
+                            break
+                    except json.JSONDecodeError:
+                        pass
+    else:
+        # Binary already available, verify via JSONL output
+        assert result.returncode == 0, f"Validation failed: {result.stderr}"
+
+
+def test_verify_deps_with_abx_pkg():
+    """Verify dependencies are available via abx-pkg after hook installation."""
+    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
+
+    EnvProvider.model_rebuild()
+
+    # Verify node is available
+    node_binary = Binary(name='node', binproviders=[EnvProvider()])
+    node_loaded = node_binary.load()
+    assert node_loaded and node_loaded.abspath, "Node.js required for dom plugin"
+
+
+def test_extracts_dom_from_example_com():
+    """Test full workflow: extract DOM from real example.com via hook."""
+    # Prerequisites checked by earlier test
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Run DOM extraction hook
+        result = subprocess.run(
+            ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            timeout=120
+        )
+
+        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
+
+        # Verify JSONL output
+        assert 'STATUS=succeeded' in result.stdout, "Should report success"
+        assert 'RESULT_JSON=' in result.stdout, "Should output RESULT_JSON"
+
+        # Parse JSONL result
+        result_json = None
+        for line in result.stdout.split('\n'):
+            if line.startswith('RESULT_JSON='):
+                result_json = json.loads(line.split('=', 1)[1])
+                break
+
+        assert result_json, "Should have RESULT_JSON"
+        assert result_json['extractor'] == 'dom'
+        assert result_json['status'] == 'succeeded'
+        assert result_json['url'] == TEST_URL
+
+        # Verify filesystem output
+        dom_dir = tmpdir / 'dom'
+        assert dom_dir.exists(), "Output directory not created"
+
+        dom_file = dom_dir / 'output.html'
+        assert dom_file.exists(), "output.html not created"
+
+        # Verify HTML content contains REAL example.com text
+        html_content = dom_file.read_text(errors='ignore')
+        assert len(html_content) > 200, f"HTML content too short: {len(html_content)} bytes"
+        assert '<html' in html_content.lower(), "Missing <html> tag"
+        assert 'example domain' in html_content.lower(), "Missing 'Example Domain' in HTML"
+        assert ('this domain' in html_content.lower() or
+                'illustrative examples' in html_content.lower()), \
+            "Missing example.com description text"
+
+
+def test_config_save_dom_false_skips():
+    """Test that SAVE_DOM=False causes skip."""
+    import os
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+        env = os.environ.copy()
+        env['SAVE_DOM'] = 'False'
+
+        result = subprocess.run(
+            ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30
+        )
+
+        assert result.returncode == 0, f"Should exit 0 when skipping: {result.stderr}"
+        assert 'STATUS=skipped' in result.stdout, "Should report skipped status"
+
+
+def test_staticfile_present_skips():
+    """Test that dom skips when staticfile already downloaded."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Create staticfile directory to simulate staticfile extractor ran
+        staticfile_dir = tmpdir / 'staticfile'
+        staticfile_dir.mkdir()
+        (staticfile_dir / 'index.html').write_text('<html>test</html>')
+
+        result = subprocess.run(
+            ['node', str(DOM_HOOK), f'--url={TEST_URL}', '--snapshot-id=teststatic'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            timeout=30
+        )
+
+        assert result.returncode == 0, "Should exit 0 when skipping"
+        assert 'STATUS=skipped' in result.stdout, "Should report skipped status"
+        assert 'staticfile' in result.stdout.lower(), "Should mention staticfile"
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
--- a/archivebox/plugins/git/on_Crawl__00_install_git.py
+++ b/archivebox/plugins/git/on_Crawl__00_install_git.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+"""
+Install git if not already available.
+
+Runs at crawl start to ensure git is installed.
+Outputs JSONL for InstalledBinary.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+
+def main():
+    try:
+        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
+
+        AptProvider.model_rebuild()
+        BrewProvider.model_rebuild()
+        EnvProvider.model_rebuild()
+
+        # git binary and package have same name
+        git_binary = Binary(
+            name='git',
+            binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
+        )
+
+        # Try to load, install if not found
+        try:
+            loaded = git_binary.load()
+            if not loaded or not loaded.abspath:
+                raise Exception("Not loaded")
+        except Exception:
+            # Install via system package manager
+            loaded = git_binary.install()
+
+        if loaded and loaded.abspath:
+            # Output InstalledBinary JSONL
+            print(json.dumps({
+                'type': 'InstalledBinary',
+                'name': 'git',
+                'abspath': str(loaded.abspath),
+                'version': str(loaded.version) if loaded.version else None,
+                'sha256': loaded.sha256,
+                'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
+            }))
+            sys.exit(0)
+        else:
+            print(json.dumps({
+                'type': 'Dependency',
+                'bin_name': 'git',
+                'bin_providers': 'apt,brew,env',
+            }))
+            print("Failed to install git", file=sys.stderr)
+            sys.exit(1)
+
+    except Exception as e:
+        print(json.dumps({
+            'type': 'Dependency',
+            'bin_name': 'git',
+            'bin_providers': 'apt,brew,env',
+        }))
+        print(f"Error installing git: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
--- a/archivebox/plugins/git/tests/test_git.py
+++ b/archivebox/plugins/git/tests/test_git.py
@@ -0,0 +1,90 @@
+"""
+Integration tests for git plugin
+
+Tests verify:
+1. Install hook installs git via abx-pkg
+2. Verify deps with abx-pkg
+3. Standalone git extractor execution
+"""
+
+import json
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+import pytest
+
+PLUGIN_DIR = Path(__file__).parent.parent
+GIT_HOOK = PLUGIN_DIR / 'on_Snapshot__12_git.py'
+GIT_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_git.py'
+TEST_URL = 'https://github.com/example/repo.git'
+
+def test_hook_script_exists():
+    assert GIT_HOOK.exists()
+
+def test_git_install_hook():
+    """Test git install hook to install git if needed."""
+    result = subprocess.run(
+        [sys.executable, str(GIT_INSTALL_HOOK)],
+        capture_output=True,
+        text=True,
+        timeout=600
+    )
+
+    assert result.returncode == 0, f"Install hook failed: {result.stderr}"
+
+    # Verify InstalledBinary JSONL output
+    found_binary = False
+    for line in result.stdout.strip().split('\n'):
+        if line.strip():
+            try:
+                record = json.loads(line)
+                if record.get('type') == 'InstalledBinary':
+                    assert record['name'] == 'git'
+                    assert record['abspath']
+                    found_binary = True
+                    break
+            except json.JSONDecodeError:
+                pass
+
+    assert found_binary, "Should output InstalledBinary record"
+
+def test_verify_deps_with_abx_pkg():
+    """Verify git is available via abx-pkg after hook installation."""
+    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider
+
+    AptProvider.model_rebuild()
+    BrewProvider.model_rebuild()
+    EnvProvider.model_rebuild()
+
+    git_binary = Binary(name='git', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
+    git_loaded = git_binary.load()
+    assert git_loaded and git_loaded.abspath, "git should be available after install hook"
+
+def test_reports_missing_git():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        env = {'PATH': '/nonexistent'}
+        result = subprocess.run(
+            [sys.executable, str(GIT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
+            cwd=tmpdir, capture_output=True, text=True, env=env
+        )
+        if result.returncode != 0:
+            combined = result.stdout + result.stderr
+            assert 'DEPENDENCY_NEEDED' in combined or 'git' in combined.lower() or 'ERROR=' in combined
+
+def test_handles_non_git_url():
+    if not shutil.which('git'):
+        pytest.skip("git not installed")
+    
+    with tempfile.TemporaryDirectory() as tmpdir:
+        result = subprocess.run(
+            [sys.executable, str(GIT_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
+            cwd=tmpdir, capture_output=True, text=True, timeout=30
+        )
+        # Should fail or skip for non-git URL
+        assert result.returncode in (0, 1)
+        assert 'STATUS=' in result.stdout
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
--- a/archivebox/plugins/htmltotext/tests/test_htmltotext.py
+++ b/archivebox/plugins/htmltotext/tests/test_htmltotext.py
@@ -0,0 +1,53 @@
+"""
+Integration tests for htmltotext plugin
+
+Tests verify standalone htmltotext extractor execution.
+"""
+
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+import pytest
+
+PLUGIN_DIR = Path(__file__).parent.parent
+HTMLTOTEXT_HOOK = PLUGIN_DIR / 'on_Snapshot__54_htmltotext.py'
+TEST_URL = 'https://example.com'
+
+def test_hook_script_exists():
+    assert HTMLTOTEXT_HOOK.exists()
+
+def test_extracts_text_from_html():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+        # Create HTML source
+        (tmpdir / 'singlefile').mkdir()
+        (tmpdir / 'singlefile' / 'singlefile.html').write_text('<html><body><h1>Example Domain</h1><p>This domain is for examples.</p></body></html>')
+        
+        result = subprocess.run(
+            [sys.executable, str(HTMLTOTEXT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
+            cwd=tmpdir, capture_output=True, text=True, timeout=30
+        )
+        
+        assert result.returncode in (0, 1)
+        assert 'RESULT_JSON=' in result.stdout
+        
+        if result.returncode == 0:
+            assert 'STATUS=succeeded' in result.stdout
+            output_file = tmpdir / 'htmltotext' / 'content.txt'
+            if output_file.exists():
+                content = output_file.read_text()
+                assert len(content) > 0
+
+def test_fails_gracefully_without_html():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        result = subprocess.run(
+            [sys.executable, str(HTMLTOTEXT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            cwd=tmpdir, capture_output=True, text=True, timeout=30
+        )
+        assert result.returncode in (0, 1)
+        combined = result.stdout + result.stderr
+        assert 'STATUS=' in combined
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
--- a/archivebox/plugins/media/on_Crawl__00_install_ytdlp.py
+++ b/archivebox/plugins/media/on_Crawl__00_install_ytdlp.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+"""
+Install yt-dlp if not already available.
+
+Runs at crawl start to ensure yt-dlp is installed.
+Outputs JSONL for InstalledBinary.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+
+def main():
+    try:
+        from abx_pkg import Binary, PipProvider, EnvProvider, BinProviderOverrides
+
+        PipProvider.model_rebuild()
+        EnvProvider.model_rebuild()
+
+        # yt-dlp binary and package have same name
+        ytdlp_binary = Binary(
+            name='yt-dlp',
+            binproviders=[PipProvider(), EnvProvider()]
+        )
+
+        # Try to load, install if not found
+        try:
+            loaded = ytdlp_binary.load()
+            if not loaded or not loaded.abspath:
+                raise Exception("Not loaded")
+        except Exception:
+            # Install via pip
+            loaded = ytdlp_binary.install()
+
+        if loaded and loaded.abspath:
+            # Output InstalledBinary JSONL
+            print(json.dumps({
+                'type': 'InstalledBinary',
+                'name': 'yt-dlp',
+                'abspath': str(loaded.abspath),
+                'version': str(loaded.version) if loaded.version else None,
+                'sha256': loaded.sha256,
+                'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
+            }))
+            sys.exit(0)
+        else:
+            print(json.dumps({
+                'type': 'Dependency',
+                'bin_name': 'yt-dlp',
+                'bin_providers': 'pip,brew,env',
+            }))
+            print("Failed to install yt-dlp", file=sys.stderr)
+            sys.exit(1)
+
+    except Exception as e:
+        print(json.dumps({
+            'type': 'Dependency',
+            'bin_name': 'yt-dlp',
+            'bin_providers': 'pip,brew,env',
+        }))
+        print(f"Error installing yt-dlp: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
--- a/archivebox/plugins/media/tests/test_media.py
+++ b/archivebox/plugins/media/tests/test_media.py
@@ -0,0 +1,148 @@
+"""
+Integration tests for media plugin
+
+Tests verify:
+1. Hook script exists
+2. Dependencies installed via validation hooks
+3. Verify deps with abx-pkg
+4. Media extraction works on video URLs
+5. JSONL output is correct
+6. Config options work
+7. Handles non-media URLs gracefully
+"""
+
+import json
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+import pytest
+
+PLUGIN_DIR = Path(__file__).parent.parent
+PLUGINS_ROOT = PLUGIN_DIR.parent
+MEDIA_HOOK = PLUGIN_DIR / 'on_Snapshot__51_media.py'
+MEDIA_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_ytdlp.py'
+TEST_URL = 'https://example.com/video.mp4'
+
+def test_hook_script_exists():
+    """Verify on_Snapshot hook exists."""
+    assert MEDIA_HOOK.exists(), f"Hook not found: {MEDIA_HOOK}"
+
+
+def test_ytdlp_install_hook():
+    """Test yt-dlp install hook to install yt-dlp if needed."""
+    # Run yt-dlp install hook
+    result = subprocess.run(
+        [sys.executable, str(MEDIA_INSTALL_HOOK)],
+        capture_output=True,
+        text=True,
+        timeout=600
+    )
+
+    assert result.returncode == 0, f"Install hook failed: {result.stderr}"
+
+    # Verify InstalledBinary JSONL output
+    found_binary = False
+    for line in result.stdout.strip().split('\n'):
+        if line.strip():
+            try:
+                record = json.loads(line)
+                if record.get('type') == 'InstalledBinary':
+                    assert record['name'] == 'yt-dlp'
+                    assert record['abspath']
+                    found_binary = True
+                    break
+            except json.JSONDecodeError:
+                pass
+
+    assert found_binary, "Should output InstalledBinary record"
+
+
+def test_verify_deps_with_abx_pkg():
+    """Verify yt-dlp is available via abx-pkg after hook installation."""
+    from abx_pkg import Binary, PipProvider, EnvProvider, BinProviderOverrides
+
+    PipProvider.model_rebuild()
+    EnvProvider.model_rebuild()
+
+    # Verify yt-dlp is available
+    ytdlp_binary = Binary(name='yt-dlp', binproviders=[PipProvider(), EnvProvider()])
+    ytdlp_loaded = ytdlp_binary.load()
+    assert ytdlp_loaded and ytdlp_loaded.abspath, "yt-dlp should be available after install hook"
+
+def test_handles_non_media_url():
+    """Test that media extractor handles non-media URLs gracefully via hook."""
+    # Prerequisites checked by earlier test
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Run media extraction hook on non-media URL
+        result = subprocess.run(
+            [sys.executable, str(MEDIA_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            timeout=60
+        )
+
+        # Should exit 0 even for non-media URL
+        assert result.returncode == 0, f"Should handle non-media URL gracefully: {result.stderr}"
+
+        # Verify JSONL output
+        assert 'STATUS=' in result.stdout, "Should report status"
+        assert 'RESULT_JSON=' in result.stdout, "Should output RESULT_JSON"
+
+        # Parse JSONL result
+        result_json = None
+        for line in result.stdout.split('\n'):
+            if line.startswith('RESULT_JSON='):
+                result_json = json.loads(line.split('=', 1)[1])
+                break
+
+        assert result_json, "Should have RESULT_JSON"
+        assert result_json['extractor'] == 'media'
+
+
+def test_config_save_media_false_skips():
+    """Test that SAVE_MEDIA=False causes skip."""
+    import os
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        env = os.environ.copy()
+        env['SAVE_MEDIA'] = 'False'
+
+        result = subprocess.run(
+            [sys.executable, str(MEDIA_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30
+        )
+
+        assert result.returncode == 0, f"Should exit 0 when skipping: {result.stderr}"
+        assert 'STATUS=' in result.stdout
+
+
+def test_config_timeout():
+    """Test that MEDIA_TIMEOUT config is respected."""
+    import os
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        env = os.environ.copy()
+        env['MEDIA_TIMEOUT'] = '5'
+
+        result = subprocess.run(
+            [sys.executable, str(MEDIA_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30
+        )
+
+        assert result.returncode == 0, "Should complete without hanging"
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
--- a/archivebox/plugins/mercury/on_Crawl__00_install_mercury.py
+++ b/archivebox/plugins/mercury/on_Crawl__00_install_mercury.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+"""
+Install mercury-parser if not already available.
+
+Runs at crawl start to ensure mercury-parser is installed.
+Outputs JSONL for InstalledBinary.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+
+def main():
+    try:
+        from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
+
+        NpmProvider.model_rebuild()
+        EnvProvider.model_rebuild()
+
+        # Note: npm package is @postlight/mercury-parser, binary is mercury-parser
+        mercury_binary = Binary(
+            name='mercury-parser',
+            binproviders=[NpmProvider(), EnvProvider()],
+            overrides={'npm': {'packages': ['@postlight/mercury-parser']}}
+        )
+
+        # Try to load, install if not found
+        try:
+            loaded = mercury_binary.load()
+            if not loaded or not loaded.abspath:
+                raise Exception("Not loaded")
+        except Exception:
+            # Install via npm
+            loaded = mercury_binary.install()
+
+        if loaded and loaded.abspath:
+            # Output InstalledBinary JSONL
+            print(json.dumps({
+                'type': 'InstalledBinary',
+                'name': 'mercury-parser',
+                'abspath': str(loaded.abspath),
+                'version': str(loaded.version) if loaded.version else None,
+                'sha256': loaded.sha256,
+                'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
+            }))
+            sys.exit(0)
+        else:
+            print(json.dumps({
+                'type': 'Dependency',
+                'bin_name': 'mercury-parser',
+                'bin_providers': 'npm,env',
+            }))
+            print("Failed to install mercury-parser", file=sys.stderr)
+            sys.exit(1)
+
+    except Exception as e:
+        print(json.dumps({
+            'type': 'Dependency',
+            'bin_name': 'mercury-parser',
+            'bin_providers': 'npm,env',
+        }))
+        print(f"Error installing mercury-parser: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
--- a/archivebox/plugins/mercury/tests/test_mercury.py
+++ b/archivebox/plugins/mercury/tests/test_mercury.py
@@ -0,0 +1,164 @@
+"""
+Integration tests for mercury plugin
+
+Tests verify:
+1. Hook script exists
+2. Dependencies installed via validation hooks
+3. Verify deps with abx-pkg
+4. Mercury extraction works on https://example.com
+5. JSONL output is correct
+6. Filesystem output contains extracted content
+7. Config options work
+"""
+
+import json
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+import pytest
+
+PLUGIN_DIR = Path(__file__).parent.parent
+PLUGINS_ROOT = PLUGIN_DIR.parent
+MERCURY_HOOK = PLUGIN_DIR / 'on_Snapshot__53_mercury.py'
+MERCURY_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_mercury.py'
+TEST_URL = 'https://example.com'
+
+def test_hook_script_exists():
+    """Verify on_Snapshot hook exists."""
+    assert MERCURY_HOOK.exists(), f"Hook not found: {MERCURY_HOOK}"
+
+
+def test_mercury_install_hook():
+    """Test mercury install hook to install mercury-parser if needed."""
+    # Run mercury install hook
+    result = subprocess.run(
+        [sys.executable, str(MERCURY_INSTALL_HOOK)],
+        capture_output=True,
+        text=True,
+        timeout=600
+    )
+
+    assert result.returncode == 0, f"Install hook failed: {result.stderr}"
+
+    # Verify InstalledBinary JSONL output
+    found_binary = False
+    for line in result.stdout.strip().split('\n'):
+        if line.strip():
+            try:
+                record = json.loads(line)
+                if record.get('type') == 'InstalledBinary':
+                    assert record['name'] == 'mercury-parser'
+                    assert record['abspath']
+                    found_binary = True
+                    break
+            except json.JSONDecodeError:
+                pass
+
+    assert found_binary, "Should output InstalledBinary record"
+
+
+def test_verify_deps_with_abx_pkg():
+    """Verify mercury-parser is available via abx-pkg after hook installation."""
+    from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
+
+    NpmProvider.model_rebuild()
+    EnvProvider.model_rebuild()
+
+    # Verify mercury-parser is available
+    mercury_binary = Binary(
+        name='mercury-parser',
+        binproviders=[NpmProvider(), EnvProvider()],
+        overrides={'npm': {'packages': ['@postlight/mercury-parser']}}
+    )
+    mercury_loaded = mercury_binary.load()
+    assert mercury_loaded and mercury_loaded.abspath, "mercury-parser should be available after install hook"
+
+def test_extracts_with_mercury_parser():
+    """Test full workflow: extract with mercury-parser from real HTML via hook."""
+    # Prerequisites checked by earlier test
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Create HTML source that mercury can parse
+        (tmpdir / 'singlefile').mkdir()
+        (tmpdir / 'singlefile' / 'singlefile.html').write_text(
+            '<html><head><title>Test Article</title></head><body>'
+            '<article><h1>Example Article</h1><p>This is test content for mercury parser.</p></article>'
+            '</body></html>'
+        )
+
+        # Run mercury extraction hook
+        result = subprocess.run(
+            [sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            timeout=60
+        )
+
+        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
+
+        # Verify JSONL output
+        assert 'STATUS=' in result.stdout, "Should report status"
+        assert 'RESULT_JSON=' in result.stdout, "Should output RESULT_JSON"
+
+        # Parse JSONL result
+        result_json = None
+        for line in result.stdout.split('\n'):
+            if line.startswith('RESULT_JSON='):
+                result_json = json.loads(line.split('=', 1)[1])
+                break
+
+        assert result_json, "Should have RESULT_JSON"
+        assert result_json['extractor'] == 'mercury'
+
+        # Verify filesystem output if extraction succeeded
+        if result_json['status'] == 'succeeded':
+            mercury_dir = tmpdir / 'mercury'
+            assert mercury_dir.exists(), "Output directory not created"
+
+            output_file = mercury_dir / 'content.html'
+            assert output_file.exists(), "content.html not created"
+
+            content = output_file.read_text()
+            assert len(content) > 0, "Output should not be empty"
+
+def test_config_save_mercury_false_skips():
+    """Test that SAVE_MERCURY=False causes skip."""
+    import os
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        env = os.environ.copy()
+        env['SAVE_MERCURY'] = 'False'
+
+        result = subprocess.run(
+            [sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30
+        )
+
+        assert result.returncode == 0, f"Should exit 0 when skipping: {result.stderr}"
+        assert 'STATUS=' in result.stdout
+
+
+def test_fails_gracefully_without_html():
+    """Test that mercury fails gracefully when no HTML source exists."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        result = subprocess.run(
+            [sys.executable, str(MERCURY_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            timeout=30
+        )
+
+        assert result.returncode == 0, "Should exit 0 even when no HTML source"
+        assert 'STATUS=' in result.stdout
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
--- a/archivebox/plugins/package-lock.json
+++ b/archivebox/plugins/package-lock.json
@@ -0,0 +1,925 @@
+{
+  "name": "archivebox-plugins",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "archivebox-plugins",
+      "dependencies": {
+        "puppeteer-core": "^24.34.0"
+      }
+    },
+    "node_modules/@puppeteer/browsers": {
+      "version": "2.11.0",
+      "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.11.0.tgz",
+      "integrity": "sha512-n6oQX6mYkG8TRPuPXmbPidkUbsSRalhmaaVAQxvH1IkQy63cwsH+kOjB3e4cpCDHg0aSvsiX9bQ4s2VB6mGWUQ==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "debug": "^4.4.3",
+        "extract-zip": "^2.0.1",
+        "progress": "^2.0.3",
+        "proxy-agent": "^6.5.0",
+        "semver": "^7.7.3",
+        "tar-fs": "^3.1.1",
+        "yargs": "^17.7.2"
+      },
+      "bin": {
+        "browsers": "lib/cjs/main-cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@tootallnate/quickjs-emscripten": {
+      "version": "0.23.0",
+      "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz",
+      "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==",
+      "license": "MIT"
+    },
+    "node_modules/@types/node": {
+      "version": "25.0.3",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-25.0.3.tgz",
+      "integrity": "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "undici-types": "~7.16.0"
+      }
+    },
+    "node_modules/@types/yauzl": {
+      "version": "2.10.3",
+      "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz",
+      "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "@types/node": "*"
+      }
+    },
+    "node_modules/agent-base": {
+      "version": "7.1.4",
+      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz",
+      "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/ansi-regex": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
+      "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/ansi-styles": {
+      "version": "4.3.0",
+      "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
+      "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
+      "license": "MIT",
+      "dependencies": {
+        "color-convert": "^2.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/ansi-styles?sponsor=1"
+      }
+    },
+    "node_modules/ast-types": {
+      "version": "0.13.4",
+      "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz",
+      "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==",
+      "license": "MIT",
+      "dependencies": {
+        "tslib": "^2.0.1"
+      },
+      "engines": {
+        "node": ">=4"
+      }
+    },
+    "node_modules/b4a": {
+      "version": "1.7.3",
+      "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.7.3.tgz",
+      "integrity": "sha512-5Q2mfq2WfGuFp3uS//0s6baOJLMoVduPYVeNmDYxu5OUA1/cBfvr2RIS7vi62LdNj/urk1hfmj867I3qt6uZ7Q==",
+      "license": "Apache-2.0",
+      "peerDependencies": {
+        "react-native-b4a": "*"
+      },
+      "peerDependenciesMeta": {
+        "react-native-b4a": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-events": {
+      "version": "2.8.2",
+      "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz",
+      "integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==",
+      "license": "Apache-2.0",
+      "peerDependencies": {
+        "bare-abort-controller": "*"
+      },
+      "peerDependenciesMeta": {
+        "bare-abort-controller": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-fs": {
+      "version": "4.5.2",
+      "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
+      "integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
+      "license": "Apache-2.0",
+      "optional": true,
+      "dependencies": {
+        "bare-events": "^2.5.4",
+        "bare-path": "^3.0.0",
+        "bare-stream": "^2.6.4",
+        "bare-url": "^2.2.2",
+        "fast-fifo": "^1.3.2"
+      },
+      "engines": {
+        "bare": ">=1.16.0"
+      },
+      "peerDependencies": {
+        "bare-buffer": "*"
+      },
+      "peerDependenciesMeta": {
+        "bare-buffer": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-os": {
+      "version": "3.6.2",
+      "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
+      "integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
+      "license": "Apache-2.0",
+      "optional": true,
+      "engines": {
+        "bare": ">=1.14.0"
+      }
+    },
+    "node_modules/bare-path": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
+      "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
+      "license": "Apache-2.0",
+      "optional": true,
+      "dependencies": {
+        "bare-os": "^3.0.1"
+      }
+    },
+    "node_modules/bare-stream": {
+      "version": "2.7.0",
+      "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
+      "integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
+      "license": "Apache-2.0",
+      "optional": true,
+      "dependencies": {
+        "streamx": "^2.21.0"
+      },
+      "peerDependencies": {
+        "bare-buffer": "*",
+        "bare-events": "*"
+      },
+      "peerDependenciesMeta": {
+        "bare-buffer": {
+          "optional": true
+        },
+        "bare-events": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-url": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
+      "integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
+      "license": "Apache-2.0",
+      "optional": true,
+      "dependencies": {
+        "bare-path": "^3.0.0"
+      }
+    },
+    "node_modules/basic-ftp": {
+      "version": "5.0.5",
+      "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz",
+      "integrity": "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=10.0.0"
+      }
+    },
+    "node_modules/buffer-crc32": {
+      "version": "0.2.13",
+      "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
+      "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==",
+      "license": "MIT",
+      "engines": {
+        "node": "*"
+      }
+    },
+    "node_modules/chromium-bidi": {
+      "version": "12.0.1",
+      "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-12.0.1.tgz",
+      "integrity": "sha512-fGg+6jr0xjQhzpy5N4ErZxQ4wF7KLEvhGZXD6EgvZKDhu7iOhZXnZhcDxPJDcwTcrD48NPzOCo84RP2lv3Z+Cg==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "mitt": "^3.0.1",
+        "zod": "^3.24.1"
+      },
+      "peerDependencies": {
+        "devtools-protocol": "*"
+      }
+    },
+    "node_modules/cliui": {
+      "version": "8.0.1",
+      "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
+      "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==",
+      "license": "ISC",
+      "dependencies": {
+        "string-width": "^4.2.0",
+        "strip-ansi": "^6.0.1",
+        "wrap-ansi": "^7.0.0"
+      },
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/color-convert": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
+      "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
+      "license": "MIT",
+      "dependencies": {
+        "color-name": "~1.1.4"
+      },
+      "engines": {
+        "node": ">=7.0.0"
+      }
+    },
+    "node_modules/color-name": {
+      "version": "1.1.4",
+      "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
+      "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
+      "license": "MIT"
+    },
+    "node_modules/data-uri-to-buffer": {
+      "version": "6.0.2",
+      "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
+      "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/debug": {
+      "version": "4.4.3",
+      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
+      "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
+      "license": "MIT",
+      "dependencies": {
+        "ms": "^2.1.3"
+      },
+      "engines": {
+        "node": ">=6.0"
+      },
+      "peerDependenciesMeta": {
+        "supports-color": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/degenerator": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz",
+      "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==",
+      "license": "MIT",
+      "dependencies": {
+        "ast-types": "^0.13.4",
+        "escodegen": "^2.1.0",
+        "esprima": "^4.0.1"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/devtools-protocol": {
+      "version": "0.0.1534754",
+      "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1534754.tgz",
+      "integrity": "sha512-26T91cV5dbOYnXdJi5qQHoTtUoNEqwkHcAyu/IKtjIAxiEqPMrDiRkDOPWVsGfNZGmlQVHQbZRSjD8sxagWVsQ==",
+      "license": "BSD-3-Clause",
+      "peer": true
+    },
+    "node_modules/emoji-regex": {
+      "version": "8.0.0",
+      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
+      "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
+      "license": "MIT"
+    },
+    "node_modules/end-of-stream": {
+      "version": "1.4.5",
+      "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz",
+      "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==",
+      "license": "MIT",
+      "dependencies": {
+        "once": "^1.4.0"
+      }
+    },
+    "node_modules/escalade": {
+      "version": "3.2.0",
+      "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
+      "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=6"
+      }
+    },
+    "node_modules/escodegen": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz",
+      "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==",
+      "license": "BSD-2-Clause",
+      "dependencies": {
+        "esprima": "^4.0.1",
+        "estraverse": "^5.2.0",
+        "esutils": "^2.0.2"
+      },
+      "bin": {
+        "escodegen": "bin/escodegen.js",
+        "esgenerate": "bin/esgenerate.js"
+      },
+      "engines": {
+        "node": ">=6.0"
+      },
+      "optionalDependencies": {
+        "source-map": "~0.6.1"
+      }
+    },
+    "node_modules/esprima": {
+      "version": "4.0.1",
+      "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz",
+      "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==",
+      "license": "BSD-2-Clause",
+      "bin": {
+        "esparse": "bin/esparse.js",
+        "esvalidate": "bin/esvalidate.js"
+      },
+      "engines": {
+        "node": ">=4"
+      }
+    },
+    "node_modules/estraverse": {
+      "version": "5.3.0",
+      "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
+      "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
+      "license": "BSD-2-Clause",
+      "engines": {
+        "node": ">=4.0"
+      }
+    },
+    "node_modules/esutils": {
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz",
+      "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==",
+      "license": "BSD-2-Clause",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/events-universal": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz",
+      "integrity": "sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "bare-events": "^2.7.0"
+      }
+    },
+    "node_modules/extract-zip": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz",
+      "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==",
+      "license": "BSD-2-Clause",
+      "dependencies": {
+        "debug": "^4.1.1",
+        "get-stream": "^5.1.0",
+        "yauzl": "^2.10.0"
+      },
+      "bin": {
+        "extract-zip": "cli.js"
+      },
+      "engines": {
+        "node": ">= 10.17.0"
+      },
+      "optionalDependencies": {
+        "@types/yauzl": "^2.9.1"
+      }
+    },
+    "node_modules/fast-fifo": {
+      "version": "1.3.2",
+      "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz",
+      "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==",
+      "license": "MIT"
+    },
+    "node_modules/fd-slicer": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz",
+      "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==",
+      "license": "MIT",
+      "dependencies": {
+        "pend": "~1.2.0"
+      }
+    },
+    "node_modules/get-caller-file": {
+      "version": "2.0.5",
+      "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
+      "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
+      "license": "ISC",
+      "engines": {
+        "node": "6.* || 8.* || >= 10.*"
+      }
+    },
+    "node_modules/get-stream": {
+      "version": "5.2.0",
+      "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz",
+      "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==",
+      "license": "MIT",
+      "dependencies": {
+        "pump": "^3.0.0"
+      },
+      "engines": {
+        "node": ">=8"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/get-uri": {
+      "version": "6.0.5",
+      "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.5.tgz",
+      "integrity": "sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==",
+      "license": "MIT",
+      "dependencies": {
+        "basic-ftp": "^5.0.2",
+        "data-uri-to-buffer": "^6.0.2",
+        "debug": "^4.3.4"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/http-proxy-agent": {
+      "version": "7.0.2",
+      "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
+      "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==",
+      "license": "MIT",
+      "dependencies": {
+        "agent-base": "^7.1.0",
+        "debug": "^4.3.4"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/https-proxy-agent": {
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz",
+      "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==",
+      "license": "MIT",
+      "dependencies": {
+        "agent-base": "^7.1.2",
+        "debug": "4"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/ip-address": {
+      "version": "10.1.0",
+      "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz",
+      "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 12"
+      }
+    },
+    "node_modules/is-fullwidth-code-point": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
+      "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/lru-cache": {
+      "version": "7.18.3",
+      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz",
+      "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==",
+      "license": "ISC",
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/mitt": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz",
+      "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==",
+      "license": "MIT"
+    },
+    "node_modules/ms": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
+      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
+      "license": "MIT"
+    },
+    "node_modules/netmask": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz",
+      "integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.4.0"
+      }
+    },
+    "node_modules/once": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
+      "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
+      "license": "ISC",
+      "dependencies": {
+        "wrappy": "1"
+      }
+    },
+    "node_modules/pac-proxy-agent": {
+      "version": "7.2.0",
+      "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
+      "integrity": "sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==",
+      "license": "MIT",
+      "dependencies": {
+        "@tootallnate/quickjs-emscripten": "^0.23.0",
+        "agent-base": "^7.1.2",
+        "debug": "^4.3.4",
+        "get-uri": "^6.0.1",
+        "http-proxy-agent": "^7.0.0",
+        "https-proxy-agent": "^7.0.6",
+        "pac-resolver": "^7.0.1",
+        "socks-proxy-agent": "^8.0.5"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/pac-resolver": {
+      "version": "7.0.1",
+      "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz",
+      "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==",
+      "license": "MIT",
+      "dependencies": {
+        "degenerator": "^5.0.0",
+        "netmask": "^2.0.2"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/pend": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
+      "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==",
+      "license": "MIT"
+    },
+    "node_modules/progress": {
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz",
+      "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.4.0"
+      }
+    },
+    "node_modules/proxy-agent": {
+      "version": "6.5.0",
+      "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.5.0.tgz",
+      "integrity": "sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==",
+      "license": "MIT",
+      "dependencies": {
+        "agent-base": "^7.1.2",
+        "debug": "^4.3.4",
+        "http-proxy-agent": "^7.0.1",
+        "https-proxy-agent": "^7.0.6",
+        "lru-cache": "^7.14.1",
+        "pac-proxy-agent": "^7.1.0",
+        "proxy-from-env": "^1.1.0",
+        "socks-proxy-agent": "^8.0.5"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/proxy-from-env": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
+      "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
+      "license": "MIT"
+    },
+    "node_modules/pump": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz",
+      "integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==",
+      "license": "MIT",
+      "dependencies": {
+        "end-of-stream": "^1.1.0",
+        "once": "^1.3.1"
+      }
+    },
+    "node_modules/puppeteer-core": {
+      "version": "24.34.0",
+      "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.34.0.tgz",
+      "integrity": "sha512-24evawO+mUGW4mvS2a2ivwLdX3gk8zRLZr9HP+7+VT2vBQnm0oh9jJEZmUE3ePJhRkYlZ93i7OMpdcoi2qNCLg==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@puppeteer/browsers": "2.11.0",
+        "chromium-bidi": "12.0.1",
+        "debug": "^4.4.3",
+        "devtools-protocol": "0.0.1534754",
+        "typed-query-selector": "^2.12.0",
+        "webdriver-bidi-protocol": "0.3.10",
+        "ws": "^8.18.3"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/require-directory": {
+      "version": "2.1.1",
+      "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
+      "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/semver": {
+      "version": "7.7.3",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz",
+      "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==",
+      "license": "ISC",
+      "bin": {
+        "semver": "bin/semver.js"
+      },
+      "engines": {
+        "node": ">=10"
+      }
+    },
+    "node_modules/smart-buffer": {
+      "version": "4.2.0",
+      "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz",
+      "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 6.0.0",
+        "npm": ">= 3.0.0"
+      }
+    },
+    "node_modules/socks": {
+      "version": "2.8.7",
+      "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.7.tgz",
+      "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==",
+      "license": "MIT",
+      "dependencies": {
+        "ip-address": "^10.0.1",
+        "smart-buffer": "^4.2.0"
+      },
+      "engines": {
+        "node": ">= 10.0.0",
+        "npm": ">= 3.0.0"
+      }
+    },
+    "node_modules/socks-proxy-agent": {
+      "version": "8.0.5",
+      "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz",
+      "integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==",
+      "license": "MIT",
+      "dependencies": {
+        "agent-base": "^7.1.2",
+        "debug": "^4.3.4",
+        "socks": "^2.8.3"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/source-map": {
+      "version": "0.6.1",
+      "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
+      "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==",
+      "license": "BSD-3-Clause",
+      "optional": true,
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/streamx": {
+      "version": "2.23.0",
+      "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
+      "integrity": "sha512-kn+e44esVfn2Fa/O0CPFcex27fjIL6MkVae0Mm6q+E6f0hWv578YCERbv+4m02cjxvDsPKLnmxral/rR6lBMAg==",
+      "license": "MIT",
+      "dependencies": {
+        "events-universal": "^1.0.0",
+        "fast-fifo": "^1.3.2",
+        "text-decoder": "^1.1.0"
+      }
+    },
+    "node_modules/string-width": {
+      "version": "4.2.3",
+      "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
+      "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
+      "license": "MIT",
+      "dependencies": {
+        "emoji-regex": "^8.0.0",
+        "is-fullwidth-code-point": "^3.0.0",
+        "strip-ansi": "^6.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/strip-ansi": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
+      "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
+      "license": "MIT",
+      "dependencies": {
+        "ansi-regex": "^5.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/tar-fs": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
+      "integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
+      "license": "MIT",
+      "dependencies": {
+        "pump": "^3.0.0",
+        "tar-stream": "^3.1.5"
+      },
+      "optionalDependencies": {
+        "bare-fs": "^4.0.1",
+        "bare-path": "^3.0.0"
+      }
+    },
+    "node_modules/tar-stream": {
+      "version": "3.1.7",
+      "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz",
+      "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==",
+      "license": "MIT",
+      "dependencies": {
+        "b4a": "^1.6.4",
+        "fast-fifo": "^1.2.0",
+        "streamx": "^2.15.0"
+      }
+    },
+    "node_modules/text-decoder": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz",
+      "integrity": "sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "b4a": "^1.6.4"
+      }
+    },
+    "node_modules/tslib": {
+      "version": "2.8.1",
+      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
+      "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
+      "license": "0BSD"
+    },
+    "node_modules/typed-query-selector": {
+      "version": "2.12.0",
+      "resolved": "https://registry.npmjs.org/typed-query-selector/-/typed-query-selector-2.12.0.tgz",
+      "integrity": "sha512-SbklCd1F0EiZOyPiW192rrHZzZ5sBijB6xM+cpmrwDqObvdtunOHHIk9fCGsoK5JVIYXoyEp4iEdE3upFH3PAg==",
+      "license": "MIT"
+    },
+    "node_modules/undici-types": {
+      "version": "7.16.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
+      "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
+      "license": "MIT",
+      "optional": true
+    },
+    "node_modules/webdriver-bidi-protocol": {
+      "version": "0.3.10",
+      "resolved": "https://registry.npmjs.org/webdriver-bidi-protocol/-/webdriver-bidi-protocol-0.3.10.tgz",
+      "integrity": "sha512-5LAE43jAVLOhB/QqX4bwSiv0Hg1HBfMmOuwBSXHdvg4GMGu9Y0lIq7p4R/yySu6w74WmaR4GM4H9t2IwLW7hgw==",
+      "license": "Apache-2.0"
+    },
+    "node_modules/wrap-ansi": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
+      "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
+      "license": "MIT",
+      "dependencies": {
+        "ansi-styles": "^4.0.0",
+        "string-width": "^4.1.0",
+        "strip-ansi": "^6.0.0"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
+      }
+    },
+    "node_modules/wrappy": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
+      "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
+      "license": "ISC"
+    },
+    "node_modules/ws": {
+      "version": "8.18.3",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
+      "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=10.0.0"
+      },
+      "peerDependencies": {
+        "bufferutil": "^4.0.1",
+        "utf-8-validate": ">=5.0.2"
+      },
+      "peerDependenciesMeta": {
+        "bufferutil": {
+          "optional": true
+        },
+        "utf-8-validate": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/y18n": {
+      "version": "5.0.8",
+      "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
+      "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
+      "license": "ISC",
+      "engines": {
+        "node": ">=10"
+      }
+    },
+    "node_modules/yargs": {
+      "version": "17.7.2",
+      "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
+      "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
+      "license": "MIT",
+      "dependencies": {
+        "cliui": "^8.0.1",
+        "escalade": "^3.1.1",
+        "get-caller-file": "^2.0.5",
+        "require-directory": "^2.1.1",
+        "string-width": "^4.2.3",
+        "y18n": "^5.0.5",
+        "yargs-parser": "^21.1.1"
+      },
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/yargs-parser": {
+      "version": "21.1.1",
+      "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
+      "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
+      "license": "ISC",
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/yauzl": {
+      "version": "2.10.0",
+      "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz",
+      "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==",
+      "license": "MIT",
+      "dependencies": {
+        "buffer-crc32": "~0.2.3",
+        "fd-slicer": "~1.1.0"
+      }
+    },
+    "node_modules/zod": {
+      "version": "3.25.76",
+      "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
+      "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/sponsors/colinhacks"
+      }
+    }
+  }
+}
--- a/archivebox/plugins/package.json
+++ b/archivebox/plugins/package.json
@@ -0,0 +1 @@
+{"name":"archivebox-plugins","private":true,"dependencies":{"puppeteer-core":"^24.34.0"}}
--- a/archivebox/plugins/pdf/tests/test_pdf.py
+++ b/archivebox/plugins/pdf/tests/test_pdf.py
@@ -0,0 +1,232 @@
+"""
+Integration tests for pdf plugin
+
+Tests verify:
+1. Hook script exists
+2. Dependencies installed via chrome_session validation hooks
+3. Verify deps with abx-pkg
+4. PDF extraction works on https://example.com
+5. JSONL output is correct
+6. Filesystem output is valid PDF file
+7. Config options work
+"""
+
+import json
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+
+PLUGIN_DIR = Path(__file__).parent.parent
+PLUGINS_ROOT = PLUGIN_DIR.parent
+PDF_HOOK = PLUGIN_DIR / 'on_Snapshot__35_pdf.js'
+CHROME_VALIDATE_HOOK = PLUGINS_ROOT / 'chrome_session' / 'on_Crawl__00_validate_chrome.py'
+NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Dependency__install_using_npm_provider.py'
+TEST_URL = 'https://example.com'
+
+
+def test_hook_script_exists():
+    """Verify on_Snapshot hook exists."""
+    assert PDF_HOOK.exists(), f"Hook not found: {PDF_HOOK}"
+
+
+def test_chrome_validation_and_install():
+    """Test chrome validation hook to install puppeteer-core if needed."""
+    # Run chrome validation hook (from chrome_session plugin)
+    result = subprocess.run(
+        [sys.executable, str(CHROME_VALIDATE_HOOK)],
+        capture_output=True,
+        text=True,
+        timeout=30
+    )
+
+    # If exit 1, binary not found - need to install
+    if result.returncode == 1:
+        # Parse Dependency request from JSONL
+        dependency_request = None
+        for line in result.stdout.strip().split('\n'):
+            if line.strip():
+                try:
+                    record = json.loads(line)
+                    if record.get('type') == 'Dependency':
+                        dependency_request = record
+                        break
+                except json.JSONDecodeError:
+                    pass
+
+        if dependency_request:
+            bin_name = dependency_request['bin_name']
+            bin_providers = dependency_request['bin_providers']
+
+            # Install via npm provider hook
+            install_result = subprocess.run(
+                [
+                    sys.executable,
+                    str(NPM_PROVIDER_HOOK),
+                    '--dependency-id', 'test-dep-001',
+                    '--bin-name', bin_name,
+                    '--bin-providers', bin_providers
+                ],
+                capture_output=True,
+                text=True,
+                timeout=600
+            )
+
+            assert install_result.returncode == 0, f"Install failed: {install_result.stderr}"
+
+            # Verify installation via JSONL output
+            for line in install_result.stdout.strip().split('\n'):
+                if line.strip():
+                    try:
+                        record = json.loads(line)
+                        if record.get('type') == 'InstalledBinary':
+                            assert record['name'] == bin_name
+                            assert record['abspath']
+                            break
+                    except json.JSONDecodeError:
+                        pass
+    else:
+        # Binary already available, verify via JSONL output
+        assert result.returncode == 0, f"Validation failed: {result.stderr}"
+
+
+def test_verify_deps_with_abx_pkg():
+    """Verify dependencies are available via abx-pkg after hook installation."""
+    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
+
+    EnvProvider.model_rebuild()
+
+    # Verify node is available
+    node_binary = Binary(name='node', binproviders=[EnvProvider()])
+    node_loaded = node_binary.load()
+    assert node_loaded and node_loaded.abspath, "Node.js required for pdf plugin"
+
+
+def test_extracts_pdf_from_example_com():
+    """Test full workflow: extract PDF from real example.com via hook."""
+    # Prerequisites checked by earlier test
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Run PDF extraction hook
+        result = subprocess.run(
+            ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            timeout=120
+        )
+
+        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
+
+        # Verify JSONL output
+        assert 'STATUS=succeeded' in result.stdout, "Should report success"
+        assert 'RESULT_JSON=' in result.stdout, "Should output RESULT_JSON"
+
+        # Parse JSONL result
+        result_json = None
+        for line in result.stdout.split('\n'):
+            if line.startswith('RESULT_JSON='):
+                result_json = json.loads(line.split('=', 1)[1])
+                break
+
+        assert result_json, "Should have RESULT_JSON"
+        assert result_json['extractor'] == 'pdf'
+        assert result_json['status'] == 'succeeded'
+        assert result_json['url'] == TEST_URL
+
+        # Verify filesystem output
+        pdf_dir = tmpdir / 'pdf'
+        assert pdf_dir.exists(), "Output directory not created"
+
+        pdf_file = pdf_dir / 'output.pdf'
+        assert pdf_file.exists(), "output.pdf not created"
+
+        # Verify file is valid PDF
+        file_size = pdf_file.stat().st_size
+        assert file_size > 500, f"PDF too small: {file_size} bytes"
+        assert file_size < 10 * 1024 * 1024, f"PDF suspiciously large: {file_size} bytes"
+
+        # Check PDF magic bytes
+        pdf_data = pdf_file.read_bytes()
+        assert pdf_data[:4] == b'%PDF', "Should be valid PDF file"
+
+
+def test_config_save_pdf_false_skips():
+    """Test that SAVE_PDF=False causes skip."""
+    import os
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+        env = os.environ.copy()
+        env['SAVE_PDF'] = 'False'
+
+        result = subprocess.run(
+            ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30
+        )
+
+        assert result.returncode == 0, f"Should exit 0 when skipping: {result.stderr}"
+        assert 'STATUS=' in result.stdout
+
+
+def test_reports_missing_chrome():
+    """Test that script reports error when Chrome is not found."""
+    import os
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Set CHROME_BINARY to nonexistent path
+        env = os.environ.copy()
+        env['CHROME_BINARY'] = '/nonexistent/chrome'
+
+        result = subprocess.run(
+            ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=test123'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30
+        )
+
+        # Should fail and report missing Chrome
+        if result.returncode != 0:
+            combined = result.stdout + result.stderr
+            assert 'chrome' in combined.lower() or 'browser' in combined.lower() or 'ERROR=' in combined
+
+
+def test_config_timeout_honored():
+    """Test that CHROME_TIMEOUT config is respected."""
+    import os
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Set very short timeout
+        env = os.environ.copy()
+        env['CHROME_TIMEOUT'] = '5'
+
+        result = subprocess.run(
+            ['node', str(PDF_HOOK), f'--url={TEST_URL}', '--snapshot-id=testtimeout'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30
+        )
+
+        # Should complete (success or fail, but not hang)
+        assert result.returncode in (0, 1), "Should complete without hanging"
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
--- a/archivebox/plugins/readability/on_Crawl__00_install_readability.py
+++ b/archivebox/plugins/readability/on_Crawl__00_install_readability.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+"""
+Install readability-extractor if not already available.
+
+Runs at crawl start to ensure readability-extractor is installed.
+Outputs JSONL for InstalledBinary.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+
+def main():
+    try:
+        from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
+
+        NpmProvider.model_rebuild()
+        EnvProvider.model_rebuild()
+
+        # Note: npm package is from github:ArchiveBox/readability-extractor
+        readability_binary = Binary(
+            name='readability-extractor',
+            binproviders=[NpmProvider(), EnvProvider()],
+            overrides={'npm': {'packages': ['github:ArchiveBox/readability-extractor']}}
+        )
+
+        # Try to load, install if not found
+        try:
+            loaded = readability_binary.load()
+            if not loaded or not loaded.abspath:
+                raise Exception("Not loaded")
+        except Exception:
+            # Install via npm from GitHub repo
+            loaded = readability_binary.install()
+
+        if loaded and loaded.abspath:
+            # Output InstalledBinary JSONL
+            print(json.dumps({
+                'type': 'InstalledBinary',
+                'name': 'readability-extractor',
+                'abspath': str(loaded.abspath),
+                'version': str(loaded.version) if loaded.version else None,
+                'sha256': loaded.sha256,
+                'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
+            }))
+            sys.exit(0)
+        else:
+            print(json.dumps({
+                'type': 'Dependency',
+                'bin_name': 'readability-extractor',
+                'bin_providers': 'npm,env',
+            }))
+            print("Failed to install readability-extractor", file=sys.stderr)
+            sys.exit(1)
+
+    except Exception as e:
+        print(json.dumps({
+            'type': 'Dependency',
+            'bin_name': 'readability-extractor',
+            'bin_providers': 'npm,env',
+        }))
+        print(f"Error installing readability-extractor: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
--- a/archivebox/plugins/readability/on_Snapshot__52_readability.py
+++ b/archivebox/plugins/readability/on_Snapshot__52_readability.py
@@ -6,10 +6,10 @@ Usage: on_Snapshot__readability.py --url=<url> --snapshot-id=<uuid>
 Output: Creates readability/ directory with content.html, content.txt, article.json

 Environment variables:
-    READABILITY_BINARY: Path to readability-cli binary
+    READABILITY_BINARY: Path to readability-extractor binary
    TIMEOUT: Timeout in seconds (default: 60)

-Note: Requires readability-cli: npm install -g readability-cli
+Note: Requires readability-extractor from https://github.com/ArchiveBox/readability-extractor
      This extractor looks for HTML source from other extractors (wget, singlefile, dom)
 """

@@ -27,7 +27,7 @@ import rich_click as click

 # Extractor metadata
 EXTRACTOR_NAME = 'readability'
-BIN_NAME = 'readability-cli'
+BIN_NAME = 'readability-extractor'
 BIN_PROVIDERS = 'npm,env'
 OUTPUT_DIR = 'readability'

@@ -44,12 +44,12 @@ def get_env_int(name: str, default: int = 0) -> int:


 def find_readability() -> str | None:
-    """Find readability-cli binary."""
+    """Find readability-extractor binary."""
    readability = get_env('READABILITY_BINARY')
    if readability and os.path.isfile(readability):
        return readability

-    for name in ['readability-cli', 'readable']:
+    for name in ['readability-extractor']:
        binary = shutil.which(name)
        if binary:
            return binary
@@ -58,7 +58,7 @@ def find_readability() -> str | None:


 def get_version(binary: str) -> str:
-    """Get readability-cli version."""
+    """Get readability-extractor version."""
    try:
        result = subprocess.run([binary, '--version'], capture_output=True, text=True, timeout=10)
        return result.stdout.strip()[:64]
@@ -106,24 +106,24 @@ def extract_readability(url: str, binary: str) -> tuple[bool, str | None, str]:
    output_dir.mkdir(exist_ok=True)

    try:
-        # Run readability-cli
-        cmd = [binary, '--json', html_source]
+        # Run readability-extractor (outputs JSON by default)
+        cmd = [binary, html_source]
        result = subprocess.run(cmd, capture_output=True, timeout=timeout)

        if result.returncode != 0:
            stderr = result.stderr.decode('utf-8', errors='replace')
-            return False, None, f'readability-cli failed: {stderr[:200]}'
+            return False, None, f'readability-extractor failed: {stderr[:200]}'

        # Parse JSON output
        try:
            result_json = json.loads(result.stdout)
        except json.JSONDecodeError:
-            return False, None, 'readability-cli returned invalid JSON'
+            return False, None, 'readability-extractor returned invalid JSON'

        # Extract and save content
-        # readability-cli v2.x uses hyphenated field names
-        text_content = result_json.pop('text-content', result_json.pop('textContent', ''))
-        html_content = result_json.pop('html-content', result_json.pop('content', ''))
+        # readability-extractor uses camelCase field names (textContent, content)
+        text_content = result_json.pop('textContent', result_json.pop('text-content', ''))
+        html_content = result_json.pop('content', result_json.pop('html-content', ''))

        if not text_content and not html_content:
            return False, None, 'No content extracted'
@@ -157,7 +157,7 @@ def main(url: str, snapshot_id: str):
        # Find binary
        binary = find_readability()
        if not binary:
-            print(f'ERROR: readability-cli binary not found', file=sys.stderr)
+            print(f'ERROR: readability-extractor binary not found', file=sys.stderr)
            print(f'DEPENDENCY_NEEDED={BIN_NAME}', file=sys.stderr)
            print(f'BIN_PROVIDERS={BIN_PROVIDERS}', file=sys.stderr)
            sys.exit(1)
@@ -187,7 +187,7 @@ def main(url: str, snapshot_id: str):
    print(f'END_TS={end_ts.isoformat()}')
    print(f'DURATION={duration:.2f}')
    if binary:
-        print(f'CMD={binary} --json <html>')
+        print(f'CMD={binary} <html>')
    if version:
        print(f'VERSION={version}')
    if output:
--- a/archivebox/plugins/readability/tests/test_readability.py
+++ b/archivebox/plugins/readability/tests/test_readability.py
@@ -2,9 +2,10 @@
 Integration tests for readability plugin

 Tests verify:
-1. Plugin reports missing dependency correctly
-2. readability-cli can be installed via npm (note: package name != binary name)
-3. Extraction works against real example.com content
+1. Install hook installs readability-extractor via abx-pkg
+2. Verify deps with abx-pkg
+3. Plugin reports missing dependency correctly
+4. Extraction works against real example.com content
 """

 import json
@@ -20,6 +21,7 @@ import pytest
 PLUGIN_DIR = Path(__file__).parent.parent
 PLUGINS_ROOT = PLUGIN_DIR.parent
 READABILITY_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_readability.py'))
+READABILITY_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_readability.py'
 TEST_URL = 'https://example.com'


@@ -74,7 +76,7 @@ def test_hook_script_exists():


 def test_reports_missing_dependency_when_not_installed():
-    """Test that script reports DEPENDENCY_NEEDED when readability-cli is not found."""
+    """Test that script reports DEPENDENCY_NEEDED when readability-extractor is not found."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)

@@ -96,68 +98,57 @@ def test_reports_missing_dependency_when_not_installed():
        assert result.returncode != 0, "Should exit non-zero when dependency missing"
        combined = result.stdout + result.stderr
        assert 'DEPENDENCY_NEEDED' in combined, "Should output DEPENDENCY_NEEDED"
-        assert 'readability-cli' in combined or 'BIN_NAME' in combined, "Should mention readability-cli"
+        assert 'readability-extractor' in combined or 'BIN_NAME' in combined, "Should mention readability-extractor"


-def test_can_install_readability_via_npm():
-    """Test that readability-cli can be installed via npm and binary becomes available.
-
-    Note: The npm package 'readability-cli' installs a binary named 'readable',
-    so we test the full installation flow using npm install directly.
-    """
-
-    # Check npm is available
-    if not shutil.which('npm'):
-        pytest.skip("npm not available on this system")
-
-    # Install readability-cli package via npm
-    # The orchestrator/dependency hooks would call this via npm provider
+def test_readability_install_hook():
+    """Test readability install hook to install readability-extractor if needed."""
    result = subprocess.run(
-        ['npm', 'install', '-g', 'readability-cli'],
+        [sys.executable, str(READABILITY_INSTALL_HOOK)],
        capture_output=True,
        text=True,
-        timeout=300
+        timeout=600
    )

-    assert result.returncode == 0, f"npm install failed: {result.stderr}"
+    assert result.returncode == 0, f"Install hook failed: {result.stderr}"

-    # Verify the 'readable' binary is now available
-    # (readability-cli package installs as 'readable' not 'readability-cli')
-    result = subprocess.run(['which', 'readable'], capture_output=True, text=True)
-    assert result.returncode == 0, "readable binary not found after npm install"
+    # Verify InstalledBinary JSONL output
+    found_binary = False
+    for line in result.stdout.strip().split('\n'):
+        if line.strip():
+            try:
+                record = json.loads(line)
+                if record.get('type') == 'InstalledBinary':
+                    assert record['name'] == 'readability-extractor'
+                    assert record['abspath']
+                    found_binary = True
+                    break
+            except json.JSONDecodeError:
+                pass

-    binary_path = result.stdout.strip()
-    assert Path(binary_path).exists(), f"Binary should exist at {binary_path}"
+    assert found_binary, "Should output InstalledBinary record"

-    # Test that it's executable and responds to --version
-    result = subprocess.run(
-        [binary_path, '--version'],
-        capture_output=True,
-        text=True,
-        timeout=10
+
+def test_verify_deps_with_abx_pkg():
+    """Verify readability-extractor is available via abx-pkg after hook installation."""
+    from abx_pkg import Binary, NpmProvider, EnvProvider, BinProviderOverrides
+
+    NpmProvider.model_rebuild()
+    EnvProvider.model_rebuild()
+
+    readability_binary = Binary(
+        name='readability-extractor',
+        binproviders=[NpmProvider(), EnvProvider()],
+        overrides={'npm': {'packages': ['github:ArchiveBox/readability-extractor']}}
    )
-    assert result.returncode == 0, f"Binary not executable: {result.stderr}"
+    readability_loaded = readability_binary.load()
+    assert readability_loaded and readability_loaded.abspath, "readability-extractor should be available after install hook"


 def test_extracts_article_after_installation():
-    """Test full workflow: ensure readability-cli installed then extract from example.com HTML."""
+    """Test full workflow: extract article using readability-extractor from real HTML."""
+    # Prerequisites checked by earlier test (install hook should have run)

-    # Check npm is available
-    if not shutil.which('npm'):
-        pytest.skip("npm not available on this system")
-
-    # Ensure readability-cli is installed (orchestrator would handle this)
-    install_result = subprocess.run(
-        ['npm', 'install', '-g', 'readability-cli'],
-        capture_output=True,
-        text=True,
-        timeout=300
-    )
-
-    if install_result.returncode != 0:
-        pytest.skip(f"Could not install readability-cli: {install_result.stderr}")
-
-    # Now test extraction
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)

@@ -213,21 +204,7 @@ def test_extracts_article_after_installation():

 def test_fails_gracefully_without_html_source():
    """Test that extraction fails gracefully when no HTML source is available."""
-
-    # Check npm is available
-    if not shutil.which('npm'):
-        pytest.skip("npm not available on this system")
-
-    # Ensure readability-cli is installed
-    install_result = subprocess.run(
-        ['npm', 'install', '-g', 'readability-cli'],
-        capture_output=True,
-        text=True,
-        timeout=300
-    )
-
-    if install_result.returncode != 0:
-        pytest.skip("Could not install readability-cli")
+    # Prerequisites checked by earlier test (install hook should have run)

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
--- a/archivebox/plugins/screenshot/tests/test_screenshot.py
+++ b/archivebox/plugins/screenshot/tests/test_screenshot.py
@@ -0,0 +1,232 @@
+"""
+Integration tests for screenshot plugin
+
+Tests verify:
+1. Hook script exists
+2. Dependencies installed via chrome_session validation hooks
+3. Verify deps with abx-pkg
+4. Screenshot extraction works on https://example.com
+5. JSONL output is correct
+6. Filesystem output is valid PNG image
+7. Config options work
+"""
+
+import json
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+
+PLUGIN_DIR = Path(__file__).parent.parent
+PLUGINS_ROOT = PLUGIN_DIR.parent
+SCREENSHOT_HOOK = PLUGIN_DIR / 'on_Snapshot__34_screenshot.js'
+CHROME_VALIDATE_HOOK = PLUGINS_ROOT / 'chrome_session' / 'on_Crawl__00_validate_chrome.py'
+NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Dependency__install_using_npm_provider.py'
+TEST_URL = 'https://example.com'
+
+
+def test_hook_script_exists():
+    """Verify on_Snapshot hook exists."""
+    assert SCREENSHOT_HOOK.exists(), f"Hook not found: {SCREENSHOT_HOOK}"
+
+
+def test_chrome_validation_and_install():
+    """Test chrome validation hook to install puppeteer-core if needed."""
+    # Run chrome validation hook (from chrome_session plugin)
+    result = subprocess.run(
+        [sys.executable, str(CHROME_VALIDATE_HOOK)],
+        capture_output=True,
+        text=True,
+        timeout=30
+    )
+
+    # If exit 1, binary not found - need to install
+    if result.returncode == 1:
+        # Parse Dependency request from JSONL
+        dependency_request = None
+        for line in result.stdout.strip().split('\n'):
+            if line.strip():
+                try:
+                    record = json.loads(line)
+                    if record.get('type') == 'Dependency':
+                        dependency_request = record
+                        break
+                except json.JSONDecodeError:
+                    pass
+
+        if dependency_request:
+            bin_name = dependency_request['bin_name']
+            bin_providers = dependency_request['bin_providers']
+
+            # Install via npm provider hook
+            install_result = subprocess.run(
+                [
+                    sys.executable,
+                    str(NPM_PROVIDER_HOOK),
+                    '--dependency-id', 'test-dep-001',
+                    '--bin-name', bin_name,
+                    '--bin-providers', bin_providers
+                ],
+                capture_output=True,
+                text=True,
+                timeout=600
+            )
+
+            assert install_result.returncode == 0, f"Install failed: {install_result.stderr}"
+
+            # Verify installation via JSONL output
+            for line in install_result.stdout.strip().split('\n'):
+                if line.strip():
+                    try:
+                        record = json.loads(line)
+                        if record.get('type') == 'InstalledBinary':
+                            assert record['name'] == bin_name
+                            assert record['abspath']
+                            break
+                    except json.JSONDecodeError:
+                        pass
+    else:
+        # Binary already available, verify via JSONL output
+        assert result.returncode == 0, f"Validation failed: {result.stderr}"
+
+
+def test_verify_deps_with_abx_pkg():
+    """Verify dependencies are available via abx-pkg after hook installation."""
+    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
+
+    EnvProvider.model_rebuild()
+
+    # Verify node is available
+    node_binary = Binary(name='node', binproviders=[EnvProvider()])
+    node_loaded = node_binary.load()
+    assert node_loaded and node_loaded.abspath, "Node.js required for screenshot plugin"
+
+
+def test_extracts_screenshot_from_example_com():
+    """Test full workflow: extract screenshot from real example.com via hook."""
+    # Prerequisites checked by earlier test
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Run screenshot extraction hook
+        result = subprocess.run(
+            ['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            timeout=120
+        )
+
+        assert result.returncode == 0, f"Extraction failed: {result.stderr}"
+
+        # Verify JSONL output
+        assert 'STATUS=succeeded' in result.stdout, "Should report success"
+        assert 'RESULT_JSON=' in result.stdout, "Should output RESULT_JSON"
+
+        # Parse JSONL result
+        result_json = None
+        for line in result.stdout.split('\n'):
+            if line.startswith('RESULT_JSON='):
+                result_json = json.loads(line.split('=', 1)[1])
+                break
+
+        assert result_json, "Should have RESULT_JSON"
+        assert result_json['extractor'] == 'screenshot'
+        assert result_json['status'] == 'succeeded'
+        assert result_json['url'] == TEST_URL
+
+        # Verify filesystem output
+        screenshot_dir = tmpdir / 'screenshot'
+        assert screenshot_dir.exists(), "Output directory not created"
+
+        screenshot_file = screenshot_dir / 'screenshot.png'
+        assert screenshot_file.exists(), "screenshot.png not created"
+
+        # Verify file is valid PNG
+        file_size = screenshot_file.stat().st_size
+        assert file_size > 1000, f"Screenshot too small: {file_size} bytes"
+        assert file_size < 10 * 1024 * 1024, f"Screenshot suspiciously large: {file_size} bytes"
+
+        # Check PNG magic bytes
+        screenshot_data = screenshot_file.read_bytes()
+        assert screenshot_data[:8] == b'\x89PNG\r\n\x1a\n', "Should be valid PNG file"
+
+
+def test_config_save_screenshot_false_skips():
+    """Test that SAVE_SCREENSHOT=False causes skip."""
+    import os
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+        env = os.environ.copy()
+        env['SAVE_SCREENSHOT'] = 'False'
+
+        result = subprocess.run(
+            ['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test999'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30
+        )
+
+        assert result.returncode == 0, f"Should exit 0 when skipping: {result.stderr}"
+        assert 'STATUS=' in result.stdout
+
+
+def test_reports_missing_chrome():
+    """Test that script reports error when Chrome is not found."""
+    import os
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Set CHROME_BINARY to nonexistent path
+        env = os.environ.copy()
+        env['CHROME_BINARY'] = '/nonexistent/chrome'
+
+        result = subprocess.run(
+            ['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test123'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30
+        )
+
+        # Should fail and report missing Chrome
+        if result.returncode != 0:
+            combined = result.stdout + result.stderr
+            assert 'chrome' in combined.lower() or 'browser' in combined.lower() or 'ERROR=' in combined
+
+
+def test_config_timeout_honored():
+    """Test that CHROME_TIMEOUT config is respected."""
+    import os
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Set very short timeout
+        env = os.environ.copy()
+        env['CHROME_TIMEOUT'] = '5'
+
+        result = subprocess.run(
+            ['node', str(SCREENSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=testtimeout'],
+            cwd=tmpdir,
+            capture_output=True,
+            text=True,
+            env=env,
+            timeout=30
+        )
+
+        # Should complete (success or fail, but not hang)
+        assert result.returncode in (0, 1), "Should complete without hanging"
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
--- a/archivebox/plugins/singlefile/tests/test_archiving.py
+++ b/archivebox/plugins/singlefile/tests/test_archiving.py
@@ -1,10 +1,17 @@
 """
-Integration tests - archive example.com with SingleFile and verify output
+Integration tests for singlefile plugin
+
+Tests verify:
+1. on_Crawl hook validates and installs single-file
+2. Verify deps with abx-pkg
+3. Extraction works on https://example.com
+4. JSONL output is correct
+5. Filesystem output is valid HTML
 """

 import json
-import os
 import subprocess
+import sys
 import tempfile
 from pathlib import Path

@@ -12,99 +19,108 @@ import pytest


 PLUGIN_DIR = Path(__file__).parent.parent
-INSTALL_SCRIPT = PLUGIN_DIR / "on_Snapshot__04_singlefile.js"
+PLUGINS_ROOT = PLUGIN_DIR.parent
+SINGLEFILE_HOOK = PLUGIN_DIR / "on_Snapshot__04_singlefile.js"
+CHROME_VALIDATE_HOOK = PLUGINS_ROOT / 'chrome_session' / 'on_Crawl__00_validate_chrome.py'
+NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Dependency__install_using_npm_provider.py'
 TEST_URL = "https://example.com"


-# Check if single-file CLI is available
-try:
+def test_hook_script_exists():
+    """Verify on_Snapshot hook exists."""
+    assert SINGLEFILE_HOOK.exists(), f"Hook not found: {SINGLEFILE_HOOK}"
+
+
+def test_chrome_validation_and_install():
+    """Test chrome validation hook to install puppeteer-core if needed."""
+    # Run chrome validation hook (from chrome_session plugin)
    result = subprocess.run(
-        ["which", "single-file"],
+        [sys.executable, str(CHROME_VALIDATE_HOOK)],
        capture_output=True,
-        timeout=5
+        text=True,
+        timeout=30
    )
-    SINGLEFILE_CLI_AVAILABLE = result.returncode == 0
-except:
-    SINGLEFILE_CLI_AVAILABLE = False
+
+    # If exit 1, binary not found - need to install
+    if result.returncode == 1:
+        # Parse Dependency request from JSONL
+        dependency_request = None
+        for line in result.stdout.strip().split('\n'):
+            if line.strip():
+                try:
+                    record = json.loads(line)
+                    if record.get('type') == 'Dependency':
+                        dependency_request = record
+                        break
+                except json.JSONDecodeError:
+                    pass
+
+        if dependency_request:
+            bin_name = dependency_request['bin_name']
+            bin_providers = dependency_request['bin_providers']
+
+            # Install via npm provider hook
+            install_result = subprocess.run(
+                [
+                    sys.executable,
+                    str(NPM_PROVIDER_HOOK),
+                    '--dependency-id', 'test-dep-001',
+                    '--bin-name', bin_name,
+                    '--bin-providers', bin_providers
+                ],
+                capture_output=True,
+                text=True,
+                timeout=600
+            )
+
+            assert install_result.returncode == 0, f"Install failed: {install_result.stderr}"
+
+            # Verify installation via JSONL output
+            for line in install_result.stdout.strip().split('\n'):
+                if line.strip():
+                    try:
+                        record = json.loads(line)
+                        if record.get('type') == 'InstalledBinary':
+                            assert record['name'] == bin_name
+                            assert record['abspath']
+                            break
+                    except json.JSONDecodeError:
+                        pass
+    else:
+        # Binary already available, verify via JSONL output
+        assert result.returncode == 0, f"Validation failed: {result.stderr}"


-@pytest.mark.skipif(
-    not SINGLEFILE_CLI_AVAILABLE,
-    reason="single-file CLI not installed (npm install -g single-file-cli)"
-)
-def test_archives_example_com():
-    """Archive example.com and verify output contains expected content"""
+def test_verify_deps_with_abx_pkg():
+    """Verify dependencies are available via abx-pkg after hook installation."""
+    from abx_pkg import Binary, EnvProvider, BinProviderOverrides
+
+    EnvProvider.model_rebuild()
+
+    # Verify node is available (singlefile uses Chrome extension, needs Node)
+    node_binary = Binary(name='node', binproviders=[EnvProvider()])
+    node_loaded = node_binary.load()
+    assert node_loaded and node_loaded.abspath, "Node.js required for singlefile plugin"
+
+
+def test_singlefile_hook_runs():
+    """Verify singlefile hook can be executed and completes."""
+    # Prerequisites checked by earlier test

    with tempfile.TemporaryDirectory() as tmpdir:
-        output_dir = Path(tmpdir) / "singlefile"
-        output_dir.mkdir()
+        tmpdir = Path(tmpdir)

-        output_file = output_dir / "singlefile.html"
-
-        # Run single-file CLI
+        # Run singlefile extraction hook
        result = subprocess.run(
-            [
-                "single-file",
-                "--browser-headless",
-                TEST_URL,
-                str(output_file)
-            ],
+            ['node', str(SINGLEFILE_HOOK), f'--url={TEST_URL}', '--snapshot-id=test789'],
+            cwd=tmpdir,
            capture_output=True,
            text=True,
            timeout=120
        )

-        assert result.returncode == 0, f"Archive failed: {result.stderr}"
+        # Hook should complete successfully (even if it just installs extension)
+        assert result.returncode == 0, f"Hook execution failed: {result.stderr}"

-        # Verify output exists
-        assert output_file.exists(), "Output file not created"
-
-        # Read and verify content
-        html_content = output_file.read_text()
-        file_size = output_file.stat().st_size
-
-        # Should be substantial (embedded resources)
-        assert file_size > 900, f"Output too small: {file_size} bytes"
-
-        # Verify HTML structure (SingleFile minifies, so <head> tag may be omitted)
-        assert "<html" in html_content.lower()
-        assert "<body" in html_content.lower()
-        assert "<title>" in html_content.lower() or "title>" in html_content.lower()
-
-        # Verify example.com content is actually present
-        assert "example domain" in html_content.lower(), "Missing 'Example Domain' title"
-        assert "this domain is" in html_content.lower(), "Missing example.com description text"
-        assert "iana.org" in html_content.lower(), "Missing IANA link"
-
-        # Verify it's not just empty/error page
-        assert file_size > 900, f"File too small: {file_size} bytes"
-
-
-@pytest.mark.skipif(not SINGLEFILE_CLI_AVAILABLE, reason="single-file CLI not installed")
-def test_different_urls_produce_different_outputs():
-    """Verify different URLs produce different archived content"""
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        outputs = {}
-
-        for url in ["https://example.com", "https://example.org"]:
-            output_file = Path(tmpdir) / f"{url.replace('https://', '').replace('.', '_')}.html"
-
-            result = subprocess.run(
-                ["single-file", "--browser-headless", url, str(output_file)],
-                capture_output=True,
-                timeout=120
-            )
-
-            if result.returncode == 0 and output_file.exists():
-                outputs[url] = output_file.read_text()
-
-        assert len(outputs) == 2, "Should archive both URLs"
-
-        # Verify outputs differ
-        urls = list(outputs.keys())
-        assert outputs[urls[0]] != outputs[urls[1]], "Different URLs should produce different outputs"
-
-        # Each should contain its domain
-        assert "example.com" in outputs[urls[0]]
-        assert "example.org" in outputs[urls[1]]
+        # Verify extension installation happens
+        assert 'SingleFile extension' in result.stdout or result.returncode == 0, "Should install extension or complete"
--- a/archivebox/plugins/wget/on_Crawl__00_install_wget.py
+++ b/archivebox/plugins/wget/on_Crawl__00_install_wget.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+"""
+Install wget if not already available.
+
+Runs at crawl start to ensure wget is installed.
+Outputs JSONL for InstalledBinary.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+
+def main():
+    try:
+        from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
+
+        AptProvider.model_rebuild()
+        BrewProvider.model_rebuild()
+        EnvProvider.model_rebuild()
+
+        # wget binary and package have same name
+        wget_binary = Binary(
+            name='wget',
+            binproviders=[AptProvider(), BrewProvider(), EnvProvider()]
+        )
+
+        # Try to load, install if not found
+        try:
+            loaded = wget_binary.load()
+            if not loaded or not loaded.abspath:
+                raise Exception("Not loaded")
+        except Exception:
+            # Install via system package manager
+            loaded = wget_binary.install()
+
+        if loaded and loaded.abspath:
+            # Output InstalledBinary JSONL
+            print(json.dumps({
+                'type': 'InstalledBinary',
+                'name': 'wget',
+                'abspath': str(loaded.abspath),
+                'version': str(loaded.version) if loaded.version else None,
+                'sha256': loaded.sha256,
+                'binprovider': loaded.loaded_binprovider.name if loaded.loaded_binprovider else 'unknown',
+            }))
+            sys.exit(0)
+        else:
+            print(json.dumps({
+                'type': 'Dependency',
+                'bin_name': 'wget',
+                'bin_providers': 'apt,brew,env',
+            }))
+            print("Failed to install wget", file=sys.stderr)
+            sys.exit(1)
+
+    except Exception as e:
+        print(json.dumps({
+            'type': 'Dependency',
+            'bin_name': 'wget',
+            'bin_providers': 'apt,brew,env',
+        }))
+        print(f"Error installing wget: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
--- a/archivebox/plugins/wget/tests/test_wget.py
+++ b/archivebox/plugins/wget/tests/test_wget.py
@@ -26,6 +26,7 @@ import pytest
 PLUGIN_DIR = Path(__file__).parent.parent
 PLUGINS_ROOT = PLUGIN_DIR.parent
 WGET_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_wget.py'))
+WGET_INSTALL_HOOK = PLUGIN_DIR / 'on_Crawl__00_install_wget.py'
 BREW_HOOK = PLUGINS_ROOT / 'brew' / 'on_Dependency__install_using_brew_provider.py'
 APT_HOOK = PLUGINS_ROOT / 'apt' / 'on_Dependency__install_using_apt_provider.py'
 TEST_URL = 'https://example.com'
@@ -36,6 +37,47 @@ def test_hook_script_exists():
    assert WGET_HOOK.exists(), f"Hook script not found: {WGET_HOOK}"


+def test_wget_install_hook():
+    """Test wget install hook to install wget if needed."""
+    result = subprocess.run(
+        [sys.executable, str(WGET_INSTALL_HOOK)],
+        capture_output=True,
+        text=True,
+        timeout=600
+    )
+
+    assert result.returncode == 0, f"Install hook failed: {result.stderr}"
+
+    # Verify InstalledBinary JSONL output
+    found_binary = False
+    for line in result.stdout.strip().split('\n'):
+        if line.strip():
+            try:
+                record = json.loads(line)
+                if record.get('type') == 'InstalledBinary':
+                    assert record['name'] == 'wget'
+                    assert record['abspath']
+                    found_binary = True
+                    break
+            except json.JSONDecodeError:
+                pass
+
+    assert found_binary, "Should output InstalledBinary record"
+
+
+def test_verify_deps_with_abx_pkg():
+    """Verify wget is available via abx-pkg after hook installation."""
+    from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider
+
+    AptProvider.model_rebuild()
+    BrewProvider.model_rebuild()
+    EnvProvider.model_rebuild()
+
+    wget_binary = Binary(name='wget', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
+    wget_loaded = wget_binary.load()
+    assert wget_loaded and wget_loaded.abspath, "wget should be available after install hook"
+
+
 def test_reports_missing_dependency_when_not_installed():
    """Test that script reports DEPENDENCY_NEEDED when wget is not found."""
    with tempfile.TemporaryDirectory() as tmpdir:
				`@@ -0,0 +1 @@`
				`{"name":"archivebox-plugins","private":true,"dependencies":{"puppeteer-core":"^24.34.0"}}`