Consolidate Chrome test helpers across all plugin tests (#1738)

# Summary  # Related issues  # Changes these areas - [ ] Bugfixes - [ ] Feature behavior - [ ] Command line interface - [ ] Configuration options - [ ] Internal architecture - [ ] Snapshot data layout on disk
2026-01-03 01:15:57 +10:00 · 2025-12-31 01:25:39 -08:00
parent 29eb6280d3 1cfb77a355
commit 8dab2966cc
15 changed files with 978 additions and 701 deletions
--- a/archivebox/plugins/chrome/chrome_utils.js
+++ b/archivebox/plugins/chrome/chrome_utils.js
@@ -1333,6 +1333,83 @@ function getExtensionsDir() {
        path.join(dataDir, 'personas', persona, 'chrome_extensions');
 }

+/**
+ * Get machine type string for platform-specific paths.
+ * Matches Python's archivebox.config.paths.get_machine_type()
+ *
+ * @returns {string} - Machine type (e.g., 'x86_64-linux', 'arm64-darwin')
+ */
+function getMachineType() {
+    // An explicit MACHINE_TYPE override always wins
+    if (process.env.MACHINE_TYPE) {
+        return process.env.MACHINE_TYPE;
+    }
+
+    // Node reports Windows as 'win32'; Python's platform.system().lower() is 'windows'
+    const system = process.platform === 'win32' ? 'windows' : process.platform;
+
+    // Normalize arch names to match Python's convention
+    const archAliases = {
+        arm64: 'arm64', aarch64: 'arm64',
+        x64: 'x86_64', x86_64: 'x86_64', amd64: 'x86_64',
+        ia32: 'x86', x86: 'x86',
+    };
+    const machine = archAliases[process.arch] || process.arch;
+
+    return `${machine}-${system}`;
+}
+
+/**
+ * Resolve the LIB_DIR that holds platform-specific binaries.
+ * An explicit LIB_DIR env var wins; otherwise DATA_DIR/lib/MACHINE_TYPE/
+ *
+ * @returns {string} - Path to the lib directory
+ */
+function getLibDir() {
+    const override = process.env.LIB_DIR;
+    if (override) {
+        return override;
+    }
+    const baseDir = getEnv('DATA_DIR', './data');
+    return path.join(baseDir, 'lib', getMachineType());
+}
+
+/**
+ * Resolve the node_modules directory used for npm packages.
+ * An explicit NODE_MODULES_DIR env var wins; otherwise LIB_DIR/npm/node_modules/
+ *
+ * @returns {string} - Path to the node_modules directory
+ */
+function getNodeModulesDir() {
+    const override = process.env.NODE_MODULES_DIR;
+    return override
+        ? override
+        : path.join(getLibDir(), 'npm', 'node_modules');
+}
+
+/**
+ * Collect every test environment path into one plain object.
+ * Python calls this through the CLI instead of re-deriving the paths itself,
+ * so the path calculations live in exactly one place.
+ *
+ * @returns {Object} - Test environment paths keyed by env var name
+ */
+function getTestEnv() {
+    // LIB_DIR is used twice (as a value and as the NPM_BIN_DIR base): resolve it once
+    const libDir = getLibDir();
+
+    // Keys are added in the order they should appear in the printed JSON
+    const env = {};
+    env.DATA_DIR = getEnv('DATA_DIR', './data');
+    env.MACHINE_TYPE = getMachineType();
+    env.LIB_DIR = libDir;
+    env.NODE_MODULES_DIR = getNodeModulesDir();
+    env.NPM_BIN_DIR = path.join(libDir, 'npm', '.bin');
+    env.CHROME_EXTENSIONS_DIR = getExtensionsDir();
+
+    return env;
+}
+
 /**
 * Install a Chrome extension with caching support.
 *
@@ -1442,8 +1519,13 @@ module.exports = {
    getExtensionPaths,
    waitForExtensionTarget,
    getExtensionTargets,
-    // Shared extension installer utilities
+    // Shared path utilities (single source of truth for Python/JS)
+    getMachineType,
+    getLibDir,
+    getNodeModulesDir,
    getExtensionsDir,
+    getTestEnv,
+    // Shared extension installer utilities
    installExtensionWithCache,
    // Deprecated - use enableExtensions option instead
    getExtensionLaunchArgs,
@@ -1457,18 +1539,31 @@ if (require.main === module) {
        console.log('Usage: chrome_utils.js <command> [args...]');
        console.log('');
        console.log('Commands:');
-        console.log('  findChromium');
-        console.log('  installChromium');
-        console.log('  installPuppeteerCore [npm_prefix]');
-        console.log('  launchChromium [output_dir] [extension_paths_json]');
-        console.log('  killChrome <pid> [output_dir]');
-        console.log('  killZombieChrome [data_dir]');
-        console.log('  getExtensionId <path>');
-        console.log('  loadExtensionManifest <path>');
-        console.log('  getExtensionLaunchArgs <extensions_json>');
-        console.log('  loadOrInstallExtension <webstore_id> <name> [extensions_dir]');
-        console.log('  getExtensionsDir');
-        console.log('  installExtensionWithCache <webstore_id> <name>');
+        console.log('  findChromium              Find Chrome/Chromium binary');
+        console.log('  installChromium           Install Chromium via @puppeteer/browsers');
+        console.log('  installPuppeteerCore      Install puppeteer-core npm package');
+        console.log('  launchChromium            Launch Chrome with CDP debugging');
+        console.log('  killChrome <pid>          Kill Chrome process by PID');
+        console.log('  killZombieChrome          Clean up zombie Chrome processes');
+        console.log('');
+        console.log('  getMachineType            Get machine type (e.g., x86_64-linux)');
+        console.log('  getLibDir                 Get LIB_DIR path');
+        console.log('  getNodeModulesDir         Get NODE_MODULES_DIR path');
+        console.log('  getExtensionsDir          Get Chrome extensions directory');
+        console.log('  getTestEnv                Get all paths as JSON (for tests)');
+        console.log('');
+        console.log('  getExtensionId <path>     Get extension ID from unpacked path');
+        console.log('  loadExtensionManifest     Load extension manifest.json');
+        console.log('  loadOrInstallExtension    Load or install an extension');
+        console.log('  installExtensionWithCache Install extension with caching');
+        console.log('');
+        console.log('Environment variables:');
+        console.log('  DATA_DIR                  Base data directory');
+        console.log('  LIB_DIR                   Library directory (computed if not set)');
+        console.log('  MACHINE_TYPE              Machine type override');
+        console.log('  NODE_MODULES_DIR          Node modules directory');
+        console.log('  CHROME_BINARY             Chrome binary path');
+        console.log('  CHROME_EXTENSIONS_DIR     Extensions directory');
        process.exit(1);
    }

@@ -1581,11 +1676,31 @@ if (require.main === module) {
                    break;
                }

+                case 'getMachineType': {
+                    console.log(getMachineType());
+                    break;
+                }
+
+                case 'getLibDir': {
+                    console.log(getLibDir());
+                    break;
+                }
+
+                case 'getNodeModulesDir': {
+                    console.log(getNodeModulesDir());
+                    break;
+                }
+
                case 'getExtensionsDir': {
                    console.log(getExtensionsDir());
                    break;
                }

+                case 'getTestEnv': {
+                    console.log(JSON.stringify(getTestEnv(), null, 2));
+                    break;
+                }
+
                case 'installExtensionWithCache': {
                    const [webstore_id, name] = commandArgs;
                    if (!webstore_id || !name) {
--- a/archivebox/plugins/chrome/tests/chrome_test_helpers.py
+++ b/archivebox/plugins/chrome/tests/chrome_test_helpers.py
@@ -2,25 +2,69 @@
 Shared Chrome test helpers for plugin integration tests.

 This module provides common utilities for Chrome-based plugin tests, reducing
-duplication across test files. It uses the JavaScript utilities from chrome_utils.js
-where appropriate.
+duplication across test files. Functions delegate to chrome_utils.js (the single
+source of truth) with Python fallbacks.
+
+Function names match the JS equivalents in snake_case:
+    JS: getMachineType()  -> Python: get_machine_type()
+    JS: getLibDir()       -> Python: get_lib_dir()
+    JS: getNodeModulesDir() -> Python: get_node_modules_dir()
+    JS: getExtensionsDir() -> Python: get_extensions_dir()
+    JS: findChromium()    -> Python: find_chromium()
+    JS: killChrome()      -> Python: kill_chrome()
+    JS: getTestEnv()      -> Python: get_test_env()

 Usage:
+    # Path helpers (delegate to chrome_utils.js):
    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-        get_test_env,
-        setup_chrome_session,
-        cleanup_chrome,
-        find_chromium_binary,
-        get_node_modules_dir,
+        get_test_env,           # env dict with LIB_DIR, NODE_MODULES_DIR, MACHINE_TYPE
+        get_machine_type,       # e.g., 'x86_64-linux', 'arm64-darwin'
+        get_lib_dir,            # Path to lib dir
+        get_node_modules_dir,   # Path to node_modules
+        get_extensions_dir,     # Path to chrome extensions
+        find_chromium,          # Find Chrome/Chromium binary
+        kill_chrome,            # Kill Chrome process by PID
+    )
+
+    # Test file helpers:
+    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+        get_plugin_dir,         # get_plugin_dir(__file__) -> plugin dir Path
+        get_hook_script,        # Find hook script by glob pattern
+        PLUGINS_ROOT,           # Path to plugins root
+        LIB_DIR,                # Path to lib dir (lazy-loaded)
+        NODE_MODULES_DIR,       # Path to node_modules (lazy-loaded)
+    )
+
+    # For Chrome session tests:
+    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+        setup_chrome_session,   # Full Chrome + tab setup
+        cleanup_chrome,         # Cleanup by PID
+        chrome_session,         # Context manager
+    )
+
+    # For extension tests:
+    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+        setup_test_env,         # Full dir structure + Chrome install
+        launch_chromium_session, # Launch Chrome, return CDP URL
+        kill_chromium_session,   # Cleanup Chrome
+    )
+
+    # Run hooks and parse JSONL:
+    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+        run_hook,               # Run hook, return (returncode, stdout, stderr)
+        parse_jsonl_output,     # Parse JSONL from stdout
    )
 """

+import json
 import os
+import platform
 import signal
 import subprocess
 import time
+from datetime import datetime
 from pathlib import Path
-from typing import Tuple, Optional
+from typing import Tuple, Optional, List, Dict, Any
 from contextlib import contextmanager


@@ -29,88 +73,623 @@ CHROME_PLUGIN_DIR = Path(__file__).parent.parent
 PLUGINS_ROOT = CHROME_PLUGIN_DIR.parent

 # Hook script locations
+CHROME_INSTALL_HOOK = CHROME_PLUGIN_DIR / 'on_Crawl__00_install_puppeteer_chromium.py'
 CHROME_LAUNCH_HOOK = CHROME_PLUGIN_DIR / 'on_Crawl__30_chrome_launch.bg.js'
 CHROME_TAB_HOOK = CHROME_PLUGIN_DIR / 'on_Snapshot__20_chrome_tab.bg.js'
 CHROME_NAVIGATE_HOOK = next(CHROME_PLUGIN_DIR.glob('on_Snapshot__*_chrome_navigate.*'), None)
 CHROME_UTILS = CHROME_PLUGIN_DIR / 'chrome_utils.js'


-def get_node_modules_dir() -> Path:
-    """Get NODE_MODULES_DIR for tests, checking env first.
+# =============================================================================
+# Path Helpers - delegates to chrome_utils.js with Python fallback
+# Function names match JS: getMachineType -> get_machine_type, etc.
+# =============================================================================

-    Returns the path to the node_modules directory, checking:
-    1. NODE_MODULES_DIR environment variable
-    2. Computed from LIB_DIR via ArchiveBox config
+
+def _call_chrome_utils(command: str, *args: str, env: Optional[dict] = None) -> Tuple[int, str, str]:
+    """Call a chrome_utils.js CLI command and capture its result (internal helper).
+
+    Never raises for a missing `node` binary or a hung process: those are
+    reported as a non-zero returncode so callers can use their Python fallbacks.
+
+    Args:
+        command: The CLI command (e.g., 'findChromium', 'getTestEnv')
+        *args: Additional command arguments
+        env: Environment dict (default: copy of the current env)
+
+    Returns:
+        Tuple of (returncode, stdout, stderr); returncode is 1 on launch failure/timeout
+    """
+    cmd = ['node', str(CHROME_UTILS), command, *args]
+    try:
+        result = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=30,
+            env=env or os.environ.copy(),
+        )
+    except (OSError, subprocess.TimeoutExpired) as err:
+        # Launch failure or timeout: surface as a failed call instead of raising
+        return 1, '', f'{type(err).__name__}: {err}'
+    return result.returncode, result.stdout, result.stderr
+
+
+def get_plugin_dir(test_file: str) -> Path:
+    """Return the plugin directory that owns a test module.
+
+    Usage:
+        PLUGIN_DIR = get_plugin_dir(__file__)
+
+    Args:
+        test_file: The test module's __file__ (e.g., <plugin>/tests/test_x.py)
+    Returns:
+        The directory two levels above test_file (e.g., <plugin>/)
+    """
+    tests_dir = Path(test_file).parent
+    return tests_dir.parent
+
+
+def get_hook_script(plugin_dir: Path, pattern: str) -> Optional[Path]:
+    """Find a hook script in a plugin directory by glob pattern.
+
+    Usage:
+        HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_screenshot.*')
+
+    Args:
+        plugin_dir: Path to the plugin directory
+        pattern: Glob pattern to match
+
+    Returns:
+        The lowest-sorting matching path, or None if nothing matches
+    """
+    # glob() order is filesystem-dependent; take the minimum so the pick is deterministic
+    return min(plugin_dir.glob(pattern), default=None)
+
+
+def get_machine_type() -> str:
+    """Return the machine type string (e.g., 'x86_64-linux', 'arm64-darwin').
+
+    Matches JS: getMachineType()
+
+    Asks chrome_utils.js first; computes the value in Python if that yields nothing.
+    """
+    # Preferred source: the JS implementation (single source of truth)
+    returncode, stdout, _stderr = _call_chrome_utils('getMachineType')
+    js_value = stdout.strip()
+    if returncode == 0 and js_value:
+        return js_value
+
+    # Python fallback: explicit override first, then platform detection
+    override = os.environ.get('MACHINE_TYPE')
+    if override:
+        return override
+
+    arch_aliases = {'aarch64': 'arm64', 'amd64': 'x86_64'}
+    arch = platform.machine().lower()
+    arch = arch_aliases.get(arch, arch)
+    os_name = platform.system().lower()
+    return f"{arch}-{os_name}"
+
+
+def get_lib_dir() -> Path:
+    """Return the LIB_DIR path used for platform-specific binaries.
+
+    Matches JS: getLibDir()
+
+    Asks chrome_utils.js first; computes the value in Python if that yields nothing.
+    """
+    returncode, stdout, _stderr = _call_chrome_utils('getLibDir')
+    js_value = stdout.strip()
+    if returncode == 0 and js_value:
+        return Path(js_value)
+    # Python fallback: LIB_DIR env var, else the ArchiveBox storage config
+    env_value = os.environ.get('LIB_DIR')
+    if env_value:
+        return Path(env_value)
+    from archivebox.config.common import STORAGE_CONFIG
+    return Path(str(STORAGE_CONFIG.LIB_DIR))
+
+
+def get_node_modules_dir() -> Path:
+    """Get NODE_MODULES_DIR path for npm packages.
+
+    Matches JS: getNodeModulesDir()
+
+    Tries chrome_utils.js first, falls back to Python computation.
+    """
+    # Try JS first
+    returncode, stdout, stderr = _call_chrome_utils('getNodeModulesDir')
+    if returncode == 0 and stdout.strip():
+        return Path(stdout.strip())
+
+    # Fallback to Python
    if os.environ.get('NODE_MODULES_DIR'):
        return Path(os.environ['NODE_MODULES_DIR'])
-    # Otherwise compute from LIB_DIR
-    from archivebox.config.common import STORAGE_CONFIG
-    lib_dir = Path(os.environ.get('LIB_DIR') or str(STORAGE_CONFIG.LIB_DIR))
+    lib_dir = get_lib_dir()
    return lib_dir / 'npm' / 'node_modules'


-def get_test_env() -> dict:
-    """Get environment dict with NODE_MODULES_DIR set correctly for tests.
+def get_extensions_dir() -> str:
+    """Get the Chrome extensions directory path.

-    Returns a copy of os.environ with NODE_MODULES_DIR added/updated.
-    Use this for all subprocess calls in plugin tests.
+    Matches JS: getExtensionsDir()
+
+    Tries chrome_utils.js first, falls back to Python computation.
    """
-    env = os.environ.copy()
-    env['NODE_MODULES_DIR'] = str(get_node_modules_dir())
-    return env
+    returncode, stdout, stderr = _call_chrome_utils('getExtensionsDir')
+    if returncode == 0 and stdout.strip():
+        return stdout.strip()
+
+    # Fallback to default computation if JS call fails
+    data_dir = os.environ.get('DATA_DIR', './data')
+    persona = os.environ.get('ACTIVE_PERSONA', 'Default')
+    return str(Path(data_dir) / 'personas' / persona / 'chrome_extensions')


-def find_chromium_binary(data_dir: Optional[str] = None) -> Optional[str]:
-    """Find the Chromium binary using chrome_utils.js findChromium().
+def find_chromium(data_dir: Optional[str] = None) -> Optional[str]:
+    """Find the Chromium binary path.

-    This uses the centralized findChromium() function which checks:
+    Matches JS: findChromium()
+
+    Uses chrome_utils.js which checks:
    - CHROME_BINARY env var
    - @puppeteer/browsers install locations
    - System Chromium locations
    - Falls back to Chrome (with warning)

    Args:
-        data_dir: Directory where chromium was installed (contains chromium/ subdir)
+        data_dir: Optional DATA_DIR override

    Returns:
        Path to Chromium binary or None if not found
    """
-    search_dir = data_dir or os.environ.get('DATA_DIR', '.')
-    result = subprocess.run(
-        ['node', str(CHROME_UTILS), 'findChromium', str(search_dir)],
-        capture_output=True,
-        text=True,
-        timeout=10
-    )
-    if result.returncode == 0 and result.stdout.strip():
-        return result.stdout.strip()
+    env = os.environ.copy()
+    if data_dir:
+        env['DATA_DIR'] = str(data_dir)
+    returncode, stdout, stderr = _call_chrome_utils('findChromium', env=env)
+    if returncode == 0 and stdout.strip():
+        return stdout.strip()
    return None


-def get_extensions_dir() -> str:
-    """Get the Chrome extensions directory using chrome_utils.js getExtensionsDir().
+def kill_chrome(pid: int, output_dir: Optional[str] = None) -> bool:
+    """Kill a Chrome process by PID.

-    This uses the centralized path calculation from chrome_utils.js which checks:
-    - CHROME_EXTENSIONS_DIR env var
-    - DATA_DIR/personas/ACTIVE_PERSONA/chrome_extensions
+    Matches JS: killChrome()
+
+    Uses chrome_utils.js which handles:
+    - SIGTERM then SIGKILL
+    - Process group killing
+    - Zombie process cleanup
+
+    Args:
+        pid: Process ID to kill
+        output_dir: Optional chrome output directory for PID file cleanup

    Returns:
-        Path to extensions directory
+        True if the kill command succeeded
    """
+    args = [str(pid)]
+    if output_dir:
+        args.append(str(output_dir))
+    returncode, stdout, stderr = _call_chrome_utils('killChrome', *args)
+    return returncode == 0
+
+
+def get_test_env() -> dict:
+    """Build the environment dict to use for subprocess calls in plugin tests.
+
+    Matches JS: getTestEnv()
+
+    Starts from a copy of os.environ and overlays the path values reported by
+    chrome_utils.js; computes them in Python if the JS output is unusable.
+    """
+    env = os.environ.copy()
+
+    # Preferred source: every path in one call to the JS implementation
+    returncode, stdout, _stderr = _call_chrome_utils('getTestEnv')
+    if returncode == 0 and stdout.strip():
+        try:
+            js_env = json.loads(stdout)
+        except json.JSONDecodeError:
+            pass  # unparseable output: use the Python fallback below
+        else:
+            env.update(js_env)
+            return env
+
+    # Python fallback: resolve each value individually
+    env['LIB_DIR'] = str(get_lib_dir())
+    env['NODE_MODULES_DIR'] = str(get_node_modules_dir())
+    env['MACHINE_TYPE'] = get_machine_type()
+    return env
+
+
+# Backward compatibility aliases (deprecated, use new names)
+find_chromium_binary = find_chromium
+kill_chrome_via_js = kill_chrome
+get_machine_type_from_js = get_machine_type
+get_test_env_from_js = get_test_env
+
+
+# =============================================================================
+# Module-level constants (lazy-loaded on first access)
+# Import these directly: from chrome_test_helpers import LIB_DIR, NODE_MODULES_DIR
+# =============================================================================
+
+# These are computed once when first accessed
+_LIB_DIR: Optional[Path] = None
+_NODE_MODULES_DIR: Optional[Path] = None
+
+
+def _get_lib_dir_cached() -> Path:
+    """Return LIB_DIR, calling get_lib_dir() only until a value has been stored."""
+    global _LIB_DIR
+    _LIB_DIR = get_lib_dir() if _LIB_DIR is None else _LIB_DIR
+    return _LIB_DIR
+
+
+def _get_node_modules_dir_cached() -> Path:
+    """Return NODE_MODULES_DIR, calling get_node_modules_dir() only until a value is stored."""
+    global _NODE_MODULES_DIR
+    _NODE_MODULES_DIR = get_node_modules_dir() if _NODE_MODULES_DIR is None else _NODE_MODULES_DIR
+    return _NODE_MODULES_DIR
+
+
+# Module-level constants that can be imported directly
+# Usage: from chrome_test_helpers import LIB_DIR, NODE_MODULES_DIR
+class _LazyPath:
+    """Path-like placeholder whose real value is fetched from `getter` on first use."""
+    def __init__(self, getter):
+        self._value = None
+        self._getter = getter
+
+    def __fspath__(self):
+        # Resolve on demand; a stored value is reused instead of calling the getter again
+        self._value = self._getter() if self._value is None else self._value
+        return str(self._value)
+
+    def __truediv__(self, other):
+        # Same on-demand resolution, then defer to the underlying value's `/`
+        self._value = self._getter() if self._value is None else self._value
+        return self._value / other
+
+    def __str__(self):
+        return self.__fspath__()
+
+    def __repr__(self):
+        return f"<LazyPath: {self.__fspath__()}>"
+
+
+LIB_DIR = _LazyPath(_get_lib_dir_cached)
+NODE_MODULES_DIR = _LazyPath(_get_node_modules_dir_cached)
+
+
+# =============================================================================
+# Hook Execution Helpers
+# =============================================================================
+
+
+def run_hook(
+    hook_script: Path,
+    url: str,
+    snapshot_id: str,
+    cwd: Optional[Path] = None,
+    env: Optional[dict] = None,
+    timeout: int = 60,
+    extra_args: Optional[List[str]] = None,
+) -> Tuple[int, str, str]:
+    """Run a hook script and return (returncode, stdout, stderr).
+
+    Usage:
+        returncode, stdout, stderr = run_hook(
+            HOOK_SCRIPT, 'https://example.com', 'test-snap-123',
+            cwd=tmpdir, env=get_test_env()
+        )
+
+    Args:
+        hook_script: Path to the hook script
+        url: URL to process
+        snapshot_id: Snapshot ID
+        cwd: Working directory (default: current dir)
+        env: Environment dict (default: get_test_env())
+        timeout: Timeout in seconds
+        extra_args: Additional arguments to pass
+
+    Returns:
+        Tuple of (returncode, stdout, stderr)
+    """
+    if env is None:
+        env = get_test_env()
+
+    # Determine interpreter based on file extension
+    if hook_script.suffix == '.py':
+        cmd = ['python', str(hook_script)]
+    elif hook_script.suffix == '.js':
+        cmd = ['node', str(hook_script)]
+    else:
+        cmd = [str(hook_script)]
+
+    cmd.extend([f'--url={url}', f'--snapshot-id={snapshot_id}'])
+    if extra_args:
+        cmd.extend(extra_args)
+
    result = subprocess.run(
-        ['node', str(CHROME_UTILS), 'getExtensionsDir'],
+        cmd,
+        cwd=str(cwd) if cwd else None,
        capture_output=True,
        text=True,
-        timeout=10,
-        env=get_test_env()
+        env=env,
+        timeout=timeout
    )
-    if result.returncode == 0 and result.stdout.strip():
-        return result.stdout.strip()
-    # Fallback to default computation if JS call fails
-    data_dir = os.environ.get('DATA_DIR', './data')
-    persona = os.environ.get('ACTIVE_PERSONA', 'Default')
-    return str(Path(data_dir) / 'personas' / persona / 'chrome_extensions')
+    return result.returncode, result.stdout, result.stderr
+
+
+def parse_jsonl_output(stdout: str, record_type: str = 'ArchiveResult') -> Optional[Dict[str, Any]]:
+    """Return the first JSONL record of the requested type found in hook stdout.
+
+    Usage:
+        result = parse_jsonl_output(stdout)
+        if result and result['status'] == 'succeeded':
+            print("Success!")
+
+    Args:
+        stdout: The stdout from a hook execution
+        record_type: Value the record's 'type' field must equal (default: 'ArchiveResult')
+
+    Returns:
+        The first matching record as a dict, or None if no line matches
+    """
+    candidates = (raw.strip() for raw in stdout.strip().split('\n'))
+    for text in candidates:
+        # Only lines that look like JSON objects are worth parsing
+        if text.startswith('{'):
+            try:
+                record = json.loads(text)
+            except json.JSONDecodeError:
+                continue  # tolerate log lines that merely start with '{'
+            if record.get('type') == record_type:
+                return record
+    return None
+
+
+def run_hook_and_parse(
+    hook_script: Path,
+    url: str,
+    snapshot_id: str,
+    cwd: Optional[Path] = None,
+    env: Optional[dict] = None,
+    timeout: int = 60,
+    extra_args: Optional[List[str]] = None,
+) -> Tuple[int, Optional[Dict[str, Any]], str]:
+    """Run a hook via run_hook() and parse its stdout with parse_jsonl_output().
+
+    Arguments are forwarded unchanged to run_hook().
+
+    Returns:
+        Tuple of (returncode, parsed_result_or_none, stderr)
+    """
+    outcome = run_hook(
+        hook_script, url, snapshot_id,
+        cwd=cwd, env=env, timeout=timeout, extra_args=extra_args,
+    )
+    exit_code, hook_stdout, hook_stderr = outcome
+    return exit_code, parse_jsonl_output(hook_stdout), hook_stderr
+
+
+# =============================================================================
+# Extension Test Helpers
+# Used by extension tests (ublock, istilldontcareaboutcookies, twocaptcha)
+# =============================================================================
+
+
+def setup_test_env(tmpdir: Path) -> dict:
+    """Set up isolated data/lib directory structure for extension tests.
+
+    Creates structure matching real ArchiveBox data dir:
+        <tmpdir>/data/
+            lib/
+                arm64-darwin/   (or x86_64-linux, etc.)
+                    npm/
+                        .bin/
+                        node_modules/
+            personas/
+                Default/
+                    chrome_extensions/
+            users/
+                testuser/
+                    crawls/
+                    snapshots/
+
+    Calls chrome install hook which handles puppeteer-core and chromium installation.
+    Returns env dict with DATA_DIR, LIB_DIR, NPM_BIN_DIR, NODE_MODULES_DIR, CHROME_BINARY, etc.
+
+    Args:
+        tmpdir: Base temporary directory for the test
+
+    Returns:
+        Environment dict with all paths set; calls pytest.skip() if Chrome install fails
+    """
+    import sys
+    import pytest
+
+    # Determine machine type (matches archivebox.config.paths.get_machine_type())
+    arch_aliases = {'aarch64': 'arm64', 'amd64': 'x86_64'}
+    machine = platform.machine().lower()
+    machine_type = f"{arch_aliases.get(machine, machine)}-{platform.system().lower()}"
+
+    # Create proper directory structure matching real ArchiveBox layout
+    data_dir = tmpdir / 'data'
+    lib_dir = data_dir / 'lib' / machine_type
+    npm_dir = lib_dir / 'npm'
+    npm_bin_dir = npm_dir / '.bin'
+    node_modules_dir = npm_dir / 'node_modules'
+
+    # Extensions go under personas/Default/
+    chrome_extensions_dir = data_dir / 'personas' / 'Default' / 'chrome_extensions'
+
+    # User data goes under users/{username}/
+    date_str = datetime.now().strftime('%Y%m%d')
+    users_dir = data_dir / 'users' / 'testuser'
+    crawls_dir = users_dir / 'crawls' / date_str
+    snapshots_dir = users_dir / 'snapshots' / date_str
+
+    # Create all directories
+    node_modules_dir.mkdir(parents=True, exist_ok=True)
+    npm_bin_dir.mkdir(parents=True, exist_ok=True)
+    chrome_extensions_dir.mkdir(parents=True, exist_ok=True)
+    crawls_dir.mkdir(parents=True, exist_ok=True)
+    snapshots_dir.mkdir(parents=True, exist_ok=True)
+
+    # Build complete env dict
+    env = os.environ.copy()
+    env.update({
+        'DATA_DIR': str(data_dir),
+        'LIB_DIR': str(lib_dir),
+        'MACHINE_TYPE': machine_type,
+        'NPM_BIN_DIR': str(npm_bin_dir),
+        'NODE_MODULES_DIR': str(node_modules_dir),
+        'CHROME_EXTENSIONS_DIR': str(chrome_extensions_dir),
+        'CRAWLS_DIR': str(crawls_dir),
+        'SNAPSHOTS_DIR': str(snapshots_dir),
+    })
+
+    # Only set headless if not already in environment (allow override for debugging)
+    if 'CHROME_HEADLESS' not in os.environ:
+        env['CHROME_HEADLESS'] = 'true'
+
+    # Call chrome install hook (installs puppeteer-core and chromium, outputs JSONL)
+    # via sys.executable: a bare 'python' may be missing from PATH or be another env
+    try:
+        result = subprocess.run(
+            [sys.executable, str(CHROME_INSTALL_HOOK)],
+            capture_output=True, text=True, timeout=120, env=env
+        )
+    except subprocess.TimeoutExpired:
+        pytest.skip("Chrome install hook timed out after 120s")
+    if result.returncode != 0:
+        pytest.skip(f"Chrome install hook failed: {result.stderr}")
+
+    # Parse JSONL output to get CHROME_BINARY
+    chrome_binary = None
+    for line in result.stdout.strip().split('\n'):
+        if not line.strip():
+            continue
+        try:
+            data = json.loads(line)
+            if data.get('type') == 'Binary' and data.get('abspath'):
+                chrome_binary = data['abspath']
+                break
+        except json.JSONDecodeError:
+            continue
+
+    if not chrome_binary or not Path(chrome_binary).exists():
+        pytest.skip(f"Chromium binary not found: {chrome_binary}")
+    env['CHROME_BINARY'] = chrome_binary
+    return env
+
+
+def launch_chromium_session(env: dict, chrome_dir: Path, crawl_id: str) -> Tuple[subprocess.Popen, str]:
+    """Launch Chromium and return (process, cdp_url).
+
+    Starts the chrome launch hook in chrome_dir and polls once per second (up to 20
+    times) for a non-empty cdp_url.txt. For extension tests needing direct CDP access.
+
+    Args:
+        env: Environment dict (from setup_test_env)
+        chrome_dir: Directory for Chrome to write its files (cdp_url.txt, chrome.pid, etc.)
+        crawl_id: ID for the crawl
+
+    Returns:
+        Tuple of (chrome_launch_process, cdp_url)
+
+    Raises:
+        RuntimeError: If Chrome fails to launch or CDP URL not available after 20s
+    """
+    chrome_dir.mkdir(parents=True, exist_ok=True)
+    chrome_launch_process = subprocess.Popen(
+        ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
+        cwd=str(chrome_dir),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        env=env
+    )
+
+    # Wait for a non-empty CDP URL (the file can exist before its content is written)
+    cdp_file = chrome_dir / 'cdp_url.txt'
+    cdp_url = None
+    for _ in range(20):
+        if chrome_launch_process.poll() is not None:
+            stdout, stderr = chrome_launch_process.communicate()
+            raise RuntimeError(f"Chromium launch failed:\nStdout: {stdout}\nStderr: {stderr}")
+        if cdp_file.exists():
+            cdp_url = cdp_file.read_text().strip()
+            if cdp_url:
+                break
+        time.sleep(1)
+
+    if not cdp_url:
+        chrome_launch_process.kill()
+        chrome_launch_process.wait()  # reap the killed hook so it does not linger as a zombie
+        raise RuntimeError("Chromium CDP URL not found after 20s")
+    return chrome_launch_process, cdp_url
+
+
+def kill_chromium_session(chrome_launch_process: subprocess.Popen, chrome_dir: Path) -> None:
+    """Tear down a Chromium session started by launch_chromium_session.
+
+    Signals the launch hook, then passes the PID from chrome.pid to kill_chrome().
+
+    Args:
+        chrome_launch_process: The Popen object from launch_chromium_session
+        chrome_dir: The chrome directory containing chrome.pid
+    """
+    # Ask the launch hook to exit and give it up to 5s; failures here are ignored
+    try:
+        chrome_launch_process.send_signal(signal.SIGTERM)
+        chrome_launch_process.wait(timeout=5)
+    except Exception:
+        pass
+
+    pid_path = chrome_dir / 'chrome.pid'
+    if not pid_path.exists():
+        return
+    try:
+        recorded_pid = int(pid_path.read_text().strip())
+        kill_chrome(recorded_pid, str(chrome_dir))
+    except (ValueError, FileNotFoundError):
+        pass  # invalid or already-removed PID file: nothing left to kill
+
+
+@contextmanager
+def chromium_session(env: dict, chrome_dir: Path, crawl_id: str):
+    """Run a Chromium session for the duration of a `with` block.
+
+    Usage:
+        with chromium_session(env, chrome_dir, 'test-crawl') as (process, cdp_url):
+            # Use cdp_url to connect with puppeteer
+            pass
+        # Chromium automatically cleaned up
+
+    Args:
+        env: Environment dict (from setup_test_env)
+        chrome_dir: Directory for Chrome files
+        crawl_id: ID for the crawl
+
+    Yields:
+        Tuple of (chrome_launch_process, cdp_url)
+    """
+    # A failed launch raises before the try, so there is nothing to clean up then
+    launched_process, cdp_url = launch_chromium_session(env, chrome_dir, crawl_id)
+    try:
+        yield launched_process, cdp_url
+    finally:
+        # Runs on normal exit and when the with-body raises
+        kill_chromium_session(launched_process, chrome_dir)
+
+
+# =============================================================================
+# Tab-based Test Helpers
+# Used by tab-based tests (infiniscroll, modalcloser)
+# =============================================================================


 def setup_chrome_session(
@@ -210,25 +789,28 @@ def setup_chrome_session(
    return chrome_launch_process, chrome_pid, snapshot_chrome_dir


-def cleanup_chrome(chrome_launch_process: subprocess.Popen, chrome_pid: int) -> None:
-    """Clean up Chrome processes.
+def cleanup_chrome(chrome_launch_process: subprocess.Popen, chrome_pid: int, chrome_dir: Optional[Path] = None) -> None:
+    """Clean up Chrome processes using chrome_utils.js killChrome.

-    Sends SIGTERM to the chrome_launch_process and SIGKILL to the Chrome PID.
-    Ignores errors if processes are already dead.
+    Uses the centralized kill logic from chrome_utils.js which handles:
+    - SIGTERM then SIGKILL
+    - Process group killing
+    - Zombie process cleanup
+
+    The launch hook is signalled first (SIGTERM, then up to 5s wait); any
+    exception raised while doing so is ignored so that kill_chrome still runs.

    Args:
        chrome_launch_process: The Popen object for the chrome launch hook
        chrome_pid: The PID of the Chrome process
+        chrome_dir: Optional path to chrome output directory; forwarded to
+            kill_chrome as a string, or as None when not provided
    """
+    # First try to terminate the launch process gracefully
    try:
        chrome_launch_process.send_signal(signal.SIGTERM)
        chrome_launch_process.wait(timeout=5)
    except Exception:
        pass
-    try:
-        os.kill(chrome_pid, signal.SIGKILL)
-    except OSError:
-        pass
+
+    # Use JS to kill Chrome with proper process group handling
+    kill_chrome(chrome_pid, str(chrome_dir) if chrome_dir else None)


@contextmanager
--- a/archivebox/plugins/chrome/tests/test_chrome.py
+++ b/archivebox/plugins/chrome/tests/test_chrome.py
@@ -28,70 +28,25 @@ import tempfile
 import shutil
 import platform

-PLUGIN_DIR = Path(__file__).parent.parent
-CHROME_LAUNCH_HOOK = PLUGIN_DIR / 'on_Crawl__30_chrome_launch.bg.js'
-CHROME_TAB_HOOK = PLUGIN_DIR / 'on_Snapshot__20_chrome_tab.bg.js'
-CHROME_NAVIGATE_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_chrome_navigate.*'), None)
+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    get_test_env,
+    get_lib_dir,
+    get_node_modules_dir,
+    find_chromium_binary,
+    CHROME_PLUGIN_DIR as PLUGIN_DIR,
+    CHROME_LAUNCH_HOOK,
+    CHROME_TAB_HOOK,
+    CHROME_NAVIGATE_HOOK,
+)

-# Get LIB_DIR and MACHINE_TYPE from environment or compute them
-def get_lib_dir_and_machine_type():
-    """Get or compute LIB_DIR and MACHINE_TYPE for tests."""
-    from archivebox.config.paths import get_machine_type
-    from archivebox.config.common import STORAGE_CONFIG
-
-    lib_dir = os.environ.get('LIB_DIR') or str(STORAGE_CONFIG.LIB_DIR)
-    machine_type = os.environ.get('MACHINE_TYPE') or get_machine_type()
-
-    return Path(lib_dir), machine_type
-
-# Setup NODE_MODULES_DIR to find npm packages
-LIB_DIR, MACHINE_TYPE = get_lib_dir_and_machine_type()
-# Note: LIB_DIR already includes machine_type (e.g., data/lib/arm64-darwin)
-NODE_MODULES_DIR = LIB_DIR / 'npm' / 'node_modules'
+# Get LIB_DIR and NODE_MODULES_DIR from shared helpers
+LIB_DIR = get_lib_dir()
+NODE_MODULES_DIR = get_node_modules_dir()
 NPM_PREFIX = LIB_DIR / 'npm'

 # Chromium install location (relative to DATA_DIR)
 CHROMIUM_INSTALL_DIR = Path(os.environ.get('DATA_DIR', '.')).resolve() / 'chromium'

-def get_test_env():
-    """Get environment with NODE_MODULES_DIR and CHROME_BINARY set correctly."""
-    env = os.environ.copy()
-    env['NODE_MODULES_DIR'] = str(NODE_MODULES_DIR)
-    env['LIB_DIR'] = str(LIB_DIR)
-    env['MACHINE_TYPE'] = MACHINE_TYPE
-    # Ensure CHROME_BINARY is set to Chromium
-    if 'CHROME_BINARY' not in env:
-        chromium = find_chromium_binary()
-        if chromium:
-            env['CHROME_BINARY'] = chromium
-    return env
-
-
-def find_chromium_binary(data_dir=None):
-    """Find the Chromium binary using chrome_utils.js findChromium().
-
-    This uses the centralized findChromium() function which checks:
-    - CHROME_BINARY env var
-    - @puppeteer/browsers install locations (in data_dir/chromium)
-    - System Chromium locations
-    - Falls back to Chrome (with warning)
-
-    Args:
-        data_dir: Directory where chromium was installed (contains chromium/ subdir)
-    """
-    chrome_utils = PLUGIN_DIR / 'chrome_utils.js'
-    # Use provided data_dir, or fall back to env var, or current dir
-    search_dir = data_dir or os.environ.get('DATA_DIR', '.')
-    result = subprocess.run(
-        ['node', str(chrome_utils), 'findChromium', str(search_dir)],
-        capture_output=True,
-        text=True,
-        timeout=10
-    )
-    if result.returncode == 0 and result.stdout.strip():
-        return result.stdout.strip()
-    return None
-

@pytest.fixture(scope="session", autouse=True)
 def ensure_chromium_and_puppeteer_installed():
--- a/archivebox/plugins/dom/tests/test_dom.py
+++ b/archivebox/plugins/dom/tests/test_dom.py
@@ -20,29 +20,22 @@ from pathlib import Path

 import pytest

+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    get_test_env,
+    get_plugin_dir,
+    get_hook_script,
+    run_hook_and_parse,
+    LIB_DIR,
+    NODE_MODULES_DIR,
+    PLUGINS_ROOT,
+)

-PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
-DOM_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_dom.*'), None)
-NPM_PROVIDER_HOOK = next((PLUGINS_ROOT / 'npm').glob('on_Binary__install_using_npm_provider.py'), None)
+
+PLUGIN_DIR = get_plugin_dir(__file__)
+DOM_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_dom.*')
+NPM_PROVIDER_HOOK = get_hook_script(PLUGINS_ROOT / 'npm', 'on_Binary__install_using_npm_provider.py')
 TEST_URL = 'https://example.com'

-# Get LIB_DIR for NODE_MODULES_DIR
-def get_lib_dir():
-    """Get LIB_DIR for tests."""
-    from archivebox.config.common import STORAGE_CONFIG
-    return Path(os.environ.get('LIB_DIR') or str(STORAGE_CONFIG.LIB_DIR))
-
-LIB_DIR = get_lib_dir()
-NODE_MODULES_DIR = LIB_DIR / 'npm' / 'node_modules'
-
-def get_test_env():
-    """Get environment with NODE_MODULES_DIR set correctly."""
-    env = os.environ.copy()
-    env['NODE_MODULES_DIR'] = str(NODE_MODULES_DIR)
-    env['LIB_DIR'] = str(LIB_DIR)
-    return env
-

 def test_hook_script_exists():
    """Verify on_Snapshot hook exists."""
--- a/archivebox/plugins/favicon/tests/test_favicon.py
+++ b/archivebox/plugins/favicon/tests/test_favicon.py
@@ -2,7 +2,6 @@
 Integration tests for favicon plugin

 Tests verify:
-    pass
 1. Plugin script exists
 2. requests library is available
 3. Favicon extraction works for real example.com
@@ -21,9 +20,15 @@ from pathlib import Path

 import pytest

+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    get_plugin_dir,
+    get_hook_script,
+    parse_jsonl_output,
+)

-PLUGIN_DIR = Path(__file__).parent.parent
-FAVICON_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_favicon.*'), None)
+
+PLUGIN_DIR = get_plugin_dir(__file__)
+FAVICON_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_favicon.*')
 TEST_URL = 'https://example.com'


--- a/archivebox/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py
+++ b/archivebox/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py
@@ -14,6 +14,14 @@ from pathlib import Path

 import pytest

+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    setup_test_env,
+    launch_chromium_session,
+    kill_chromium_session,
+    CHROME_LAUNCH_HOOK,
+    PLUGINS_ROOT,
+)
+

 PLUGIN_DIR = Path(__file__).parent.parent
 INSTALL_SCRIPT = next(PLUGIN_DIR.glob('on_Crawl__*_install_istilldontcareaboutcookies_extension.*'), None)
@@ -124,107 +132,6 @@ def test_no_configuration_required():
        assert "API" not in (result.stdout + result.stderr) or result.returncode == 0


-PLUGINS_ROOT = PLUGIN_DIR.parent
-CHROME_INSTALL_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Crawl__00_install_puppeteer_chromium.py'
-CHROME_LAUNCH_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Crawl__30_chrome_launch.bg.js'
-
-
-def setup_test_env(tmpdir: Path) -> dict:
-    """Set up isolated data/lib directory structure for tests.
-
-    Creates structure matching real ArchiveBox data dir:
-        <tmpdir>/data/
-            lib/
-                arm64-darwin/   (or x86_64-linux, etc.)
-                    npm/
-                        .bin/
-                        node_modules/
-            personas/
-                Default/
-                    chrome_extensions/
-            users/
-                testuser/
-                    crawls/
-                    snapshots/
-
-    Calls chrome install hook which handles puppeteer-core and chromium installation.
-    Returns env dict with DATA_DIR, LIB_DIR, NPM_BIN_DIR, NODE_MODULES_DIR, CHROME_BINARY, etc.
-    """
-    import platform
-    from datetime import datetime
-
-    # Determine machine type (matches archivebox.config.paths.get_machine_type())
-    machine = platform.machine().lower()
-    system = platform.system().lower()
-    if machine in ('arm64', 'aarch64'):
-        machine = 'arm64'
-    elif machine in ('x86_64', 'amd64'):
-        machine = 'x86_64'
-    machine_type = f"{machine}-{system}"
-
-    # Create proper directory structure matching real ArchiveBox layout
-    data_dir = tmpdir / 'data'
-    lib_dir = data_dir / 'lib' / machine_type
-    npm_dir = lib_dir / 'npm'
-    npm_bin_dir = npm_dir / '.bin'
-    node_modules_dir = npm_dir / 'node_modules'
-
-    # Extensions go under personas/Default/
-    chrome_extensions_dir = data_dir / 'personas' / 'Default' / 'chrome_extensions'
-
-    # User data goes under users/{username}/
-    date_str = datetime.now().strftime('%Y%m%d')
-    users_dir = data_dir / 'users' / 'testuser'
-    crawls_dir = users_dir / 'crawls' / date_str
-    snapshots_dir = users_dir / 'snapshots' / date_str
-
-    # Create all directories
-    node_modules_dir.mkdir(parents=True, exist_ok=True)
-    npm_bin_dir.mkdir(parents=True, exist_ok=True)
-    chrome_extensions_dir.mkdir(parents=True, exist_ok=True)
-    crawls_dir.mkdir(parents=True, exist_ok=True)
-    snapshots_dir.mkdir(parents=True, exist_ok=True)
-
-    # Build complete env dict
-    env = os.environ.copy()
-    env.update({
-        'DATA_DIR': str(data_dir),
-        'LIB_DIR': str(lib_dir),
-        'MACHINE_TYPE': machine_type,
-        'NPM_BIN_DIR': str(npm_bin_dir),
-        'NODE_MODULES_DIR': str(node_modules_dir),
-        'CHROME_EXTENSIONS_DIR': str(chrome_extensions_dir),
-        'CRAWLS_DIR': str(crawls_dir),
-        'SNAPSHOTS_DIR': str(snapshots_dir),
-    })
-
-    # Call chrome install hook (installs puppeteer-core and chromium, outputs JSONL)
-    result = subprocess.run(
-        ['python', str(CHROME_INSTALL_HOOK)],
-        capture_output=True, text=True, timeout=120, env=env
-    )
-    if result.returncode != 0:
-        pytest.skip(f"Chrome install hook failed: {result.stderr}")
-
-    # Parse JSONL output to get CHROME_BINARY
-    chrome_binary = None
-    for line in result.stdout.strip().split('\n'):
-        if not line.strip():
-            continue
-        try:
-            data = json.loads(line)
-            if data.get('type') == 'Binary' and data.get('abspath'):
-                chrome_binary = data['abspath']
-                break
-        except json.JSONDecodeError:
-            continue
-
-    if not chrome_binary or not Path(chrome_binary).exists():
-        pytest.skip(f"Chromium binary not found: {chrome_binary}")
-
-    env['CHROME_BINARY'] = chrome_binary
-    return env
-
 TEST_URL = 'https://www.filmin.es/'


@@ -420,54 +327,6 @@ const puppeteer = require('puppeteer-core');
                    pass


-def launch_chromium_session(env: dict, chrome_dir: Path, crawl_id: str):
-    """Launch Chromium and return (process, cdp_url) or raise on failure."""
-    chrome_dir.mkdir(parents=True, exist_ok=True)
-
-    chrome_launch_process = subprocess.Popen(
-        ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
-        cwd=str(chrome_dir),
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-        env=env
-    )
-
-    # Wait for Chromium to launch and CDP URL to be available
-    cdp_url = None
-    for i in range(20):
-        if chrome_launch_process.poll() is not None:
-            stdout, stderr = chrome_launch_process.communicate()
-            raise RuntimeError(f"Chromium launch failed:\nStdout: {stdout}\nStderr: {stderr}")
-        cdp_file = chrome_dir / 'cdp_url.txt'
-        if cdp_file.exists():
-            cdp_url = cdp_file.read_text().strip()
-            break
-        time.sleep(1)
-
-    if not cdp_url:
-        chrome_launch_process.kill()
-        raise RuntimeError("Chromium CDP URL not found after 20s")
-
-    return chrome_launch_process, cdp_url
-
-
-def kill_chromium_session(chrome_launch_process, chrome_dir: Path):
-    """Clean up Chromium process."""
-    try:
-        chrome_launch_process.send_signal(signal.SIGTERM)
-        chrome_launch_process.wait(timeout=5)
-    except:
-        pass
-    chrome_pid_file = chrome_dir / 'chrome.pid'
-    if chrome_pid_file.exists():
-        try:
-            chrome_pid = int(chrome_pid_file.read_text().strip())
-            os.kill(chrome_pid, signal.SIGKILL)
-        except (OSError, ValueError):
-            pass
-
-
 def check_cookie_consent_visibility(cdp_url: str, test_url: str, env: dict, script_dir: Path) -> dict:
    """Check if cookie consent elements are visible on a page.

--- a/archivebox/plugins/mercury/tests/test_mercury.py
+++ b/archivebox/plugins/mercury/tests/test_mercury.py
@@ -2,7 +2,6 @@
 Integration tests for mercury plugin

 Tests verify:
-    pass
 1. Hook script exists
 2. Dependencies installed via validation hooks
 3. Verify deps with abx-pkg
@@ -19,9 +18,15 @@ import tempfile
 from pathlib import Path
 import pytest

-PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
-MERCURY_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_mercury.*'), None)
+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    get_plugin_dir,
+    get_hook_script,
+    PLUGINS_ROOT,
+)
+
+
+PLUGIN_DIR = get_plugin_dir(__file__)
+MERCURY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_mercury.*')
 TEST_URL = 'https://example.com'

 def test_hook_script_exists():
--- a/archivebox/plugins/pdf/tests/test_pdf.py
+++ b/archivebox/plugins/pdf/tests/test_pdf.py
@@ -21,29 +21,22 @@ from pathlib import Path

 import pytest

+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    get_test_env,
+    get_plugin_dir,
+    get_hook_script,
+    run_hook_and_parse,
+    LIB_DIR,
+    NODE_MODULES_DIR,
+    PLUGINS_ROOT,
+)

-PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
-PDF_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_pdf.*'), None)
+
+PLUGIN_DIR = get_plugin_dir(__file__)
+PDF_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_pdf.*')
 NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Binary__install_using_npm_provider.py'
 TEST_URL = 'https://example.com'

-# Get LIB_DIR for NODE_MODULES_DIR
-def get_lib_dir():
-    """Get LIB_DIR for tests."""
-    from archivebox.config.common import STORAGE_CONFIG
-    return Path(os.environ.get('LIB_DIR') or str(STORAGE_CONFIG.LIB_DIR))
-
-LIB_DIR = get_lib_dir()
-NODE_MODULES_DIR = LIB_DIR / 'npm' / 'node_modules'
-
-def get_test_env():
-    """Get environment with NODE_MODULES_DIR set correctly."""
-    env = os.environ.copy()
-    env['NODE_MODULES_DIR'] = str(NODE_MODULES_DIR)
-    env['LIB_DIR'] = str(LIB_DIR)
-    return env
-

 def test_hook_script_exists():
    """Verify on_Snapshot hook exists."""
--- a/archivebox/plugins/readability/tests/test_readability.py
+++ b/archivebox/plugins/readability/tests/test_readability.py
@@ -2,7 +2,6 @@
 Integration tests for readability plugin

 Tests verify:
-    pass
 1. Validate hook checks for readability-extractor binary
 2. Verify deps with abx-pkg
 3. Plugin reports missing dependency correctly
@@ -18,10 +17,15 @@ from pathlib import Path

 import pytest

+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    get_plugin_dir,
+    get_hook_script,
+    PLUGINS_ROOT,
+)

-PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
-READABILITY_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_readability.*'))
+
+PLUGIN_DIR = get_plugin_dir(__file__)
+READABILITY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_readability.*')
 TEST_URL = 'https://example.com'


--- a/archivebox/plugins/screenshot/tests/test_screenshot.py
+++ b/archivebox/plugins/screenshot/tests/test_screenshot.py
@@ -20,28 +20,20 @@ from pathlib import Path

 import pytest

+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    get_test_env,
+    get_plugin_dir,
+    get_hook_script,
+    run_hook_and_parse,
+    LIB_DIR,
+    NODE_MODULES_DIR,
+)

-PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
-SCREENSHOT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_screenshot.*'), None)
+
+PLUGIN_DIR = get_plugin_dir(__file__)
+SCREENSHOT_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_screenshot.*')
 TEST_URL = 'https://example.com'

-# Get LIB_DIR for NODE_MODULES_DIR
-def get_lib_dir():
-    """Get LIB_DIR for tests."""
-    from archivebox.config.common import STORAGE_CONFIG
-    return Path(os.environ.get('LIB_DIR') or str(STORAGE_CONFIG.LIB_DIR))
-
-LIB_DIR = get_lib_dir()
-NODE_MODULES_DIR = LIB_DIR / 'npm' / 'node_modules'
-
-def get_test_env():
-    """Get environment with NODE_MODULES_DIR set correctly."""
-    env = os.environ.copy()
-    env['NODE_MODULES_DIR'] = str(NODE_MODULES_DIR)
-    env['LIB_DIR'] = str(LIB_DIR)
-    return env
-

 def test_hook_script_exists():
    """Verify on_Snapshot hook exists."""
--- a/archivebox/plugins/singlefile/on_Snapshot__50_singlefile.py
+++ b/archivebox/plugins/singlefile/on_Snapshot__50_singlefile.py
@@ -77,27 +77,9 @@ def has_staticfile_output() -> bool:
    return staticfile_dir.exists() and any(staticfile_dir.iterdir())


-# Chrome binary search paths
-CHROMIUM_BINARY_NAMES_LINUX = [
-    'chromium', 'chromium-browser', 'chromium-browser-beta',
-    'chromium-browser-unstable', 'chromium-browser-canary', 'chromium-browser-dev',
-]
-CHROME_BINARY_NAMES_LINUX = [
-    'google-chrome', 'google-chrome-stable', 'google-chrome-beta',
-    'google-chrome-canary', 'google-chrome-unstable', 'google-chrome-dev', 'chrome',
-]
-CHROME_BINARY_NAMES_MACOS = [
-    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
-    '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
-]
-CHROMIUM_BINARY_NAMES_MACOS = ['/Applications/Chromium.app/Contents/MacOS/Chromium']
-
-ALL_CHROME_BINARIES = (
-    CHROME_BINARY_NAMES_LINUX + CHROMIUM_BINARY_NAMES_LINUX +
-    CHROME_BINARY_NAMES_MACOS + CHROMIUM_BINARY_NAMES_MACOS
-)
-
-
+# Chrome session directory (relative to extractor output dir)
+# Note: Chrome binary is obtained via CHROME_BINARY env var, not searched for.
+# The centralized Chrome binary search is in chrome_utils.js findChromium().
 CHROME_SESSION_DIR = '../chrome'


--- a/archivebox/plugins/singlefile/tests/test_singlefile.py
+++ b/archivebox/plugins/singlefile/tests/test_singlefile.py
@@ -6,6 +6,8 @@ Tests verify:
 2. CLI-based singlefile extraction works
 3. Dependencies available via abx-pkg
 4. Output contains valid HTML
+5. Connects to Chrome session via CDP when available
+6. Works with extensions loaded (ublock, etc.)
 """

 import json
@@ -16,10 +18,17 @@ from pathlib import Path

 import pytest

+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    get_test_env,
+    get_plugin_dir,
+    get_hook_script,
+    setup_chrome_session,
+    cleanup_chrome,
+)

-PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
-SNAPSHOT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_singlefile.py'), None)
+
+PLUGIN_DIR = get_plugin_dir(__file__)
+SNAPSHOT_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_singlefile.py')
 TEST_URL = "https://example.com"


@@ -52,7 +61,7 @@ def test_singlefile_cli_archives_example_com():
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)

-        env = os.environ.copy()
+        env = get_test_env()
        env['SINGLEFILE_ENABLED'] = 'true'

        # Run singlefile snapshot hook
@@ -78,5 +87,89 @@ def test_singlefile_cli_archives_example_com():
        assert 'Example Domain' in html_content, "Output should contain example.com content"


+def test_singlefile_with_chrome_session():
+    """Test singlefile connects to existing Chrome session via CDP.
+
+    When a Chrome session exists (chrome/cdp_url.txt), singlefile should
+    connect to it instead of launching a new Chrome instance.
+
+    The Chrome session is acquired *before* the try/finally: if
+    setup_chrome_session raises (or skips), there is nothing for this test to
+    clean up, and the original error is reported instead of being masked by a
+    NameError on unbound cleanup arguments.
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Set up Chrome session using shared helper (the returned snapshot
+        # chrome dir is not needed by this test)
+        chrome_launch_process, chrome_pid, _snapshot_chrome_dir = setup_chrome_session(
+            tmpdir=tmpdir,
+            crawl_id='singlefile-test-crawl',
+            snapshot_id='singlefile-test-snap',
+            test_url=TEST_URL,
+            navigate=False,  # Don't navigate, singlefile will do that
+            timeout=20,
+        )
+
+        try:
+            # singlefile looks for ../chrome/cdp_url.txt relative to cwd
+            # So we need to run from a directory that has ../chrome pointing to our chrome dir
+            singlefile_output_dir = tmpdir / 'snapshot' / 'singlefile'
+            singlefile_output_dir.mkdir(parents=True, exist_ok=True)
+
+            # Create symlink so singlefile can find the chrome session
+            chrome_link = singlefile_output_dir.parent / 'chrome'
+            if not chrome_link.exists():
+                chrome_link.symlink_to(tmpdir / 'crawl' / 'chrome')
+
+            env = get_test_env()
+            env['SINGLEFILE_ENABLED'] = 'true'
+            env['CHROME_HEADLESS'] = 'true'
+
+            # Run singlefile - it should find and use the existing Chrome session
+            result = subprocess.run(
+                ['python', str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=singlefile-test-snap'],
+                cwd=str(singlefile_output_dir),
+                capture_output=True,
+                text=True,
+                env=env,
+                timeout=120
+            )
+
+            # Verify output
+            output_file = singlefile_output_dir / 'singlefile.html'
+            if output_file.exists():
+                html_content = output_file.read_text()
+                assert len(html_content) > 500, "Output file too small"
+                assert 'Example Domain' in html_content, "Should contain example.com content"
+            else:
+                # If singlefile couldn't connect to Chrome, it may have failed
+                # Check if it mentioned browser-server in its args (indicating it tried to use CDP)
+                assert result.returncode == 0 or 'browser-server' in result.stderr or 'cdp' in result.stderr.lower(), \
+                    f"Singlefile should attempt CDP connection. stderr: {result.stderr}"
+
+        finally:
+            # Always tear Chrome down, even when an assertion above fails
+            cleanup_chrome(chrome_launch_process, chrome_pid)
+
+
+def test_singlefile_disabled_skips():
+    """With SINGLEFILE_ENABLED=False the hook must exit 0 and print no JSONL."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        workdir = Path(tmpdir)
+
+        disabled_env = get_test_env()
+        disabled_env['SINGLEFILE_ENABLED'] = 'False'
+
+        hook_cmd = ['python', str(SNAPSHOT_HOOK), f'--url={TEST_URL}', '--snapshot-id=test-disabled']
+        result = subprocess.run(
+            hook_cmd,
+            cwd=workdir,
+            capture_output=True,
+            text=True,
+            env=disabled_env,
+            timeout=30
+        )
+
+        assert result.returncode == 0, f"Should exit 0 when disabled: {result.stderr}"
+
+        # Should NOT emit JSONL when disabled: collect every stdout line that
+        # looks like the start of a JSON object
+        jsonl_lines = []
+        for line in result.stdout.strip().split('\n'):
+            if line.strip().startswith('{'):
+                jsonl_lines.append(line)
+        assert len(jsonl_lines) == 0, f"Should not emit JSONL when disabled, but got: {jsonl_lines}"
+
+
 if __name__ == '__main__':
    pytest.main([__file__, '-v'])
--- a/archivebox/plugins/title/tests/test_title.py
+++ b/archivebox/plugins/title/tests/test_title.py
@@ -2,7 +2,6 @@
 Integration tests for title plugin

 Tests verify:
-    pass
 1. Plugin script exists
 2. Node.js is available
 3. Title extraction works for real example.com
@@ -20,9 +19,15 @@ from pathlib import Path

 import pytest

+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    get_plugin_dir,
+    get_hook_script,
+    parse_jsonl_output,
+)

-PLUGIN_DIR = Path(__file__).parent.parent
-TITLE_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_title.*'), None)
+
+PLUGIN_DIR = get_plugin_dir(__file__)
+TITLE_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_title.*')
 TEST_URL = 'https://example.com'


--- a/archivebox/plugins/twocaptcha/tests/test_twocaptcha.py
+++ b/archivebox/plugins/twocaptcha/tests/test_twocaptcha.py
@@ -16,184 +16,25 @@ from pathlib import Path

 import pytest

+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    setup_test_env,
+    launch_chromium_session,
+    kill_chromium_session,
+    CHROME_LAUNCH_HOOK,
+    PLUGINS_ROOT,
+)
+

 PLUGIN_DIR = Path(__file__).parent.parent
-PLUGINS_ROOT = PLUGIN_DIR.parent
 INSTALL_SCRIPT = PLUGIN_DIR / 'on_Crawl__20_install_twocaptcha_extension.js'
 CONFIG_SCRIPT = PLUGIN_DIR / 'on_Crawl__25_configure_twocaptcha_extension_options.js'
-CHROME_INSTALL_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Crawl__00_install_puppeteer_chromium.py'
-CHROME_LAUNCH_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Crawl__30_chrome_launch.bg.js'

 TEST_URL = 'https://2captcha.com/demo/recaptcha-v2'


-def setup_test_env(tmpdir: Path) -> dict:
-    """Set up isolated data/lib directory structure for tests.
-
-    Creates structure matching real ArchiveBox data dir:
-        <tmpdir>/data/
-            lib/
-                arm64-darwin/   (or x86_64-linux, etc.)
-                    npm/
-                        .bin/
-                        node_modules/
-            personas/
-                default/
-                    chrome_extensions/
-            users/
-                testuser/
-                    crawls/
-                    snapshots/
-
-    Calls chrome install hook which handles puppeteer-core and chromium installation.
-    Returns env dict with DATA_DIR, LIB_DIR, NPM_BIN_DIR, NODE_MODULES_DIR, CHROME_BINARY, etc.
-    """
-    import platform
-    from datetime import datetime
-
-    # Determine machine type (matches archivebox.config.paths.get_machine_type())
-    machine = platform.machine().lower()
-    system = platform.system().lower()
-    if machine in ('arm64', 'aarch64'):
-        machine = 'arm64'
-    elif machine in ('x86_64', 'amd64'):
-        machine = 'x86_64'
-    machine_type = f"{machine}-{system}"
-
-    # Create proper directory structure matching real ArchiveBox layout
-    data_dir = tmpdir / 'data'
-    lib_dir = data_dir / 'lib' / machine_type
-    npm_dir = lib_dir / 'npm'
-    npm_bin_dir = npm_dir / '.bin'
-    node_modules_dir = npm_dir / 'node_modules'
-
-    # Extensions go under personas/Default/
-    chrome_extensions_dir = data_dir / 'personas' / 'Default' / 'chrome_extensions'
-
-    # User data goes under users/{username}/
-    date_str = datetime.now().strftime('%Y%m%d')
-    users_dir = data_dir / 'users' / 'testuser'
-    crawls_dir = users_dir / 'crawls' / date_str
-    snapshots_dir = users_dir / 'snapshots' / date_str
-
-    # Create all directories
-    node_modules_dir.mkdir(parents=True, exist_ok=True)
-    npm_bin_dir.mkdir(parents=True, exist_ok=True)
-    chrome_extensions_dir.mkdir(parents=True, exist_ok=True)
-    crawls_dir.mkdir(parents=True, exist_ok=True)
-    snapshots_dir.mkdir(parents=True, exist_ok=True)
-
-    # Build complete env dict
-    env = os.environ.copy()
-    env.update({
-        'DATA_DIR': str(data_dir),
-        'LIB_DIR': str(lib_dir),
-        'MACHINE_TYPE': machine_type,
-        'NPM_BIN_DIR': str(npm_bin_dir),
-        'NODE_MODULES_DIR': str(node_modules_dir),
-        'CHROME_EXTENSIONS_DIR': str(chrome_extensions_dir),
-        'CRAWLS_DIR': str(crawls_dir),
-        'SNAPSHOTS_DIR': str(snapshots_dir),
-    })
-
-    # Only set headless if not already in environment (allow override for debugging)
-    if 'CHROME_HEADLESS' not in os.environ:
-        env['CHROME_HEADLESS'] = 'true'
-
-    # Call chrome install hook (installs puppeteer-core and chromium, outputs JSONL)
-    result = subprocess.run(
-        ['python', str(CHROME_INSTALL_HOOK)],
-        capture_output=True, text=True, timeout=120, env=env
-    )
-    if result.returncode != 0:
-        pytest.skip(f"Chrome install hook failed: {result.stderr}")
-
-    # Parse JSONL output to get CHROME_BINARY
-    chrome_binary = None
-    for line in result.stdout.strip().split('\n'):
-        if not line.strip():
-            continue
-        try:
-            data = json.loads(line)
-            if data.get('type') == 'Binary' and data.get('abspath'):
-                chrome_binary = data['abspath']
-                break
-        except json.JSONDecodeError:
-            continue
-
-    if not chrome_binary or not Path(chrome_binary).exists():
-        pytest.skip(f"Chromium binary not found: {chrome_binary}")
-
-    env['CHROME_BINARY'] = chrome_binary
-    return env
-
-
-def launch_chrome(env: dict, chrome_dir: Path, crawl_id: str):
-    """Launch Chromium and return (process, cdp_url)."""
-    chrome_dir.mkdir(parents=True, exist_ok=True)
-
-    process = subprocess.Popen(
-        ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
-        cwd=str(chrome_dir),
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-        env=env
-    )
-
-    cdp_url = None
-    extensions_ready = False
-    for _ in range(30):
-        if process.poll() is not None:
-            stdout, stderr = process.communicate()
-            raise RuntimeError(f"Chromium failed:\n{stdout}\n{stderr}")
-        cdp_file = chrome_dir / 'cdp_url.txt'
-        ext_file = chrome_dir / 'extensions.json'
-        if cdp_file.exists() and not cdp_url:
-            cdp_url = cdp_file.read_text().strip()
-        if ext_file.exists():
-            extensions_ready = True
-        if cdp_url and extensions_ready:
-            break
-        time.sleep(1)
-
-    if not cdp_url:
-        process.kill()
-        stdout, stderr = process.communicate()
-        raise RuntimeError(f"CDP URL not found after 30s.\nstdout: {stdout}\nstderr: {stderr}")
-
-    # Print chrome launch hook output for debugging
-    import select
-    if hasattr(select, 'poll'):
-        # Read any available stderr without blocking
-        import fcntl
-        import os as os_module
-        fd = process.stderr.fileno()
-        fl = fcntl.fcntl(fd, fcntl.F_GETFL)
-        fcntl.fcntl(fd, fcntl.F_SETFL, fl | os_module.O_NONBLOCK)
-        try:
-            stderr_output = process.stderr.read()
-            if stderr_output:
-                print(f"[Chrome Launch Hook Output]\n{stderr_output}")
-        except:
-            pass
-
-    return process, cdp_url
-
-
-def kill_chrome(process, chrome_dir: Path):
-    """Kill Chromium process."""
-    try:
-        process.send_signal(signal.SIGTERM)
-        process.wait(timeout=5)
-    except:
-        pass
-    pid_file = chrome_dir / 'chrome.pid'
-    if pid_file.exists():
-        try:
-            os.kill(int(pid_file.read_text().strip()), signal.SIGKILL)
-        except:
-            pass
+# Aliases for backward compatibility with the helper names used by the existing tests
+launch_chrome = launch_chromium_session
+kill_chrome = kill_chromium_session


 class TestTwoCaptcha:
--- a/archivebox/plugins/ublock/tests/test_ublock.py
+++ b/archivebox/plugins/ublock/tests/test_ublock.py
@@ -12,6 +12,14 @@ from pathlib import Path

 import pytest

+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    setup_test_env,
+    launch_chromium_session,
+    kill_chromium_session,
+    CHROME_LAUNCH_HOOK,
+    PLUGINS_ROOT,
+)
+

 PLUGIN_DIR = Path(__file__).parent.parent
 INSTALL_SCRIPT = next(PLUGIN_DIR.glob('on_Crawl__*_install_ublock_extension.*'), None)
@@ -157,64 +165,6 @@ def test_large_extension_size():
            assert size_bytes > 1_000_000, f"uBlock Origin should be > 1MB, got {size_bytes} bytes"


-PLUGINS_ROOT = PLUGIN_DIR.parent
-CHROME_INSTALL_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Crawl__00_install_puppeteer_chromium.py'
-CHROME_LAUNCH_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Crawl__30_chrome_launch.bg.js'
-
-
-def launch_chromium_session(env: dict, chrome_dir: Path, crawl_id: str):
-    """Launch Chromium and return (process, cdp_url) or raise on failure."""
-    import signal
-    import time
-
-    chrome_dir.mkdir(parents=True, exist_ok=True)
-
-    chrome_launch_process = subprocess.Popen(
-        ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
-        cwd=str(chrome_dir),
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-        env=env
-    )
-
-    # Wait for Chromium to launch and CDP URL to be available
-    cdp_url = None
-    for i in range(20):
-        if chrome_launch_process.poll() is not None:
-            stdout, stderr = chrome_launch_process.communicate()
-            raise RuntimeError(f"Chromium launch failed:\nStdout: {stdout}\nStderr: {stderr}")
-        cdp_file = chrome_dir / 'cdp_url.txt'
-        if cdp_file.exists():
-            cdp_url = cdp_file.read_text().strip()
-            break
-        time.sleep(1)
-
-    if not cdp_url:
-        chrome_launch_process.kill()
-        raise RuntimeError("Chromium CDP URL not found after 20s")
-
-    return chrome_launch_process, cdp_url
-
-
-def kill_chromium_session(chrome_launch_process, chrome_dir: Path):
-    """Clean up Chromium process."""
-    import signal
-
-    try:
-        chrome_launch_process.send_signal(signal.SIGTERM)
-        chrome_launch_process.wait(timeout=5)
-    except:
-        pass
-    chrome_pid_file = chrome_dir / 'chrome.pid'
-    if chrome_pid_file.exists():
-        try:
-            chrome_pid = int(chrome_pid_file.read_text().strip())
-            os.kill(chrome_pid, signal.SIGKILL)
-        except (OSError, ValueError):
-            pass
-
-
 def check_ad_blocking(cdp_url: str, test_url: str, env: dict, script_dir: Path) -> dict:
    """Check ad blocking effectiveness by counting ad elements on page.

@@ -350,103 +300,6 @@ const puppeteer = require('puppeteer-core');
    return json.loads(output_lines[-1])


-def setup_test_env(tmpdir: Path) -> dict:
-    """Set up isolated data/lib directory structure for tests.
-
-    Creates structure matching real ArchiveBox data dir:
-        <tmpdir>/data/
-            lib/
-                arm64-darwin/   (or x86_64-linux, etc.)
-                    npm/
-                        .bin/
-                        node_modules/
-            personas/
-                default/
-                    chrome_extensions/
-            users/
-                testuser/
-                    crawls/
-                    snapshots/
-
-    Calls chrome install hook which handles puppeteer-core and chromium installation.
-    Returns env dict with DATA_DIR, LIB_DIR, NPM_BIN_DIR, NODE_MODULES_DIR, CHROME_BINARY, etc.
-    """
-    import platform
-    from datetime import datetime
-
-    # Determine machine type (matches archivebox.config.paths.get_machine_type())
-    machine = platform.machine().lower()
-    system = platform.system().lower()
-    if machine in ('arm64', 'aarch64'):
-        machine = 'arm64'
-    elif machine in ('x86_64', 'amd64'):
-        machine = 'x86_64'
-    machine_type = f"{machine}-{system}"
-
-    # Create proper directory structure matching real ArchiveBox layout
-    data_dir = tmpdir / 'data'
-    lib_dir = data_dir / 'lib' / machine_type
-    npm_dir = lib_dir / 'npm'
-    npm_bin_dir = npm_dir / '.bin'
-    node_modules_dir = npm_dir / 'node_modules'
-
-    # Extensions go under personas/Default/
-    chrome_extensions_dir = data_dir / 'personas' / 'Default' / 'chrome_extensions'
-
-    # User data goes under users/{username}/
-    date_str = datetime.now().strftime('%Y%m%d')
-    users_dir = data_dir / 'users' / 'testuser'
-    crawls_dir = users_dir / 'crawls' / date_str
-    snapshots_dir = users_dir / 'snapshots' / date_str
-
-    # Create all directories
-    node_modules_dir.mkdir(parents=True, exist_ok=True)
-    npm_bin_dir.mkdir(parents=True, exist_ok=True)
-    chrome_extensions_dir.mkdir(parents=True, exist_ok=True)
-    crawls_dir.mkdir(parents=True, exist_ok=True)
-    snapshots_dir.mkdir(parents=True, exist_ok=True)
-
-    # Build complete env dict
-    env = os.environ.copy()
-    env.update({
-        'DATA_DIR': str(data_dir),
-        'LIB_DIR': str(lib_dir),
-        'MACHINE_TYPE': machine_type,
-        'NPM_BIN_DIR': str(npm_bin_dir),
-        'NODE_MODULES_DIR': str(node_modules_dir),
-        'CHROME_EXTENSIONS_DIR': str(chrome_extensions_dir),
-        'CRAWLS_DIR': str(crawls_dir),
-        'SNAPSHOTS_DIR': str(snapshots_dir),
-    })
-
-    # Call chrome install hook (installs puppeteer-core and chromium, outputs JSONL)
-    result = subprocess.run(
-        ['python', str(CHROME_INSTALL_HOOK)],
-        capture_output=True, text=True, timeout=120, env=env
-    )
-    if result.returncode != 0:
-        pytest.skip(f"Chrome install hook failed: {result.stderr}")
-
-    # Parse JSONL output to get CHROME_BINARY
-    chrome_binary = None
-    for line in result.stdout.strip().split('\n'):
-        if not line.strip():
-            continue
-        try:
-            data = json.loads(line)
-            if data.get('type') == 'Binary' and data.get('abspath'):
-                chrome_binary = data['abspath']
-                break
-        except json.JSONDecodeError:
-            continue
-
-    if not chrome_binary or not Path(chrome_binary).exists():
-        pytest.skip(f"Chromium binary not found: {chrome_binary}")
-
-    env['CHROME_BINARY'] = chrome_binary
-    return env
-
-
 # Test URL: Yahoo has many ads that uBlock should block
 TEST_URL = 'https://www.yahoo.com/'