Simplify chrome_test_helpers: remove trivial wrappers, shorten docstrings

- Remove get_plugin_dir() and get_hook_script() - inline as simple patterns
- Remove _LazyPath class and LIB_DIR/NODE_MODULES_DIR constants
- Remove backward compatibility aliases
- Shorten all docstrings to one line each
- Keep Python get_machine_type() implementation (no JS dependency)
- Update 8 test files to use inlined patterns directly
2026-01-03 09:25:42 +10:00 · 2025-12-31 09:39:24 +00:00
parent 1cfb77a355
commit 7971b10cea
9 changed files with 84 additions and 577 deletions
--- a/archivebox/plugins/chrome/tests/chrome_test_helpers.py
+++ b/archivebox/plugins/chrome/tests/chrome_test_helpers.py
@@ -1,59 +1,8 @@
 """
-Shared Chrome test helpers for plugin integration tests.
+Chrome test helpers - path helpers delegate to chrome_utils.js (single source of truth); get_machine_type() is pure Python.

-This module provides common utilities for Chrome-based plugin tests, reducing
-duplication across test files. Functions delegate to chrome_utils.js (the single
-source of truth) with Python fallbacks.
-
-Function names match the JS equivalents in snake_case:
-    JS: getMachineType()  -> Python: get_machine_type()
-    JS: getLibDir()       -> Python: get_lib_dir()
-    JS: getNodeModulesDir() -> Python: get_node_modules_dir()
-    JS: getExtensionsDir() -> Python: get_extensions_dir()
-    JS: findChromium()    -> Python: find_chromium()
-    JS: killChrome()      -> Python: kill_chrome()
-    JS: getTestEnv()      -> Python: get_test_env()
-
-Usage:
-    # Path helpers (delegate to chrome_utils.js):
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-        get_test_env,           # env dict with LIB_DIR, NODE_MODULES_DIR, MACHINE_TYPE
-        get_machine_type,       # e.g., 'x86_64-linux', 'arm64-darwin'
-        get_lib_dir,            # Path to lib dir
-        get_node_modules_dir,   # Path to node_modules
-        get_extensions_dir,     # Path to chrome extensions
-        find_chromium,          # Find Chrome/Chromium binary
-        kill_chrome,            # Kill Chrome process by PID
-    )
-
-    # Test file helpers:
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-        get_plugin_dir,         # get_plugin_dir(__file__) -> plugin dir Path
-        get_hook_script,        # Find hook script by glob pattern
-        PLUGINS_ROOT,           # Path to plugins root
-        LIB_DIR,                # Path to lib dir (lazy-loaded)
-        NODE_MODULES_DIR,       # Path to node_modules (lazy-loaded)
-    )
-
-    # For Chrome session tests:
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-        setup_chrome_session,   # Full Chrome + tab setup
-        cleanup_chrome,         # Cleanup by PID
-        chrome_session,         # Context manager
-    )
-
-    # For extension tests:
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-        setup_test_env,         # Full dir structure + Chrome install
-        launch_chromium_session, # Launch Chrome, return CDP URL
-        kill_chromium_session,   # Cleanup Chrome
-    )
-
-    # Run hooks and parse JSONL:
-    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-        run_hook,               # Run hook, return (returncode, stdout, stderr)
-        parse_jsonl_output,     # Parse JSONL from stdout
-    )
+Function names match JS equivalents in snake_case:
+    getMachineType -> get_machine_type, getLibDir -> get_lib_dir, etc.
 """

 import json
@@ -81,85 +30,21 @@ CHROME_UTILS = CHROME_PLUGIN_DIR / 'chrome_utils.js'


 # =============================================================================
-# Path Helpers - delegates to chrome_utils.js with Python fallback
-# Function names match JS: getMachineType -> get_machine_type, etc.
+# Path Helpers - delegates to chrome_utils.js (single source of truth)
 # =============================================================================


 def _call_chrome_utils(command: str, *args: str, env: Optional[dict] = None) -> Tuple[int, str, str]:
-    """Call chrome_utils.js CLI command (internal helper).
-
-    This is the central dispatch for calling the JS utilities from Python.
-    All path calculations and Chrome operations are centralized in chrome_utils.js
-    to ensure consistency between Python and JavaScript code.
-
-    Args:
-        command: The CLI command (e.g., 'findChromium', 'getTestEnv')
-        *args: Additional command arguments
-        env: Environment dict (default: current env)
-
-    Returns:
-        Tuple of (returncode, stdout, stderr)
-    """
+    """Call chrome_utils.js CLI command. Returns (returncode, stdout, stderr)."""
    cmd = ['node', str(CHROME_UTILS), command] + list(args)
-    result = subprocess.run(
-        cmd,
-        capture_output=True,
-        text=True,
-        timeout=30,
-        env=env or os.environ.copy()
-    )
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30, env=env or os.environ.copy())
    return result.returncode, result.stdout, result.stderr


-def get_plugin_dir(test_file: str) -> Path:
-    """Get the plugin directory from a test file path.
-
-    Usage:
-        PLUGIN_DIR = get_plugin_dir(__file__)
-
-    Args:
-        test_file: The __file__ of the test module (e.g., test_screenshot.py)
-
-    Returns:
-        Path to the plugin directory (e.g., plugins/screenshot/)
-    """
-    return Path(test_file).parent.parent
-
-
-def get_hook_script(plugin_dir: Path, pattern: str) -> Optional[Path]:
-    """Find a hook script in a plugin directory by pattern.
-
-    Usage:
-        HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_screenshot.*')
-
-    Args:
-        plugin_dir: Path to the plugin directory
-        pattern: Glob pattern to match
-
-    Returns:
-        Path to the hook script or None if not found
-    """
-    matches = list(plugin_dir.glob(pattern))
-    return matches[0] if matches else None
-
-
 def get_machine_type() -> str:
-    """Get machine type string (e.g., 'x86_64-linux', 'arm64-darwin').
-
-    Matches JS: getMachineType()
-
-    Tries chrome_utils.js first, falls back to Python computation.
-    """
-    # Try JS first (single source of truth)
-    returncode, stdout, stderr = _call_chrome_utils('getMachineType')
-    if returncode == 0 and stdout.strip():
-        return stdout.strip()
-
-    # Fallback to Python computation
+    """Get machine type (e.g., 'x86_64-linux'). Matches JS getMachineType()."""
    if os.environ.get('MACHINE_TYPE'):
        return os.environ['MACHINE_TYPE']
-
    machine = platform.machine().lower()
    system = platform.system().lower()
    if machine in ('arm64', 'aarch64'):
@@ -170,103 +55,40 @@ def get_machine_type() -> str:


 def get_lib_dir() -> Path:
-    """Get LIB_DIR path for platform-specific binaries.
-
-    Matches JS: getLibDir()
-
-    Tries chrome_utils.js first, falls back to Python computation.
-    """
-    # Try JS first
+    """Get LIB_DIR path. Matches JS getLibDir()."""
    returncode, stdout, stderr = _call_chrome_utils('getLibDir')
-    if returncode == 0 and stdout.strip():
-        return Path(stdout.strip())
-
-    # Fallback to Python
-    if os.environ.get('LIB_DIR'):
-        return Path(os.environ['LIB_DIR'])
-    from archivebox.config.common import STORAGE_CONFIG
-    return Path(str(STORAGE_CONFIG.LIB_DIR))
+    if returncode != 0:
+        raise RuntimeError(f"getLibDir failed: {stderr}")
+    return Path(stdout.strip())


 def get_node_modules_dir() -> Path:
-    """Get NODE_MODULES_DIR path for npm packages.
-
-    Matches JS: getNodeModulesDir()
-
-    Tries chrome_utils.js first, falls back to Python computation.
-    """
-    # Try JS first
+    """Get NODE_MODULES_DIR path. Matches JS getNodeModulesDir()."""
    returncode, stdout, stderr = _call_chrome_utils('getNodeModulesDir')
-    if returncode == 0 and stdout.strip():
-        return Path(stdout.strip())
-
-    # Fallback to Python
-    if os.environ.get('NODE_MODULES_DIR'):
-        return Path(os.environ['NODE_MODULES_DIR'])
-    lib_dir = get_lib_dir()
-    return lib_dir / 'npm' / 'node_modules'
+    if returncode != 0:
+        raise RuntimeError(f"getNodeModulesDir failed: {stderr}")
+    return Path(stdout.strip())


 def get_extensions_dir() -> str:
-    """Get the Chrome extensions directory path.
-
-    Matches JS: getExtensionsDir()
-
-    Tries chrome_utils.js first, falls back to Python computation.
-    """
+    """Get Chrome extensions directory. Matches JS getExtensionsDir()."""
    returncode, stdout, stderr = _call_chrome_utils('getExtensionsDir')
-    if returncode == 0 and stdout.strip():
-        return stdout.strip()
-
-    # Fallback to default computation if JS call fails
-    data_dir = os.environ.get('DATA_DIR', './data')
-    persona = os.environ.get('ACTIVE_PERSONA', 'Default')
-    return str(Path(data_dir) / 'personas' / persona / 'chrome_extensions')
+    if returncode != 0:
+        raise RuntimeError(f"getExtensionsDir failed: {stderr}")
+    return stdout.strip()


 def find_chromium(data_dir: Optional[str] = None) -> Optional[str]:
-    """Find the Chromium binary path.
-
-    Matches JS: findChromium()
-
-    Uses chrome_utils.js which checks:
-    - CHROME_BINARY env var
-    - @puppeteer/browsers install locations
-    - System Chromium locations
-    - Falls back to Chrome (with warning)
-
-    Args:
-        data_dir: Optional DATA_DIR override
-
-    Returns:
-        Path to Chromium binary or None if not found
-    """
+    """Find Chromium binary path. Matches JS findChromium()."""
    env = os.environ.copy()
    if data_dir:
        env['DATA_DIR'] = str(data_dir)
    returncode, stdout, stderr = _call_chrome_utils('findChromium', env=env)
-    if returncode == 0 and stdout.strip():
-        return stdout.strip()
-    return None
+    return stdout.strip() if returncode == 0 and stdout.strip() else None


 def kill_chrome(pid: int, output_dir: Optional[str] = None) -> bool:
-    """Kill a Chrome process by PID.
-
-    Matches JS: killChrome()
-
-    Uses chrome_utils.js which handles:
-    - SIGTERM then SIGKILL
-    - Process group killing
-    - Zombie process cleanup
-
-    Args:
-        pid: Process ID to kill
-        output_dir: Optional chrome output directory for PID file cleanup
-
-    Returns:
-        True if the kill command succeeded
-    """
+    """Kill Chrome process by PID. Matches JS killChrome()."""
    args = [str(pid)]
    if output_dir:
        args.append(str(output_dir))
@@ -275,93 +97,15 @@ def kill_chrome(pid: int, output_dir: Optional[str] = None) -> bool:


 def get_test_env() -> dict:
-    """Get environment dict with all paths set correctly for tests.
-
-    Matches JS: getTestEnv()
-
-    Tries chrome_utils.js first for path values, builds env dict.
-    Use this for all subprocess calls in plugin tests.
-    """
+    """Get env dict with all paths set for tests. Matches JS getTestEnv()."""
    env = os.environ.copy()
-
-    # Try to get all paths from JS (single source of truth)
    returncode, stdout, stderr = _call_chrome_utils('getTestEnv')
-    if returncode == 0 and stdout.strip():
-        try:
-            js_env = json.loads(stdout)
-            env.update(js_env)
-            return env
-        except json.JSONDecodeError:
-            pass
-
-    # Fallback to Python computation
-    lib_dir = get_lib_dir()
-    env['LIB_DIR'] = str(lib_dir)
-    env['NODE_MODULES_DIR'] = str(get_node_modules_dir())
-    env['MACHINE_TYPE'] = get_machine_type()
+    if returncode != 0:
+        raise RuntimeError(f"getTestEnv failed: {stderr}")
+    env.update(json.loads(stdout))
    return env


-# Backward compatibility aliases (deprecated, use new names)
-find_chromium_binary = find_chromium
-kill_chrome_via_js = kill_chrome
-get_machine_type_from_js = get_machine_type
-get_test_env_from_js = get_test_env
-
-
-# =============================================================================
-# Module-level constants (lazy-loaded on first access)
-# Import these directly: from chrome_test_helpers import LIB_DIR, NODE_MODULES_DIR
-# =============================================================================
-
-# These are computed once when first accessed
-_LIB_DIR: Optional[Path] = None
-_NODE_MODULES_DIR: Optional[Path] = None
-
-
-def _get_lib_dir_cached() -> Path:
-    global _LIB_DIR
-    if _LIB_DIR is None:
-        _LIB_DIR = get_lib_dir()
-    return _LIB_DIR
-
-
-def _get_node_modules_dir_cached() -> Path:
-    global _NODE_MODULES_DIR
-    if _NODE_MODULES_DIR is None:
-        _NODE_MODULES_DIR = get_node_modules_dir()
-    return _NODE_MODULES_DIR
-
-
-# Module-level constants that can be imported directly
-# Usage: from chrome_test_helpers import LIB_DIR, NODE_MODULES_DIR
-class _LazyPath:
-    """Lazy path that computes value on first access."""
-    def __init__(self, getter):
-        self._getter = getter
-        self._value = None
-
-    def __fspath__(self):
-        if self._value is None:
-            self._value = self._getter()
-        return str(self._value)
-
-    def __truediv__(self, other):
-        if self._value is None:
-            self._value = self._getter()
-        return self._value / other
-
-    def __str__(self):
-        return self.__fspath__()
-
-    def __repr__(self):
-        return f"<LazyPath: {self.__fspath__()}>"
-
-
-LIB_DIR = _LazyPath(_get_lib_dir_cached)
-NODE_MODULES_DIR = _LazyPath(_get_node_modules_dir_cached)
-
-
 # =============================================================================
 # Hook Execution Helpers
 # =============================================================================
@@ -376,30 +120,10 @@ def run_hook(
    timeout: int = 60,
    extra_args: Optional[List[str]] = None,
 ) -> Tuple[int, str, str]:
-    """Run a hook script and return (returncode, stdout, stderr).
-
-    Usage:
-        returncode, stdout, stderr = run_hook(
-            HOOK_SCRIPT, 'https://example.com', 'test-snap-123',
-            cwd=tmpdir, env=get_test_env()
-        )
-
-    Args:
-        hook_script: Path to the hook script
-        url: URL to process
-        snapshot_id: Snapshot ID
-        cwd: Working directory (default: current dir)
-        env: Environment dict (default: get_test_env())
-        timeout: Timeout in seconds
-        extra_args: Additional arguments to pass
-
-    Returns:
-        Tuple of (returncode, stdout, stderr)
-    """
+    """Run a hook script. Returns (returncode, stdout, stderr)."""
    if env is None:
        env = get_test_env()

-    # Determine interpreter based on file extension
    if hook_script.suffix == '.py':
        cmd = ['python', str(hook_script)]
    elif hook_script.suffix == '.js':
@@ -411,32 +135,12 @@ def run_hook(
    if extra_args:
        cmd.extend(extra_args)

-    result = subprocess.run(
-        cmd,
-        cwd=str(cwd) if cwd else None,
-        capture_output=True,
-        text=True,
-        env=env,
-        timeout=timeout
-    )
+    result = subprocess.run(cmd, cwd=str(cwd) if cwd else None, capture_output=True, text=True, env=env, timeout=timeout)
    return result.returncode, result.stdout, result.stderr


 def parse_jsonl_output(stdout: str, record_type: str = 'ArchiveResult') -> Optional[Dict[str, Any]]:
-    """Parse JSONL output from hook stdout and return the specified record type.
-
-    Usage:
-        result = parse_jsonl_output(stdout)
-        if result and result['status'] == 'succeeded':
-            print("Success!")
-
-    Args:
-        stdout: The stdout from a hook execution
-        record_type: The 'type' field to look for (default: 'ArchiveResult')
-
-    Returns:
-        The parsed JSON dict or None if not found
-    """
+    """Parse JSONL output, return first record matching type."""
    for line in stdout.strip().split('\n'):
        line = line.strip()
        if not line.startswith('{'):
@@ -459,89 +163,39 @@ def run_hook_and_parse(
    timeout: int = 60,
    extra_args: Optional[List[str]] = None,
 ) -> Tuple[int, Optional[Dict[str, Any]], str]:
-    """Run a hook and parse its JSONL output.
-
-    Convenience function combining run_hook() and parse_jsonl_output().
-
-    Returns:
-        Tuple of (returncode, parsed_result_or_none, stderr)
-    """
-    returncode, stdout, stderr = run_hook(
-        hook_script, url, snapshot_id,
-        cwd=cwd, env=env, timeout=timeout, extra_args=extra_args
-    )
-    result = parse_jsonl_output(stdout)
-    return returncode, result, stderr
+    """Run hook and parse JSONL output. Returns (returncode, parsed_result, stderr)."""
+    returncode, stdout, stderr = run_hook(hook_script, url, snapshot_id, cwd=cwd, env=env, timeout=timeout, extra_args=extra_args)
+    return returncode, parse_jsonl_output(stdout), stderr


 # =============================================================================
-# Extension Test Helpers
-# Used by extension tests (ublock, istilldontcareaboutcookies, twocaptcha)
+# Extension Test Helpers (ublock, istilldontcareaboutcookies, twocaptcha)
 # =============================================================================


 def setup_test_env(tmpdir: Path) -> dict:
    """Set up isolated data/lib directory structure for extension tests.
-
-    Creates structure matching real ArchiveBox data dir:
-        <tmpdir>/data/
-            lib/
-                arm64-darwin/   (or x86_64-linux, etc.)
-                    npm/
-                        .bin/
-                        node_modules/
-            personas/
-                Default/
-                    chrome_extensions/
-            users/
-                testuser/
-                    crawls/
-                    snapshots/
-
-    Calls chrome install hook which handles puppeteer-core and chromium installation.
-    Returns env dict with DATA_DIR, LIB_DIR, NPM_BIN_DIR, NODE_MODULES_DIR, CHROME_BINARY, etc.
-
-    Args:
-        tmpdir: Base temporary directory for the test
-
-    Returns:
-        Environment dict with all paths set, or pytest.skip() if Chrome install fails
+    Returns env dict with DATA_DIR, LIB_DIR, CHROME_BINARY, etc.
    """
    import pytest

-    # Determine machine type (matches archivebox.config.paths.get_machine_type())
-    machine = platform.machine().lower()
-    system = platform.system().lower()
-    if machine in ('arm64', 'aarch64'):
-        machine = 'arm64'
-    elif machine in ('x86_64', 'amd64'):
-        machine = 'x86_64'
-    machine_type = f"{machine}-{system}"
+    machine_type = get_machine_type()

-    # Create proper directory structure matching real ArchiveBox layout
+    # Create directory structure
    data_dir = tmpdir / 'data'
    lib_dir = data_dir / 'lib' / machine_type
    npm_dir = lib_dir / 'npm'
    npm_bin_dir = npm_dir / '.bin'
    node_modules_dir = npm_dir / 'node_modules'
-
-    # Extensions go under personas/Default/
    chrome_extensions_dir = data_dir / 'personas' / 'Default' / 'chrome_extensions'
-
-    # User data goes under users/{username}/
    date_str = datetime.now().strftime('%Y%m%d')
    users_dir = data_dir / 'users' / 'testuser'
    crawls_dir = users_dir / 'crawls' / date_str
    snapshots_dir = users_dir / 'snapshots' / date_str

-    # Create all directories
-    node_modules_dir.mkdir(parents=True, exist_ok=True)
-    npm_bin_dir.mkdir(parents=True, exist_ok=True)
-    chrome_extensions_dir.mkdir(parents=True, exist_ok=True)
-    crawls_dir.mkdir(parents=True, exist_ok=True)
-    snapshots_dir.mkdir(parents=True, exist_ok=True)
+    for d in [node_modules_dir, npm_bin_dir, chrome_extensions_dir, crawls_dir, snapshots_dir]:
+        d.mkdir(parents=True, exist_ok=True)

-    # Build complete env dict
    env = os.environ.copy()
    env.update({
        'DATA_DIR': str(data_dir),
@@ -553,20 +207,14 @@ def setup_test_env(tmpdir: Path) -> dict:
        'CRAWLS_DIR': str(crawls_dir),
        'SNAPSHOTS_DIR': str(snapshots_dir),
    })
-
-    # Only set headless if not already in environment (allow override for debugging)
    if 'CHROME_HEADLESS' not in os.environ:
        env['CHROME_HEADLESS'] = 'true'

-    # Call chrome install hook (installs puppeteer-core and chromium, outputs JSONL)
-    result = subprocess.run(
-        ['python', str(CHROME_INSTALL_HOOK)],
-        capture_output=True, text=True, timeout=120, env=env
-    )
+    # Install Chrome
+    result = subprocess.run(['python', str(CHROME_INSTALL_HOOK)], capture_output=True, text=True, timeout=120, env=env)
    if result.returncode != 0:
-        pytest.skip(f"Chrome install hook failed: {result.stderr}")
+        pytest.skip(f"Chrome install failed: {result.stderr}")

-    # Parse JSONL output to get CHROME_BINARY
    chrome_binary = None
    for line in result.stdout.strip().split('\n'):
        if not line.strip():
@@ -587,39 +235,19 @@ def setup_test_env(tmpdir: Path) -> dict:


 def launch_chromium_session(env: dict, chrome_dir: Path, crawl_id: str) -> Tuple[subprocess.Popen, str]:
-    """Launch Chromium and return (process, cdp_url).
-
-    This launches Chrome using the chrome launch hook and waits for the CDP URL
-    to become available. Use this for extension tests that need direct CDP access.
-
-    Args:
-        env: Environment dict (from setup_test_env)
-        chrome_dir: Directory for Chrome to write its files (cdp_url.txt, chrome.pid, etc.)
-        crawl_id: ID for the crawl
-
-    Returns:
-        Tuple of (chrome_launch_process, cdp_url)
-
-    Raises:
-        RuntimeError: If Chrome fails to launch or CDP URL not available after 20s
-    """
+    """Launch Chromium and return (process, cdp_url)."""
    chrome_dir.mkdir(parents=True, exist_ok=True)

    chrome_launch_process = subprocess.Popen(
        ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
-        cwd=str(chrome_dir),
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-        env=env
+        cwd=str(chrome_dir), stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, env=env
    )

-    # Wait for Chromium to launch and CDP URL to be available
    cdp_url = None
-    for i in range(20):
+    for _ in range(20):
        if chrome_launch_process.poll() is not None:
            stdout, stderr = chrome_launch_process.communicate()
-            raise RuntimeError(f"Chromium launch failed:\nStdout: {stdout}\nStderr: {stderr}")
+            raise RuntimeError(f"Chromium launch failed:\n{stdout}\n{stderr}")
        cdp_file = chrome_dir / 'cdp_url.txt'
        if cdp_file.exists():
            cdp_url = cdp_file.read_text().strip()
@@ -634,22 +262,13 @@ def launch_chromium_session(env: dict, chrome_dir: Path, crawl_id: str) -> Tuple


 def kill_chromium_session(chrome_launch_process: subprocess.Popen, chrome_dir: Path) -> None:
-    """Clean up Chromium process launched by launch_chromium_session.
-
-    Uses chrome_utils.js killChrome for proper process group handling.
-
-    Args:
-        chrome_launch_process: The Popen object from launch_chromium_session
-        chrome_dir: The chrome directory containing chrome.pid
-    """
-    # First try to terminate the launch process gracefully
+    """Clean up Chromium process."""
    try:
        chrome_launch_process.send_signal(signal.SIGTERM)
        chrome_launch_process.wait(timeout=5)
    except Exception:
        pass

-    # Read PID and use JS to kill with proper cleanup
    chrome_pid_file = chrome_dir / 'chrome.pid'
    if chrome_pid_file.exists():
        try:
@@ -661,22 +280,7 @@ def kill_chromium_session(chrome_launch_process: subprocess.Popen, chrome_dir: P

@contextmanager
 def chromium_session(env: dict, chrome_dir: Path, crawl_id: str):
-    """Context manager for Chromium sessions with automatic cleanup.
-
-    Usage:
-        with chromium_session(env, chrome_dir, 'test-crawl') as (process, cdp_url):
-            # Use cdp_url to connect with puppeteer
-            pass
-        # Chromium automatically cleaned up
-
-    Args:
-        env: Environment dict (from setup_test_env)
-        chrome_dir: Directory for Chrome files
-        crawl_id: ID for the crawl
-
-    Yields:
-        Tuple of (chrome_launch_process, cdp_url)
-    """
+    """Context manager for Chromium sessions with automatic cleanup."""
    chrome_launch_process = None
    try:
        chrome_launch_process, cdp_url = launch_chromium_session(env, chrome_dir, crawl_id)
@@ -687,8 +291,7 @@ def chromium_session(env: dict, chrome_dir: Path, crawl_id: str):


 # =============================================================================
-# Tab-based Test Helpers
-# Used by tab-based tests (infiniscroll, modalcloser)
+# Tab-based Test Helpers (infiniscroll, modalcloser)
 # =============================================================================


@@ -700,25 +303,7 @@ def setup_chrome_session(
    navigate: bool = True,
    timeout: int = 15,
 ) -> Tuple[subprocess.Popen, int, Path]:
-    """Set up a Chrome session with tab and optional navigation.
-
-    Creates the directory structure, launches Chrome, creates a tab,
-    and optionally navigates to the test URL.
-
-    Args:
-        tmpdir: Temporary directory for test files
-        crawl_id: ID to use for the crawl
-        snapshot_id: ID to use for the snapshot
-        test_url: URL to navigate to (if navigate=True)
-        navigate: Whether to navigate to the URL after creating tab
-        timeout: Seconds to wait for Chrome to start
-
-    Returns:
-        Tuple of (chrome_launch_process, chrome_pid, snapshot_chrome_dir)
-
-    Raises:
-        RuntimeError: If Chrome fails to start or tab creation fails
-    """
+    """Set up Chrome session with tab. Returns (process, pid, snapshot_chrome_dir)."""
    crawl_dir = Path(tmpdir) / 'crawl'
    crawl_dir.mkdir(exist_ok=True)
    chrome_dir = crawl_dir / 'chrome'
@@ -727,21 +312,15 @@ def setup_chrome_session(
    env = get_test_env()
    env['CHROME_HEADLESS'] = 'true'

-    # Launch Chrome at crawl level
    chrome_launch_process = subprocess.Popen(
        ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
-        cwd=str(chrome_dir),
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-        env=env
+        cwd=str(chrome_dir), stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, env=env
    )

-    # Wait for Chrome to launch
-    for i in range(timeout):
+    for _ in range(timeout):
        if chrome_launch_process.poll() is not None:
            stdout, stderr = chrome_launch_process.communicate()
-            raise RuntimeError(f"Chrome launch failed:\nStdout: {stdout}\nStderr: {stderr}")
+            raise RuntimeError(f"Chrome launch failed:\n{stdout}\n{stderr}")
        if (chrome_dir / 'cdp_url.txt').exists():
            break
        time.sleep(1)
@@ -751,36 +330,25 @@ def setup_chrome_session(

    chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())

-    # Create snapshot directory structure
    snapshot_dir = Path(tmpdir) / 'snapshot'
    snapshot_dir.mkdir(exist_ok=True)
    snapshot_chrome_dir = snapshot_dir / 'chrome'
    snapshot_chrome_dir.mkdir(exist_ok=True)

-    # Create tab
    tab_env = env.copy()
    tab_env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
    result = subprocess.run(
        ['node', str(CHROME_TAB_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}', f'--crawl-id={crawl_id}'],
-        cwd=str(snapshot_chrome_dir),
-        capture_output=True,
-        text=True,
-        timeout=60,
-        env=tab_env
+        cwd=str(snapshot_chrome_dir), capture_output=True, text=True, timeout=60, env=tab_env
    )
    if result.returncode != 0:
        cleanup_chrome(chrome_launch_process, chrome_pid)
        raise RuntimeError(f"Tab creation failed: {result.stderr}")

-    # Navigate to URL if requested
    if navigate and CHROME_NAVIGATE_HOOK and test_url != 'about:blank':
        result = subprocess.run(
            ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
-            cwd=str(snapshot_chrome_dir),
-            capture_output=True,
-            text=True,
-            timeout=120,
-            env=env
+            cwd=str(snapshot_chrome_dir), capture_output=True, text=True, timeout=120, env=env
        )
        if result.returncode != 0:
            cleanup_chrome(chrome_launch_process, chrome_pid)
@@ -790,26 +358,12 @@ def setup_chrome_session(


 def cleanup_chrome(chrome_launch_process: subprocess.Popen, chrome_pid: int, chrome_dir: Optional[Path] = None) -> None:
-    """Clean up Chrome processes using chrome_utils.js killChrome.
-
-    Uses the centralized kill logic from chrome_utils.js which handles:
-    - SIGTERM then SIGKILL
-    - Process group killing
-    - Zombie process cleanup
-
-    Args:
-        chrome_launch_process: The Popen object for the chrome launch hook
-        chrome_pid: The PID of the Chrome process
-        chrome_dir: Optional path to chrome output directory
-    """
-    # First try to terminate the launch process gracefully
+    """Clean up Chrome processes."""
    try:
        chrome_launch_process.send_signal(signal.SIGTERM)
        chrome_launch_process.wait(timeout=5)
    except Exception:
        pass
-
-    # Use JS to kill Chrome with proper process group handling
    kill_chrome(chrome_pid, str(chrome_dir) if chrome_dir else None)


@@ -822,35 +376,12 @@ def chrome_session(
    navigate: bool = True,
    timeout: int = 15,
 ):
-    """Context manager for Chrome sessions with automatic cleanup.
-
-    Usage:
-        with chrome_session(tmpdir, test_url='https://example.com') as (process, pid, chrome_dir):
-            # Run tests with chrome session
-            pass
-        # Chrome automatically cleaned up
-
-    Args:
-        tmpdir: Temporary directory for test files
-        crawl_id: ID to use for the crawl
-        snapshot_id: ID to use for the snapshot
-        test_url: URL to navigate to (if navigate=True)
-        navigate: Whether to navigate to the URL after creating tab
-        timeout: Seconds to wait for Chrome to start
-
-    Yields:
-        Tuple of (chrome_launch_process, chrome_pid, snapshot_chrome_dir)
-    """
+    """Context manager for Chrome sessions with automatic cleanup."""
    chrome_launch_process = None
    chrome_pid = None
    try:
        chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(
-            tmpdir=tmpdir,
-            crawl_id=crawl_id,
-            snapshot_id=snapshot_id,
-            test_url=test_url,
-            navigate=navigate,
-            timeout=timeout,
+            tmpdir=tmpdir, crawl_id=crawl_id, snapshot_id=snapshot_id, test_url=test_url, navigate=navigate, timeout=timeout
        )
        yield chrome_launch_process, chrome_pid, snapshot_chrome_dir
    finally:
--- a/archivebox/plugins/dom/tests/test_dom.py
+++ b/archivebox/plugins/dom/tests/test_dom.py
@@ -22,18 +22,16 @@ import pytest

 from archivebox.plugins.chrome.tests.chrome_test_helpers import (
    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
+    get_lib_dir,
+    get_node_modules_dir,
    run_hook_and_parse,
-    LIB_DIR,
-    NODE_MODULES_DIR,
    PLUGINS_ROOT,
 )


-PLUGIN_DIR = get_plugin_dir(__file__)
-DOM_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_dom.*')
-NPM_PROVIDER_HOOK = get_hook_script(PLUGINS_ROOT / 'npm', 'on_Binary__install_using_npm_provider.py')
+PLUGIN_DIR = Path(__file__).parent.parent
+DOM_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_dom.*'), None)
+NPM_PROVIDER_HOOK = next((PLUGINS_ROOT / 'npm').glob('on_Binary__install_using_npm_provider.py'), None)
 TEST_URL = 'https://example.com'


--- a/archivebox/plugins/favicon/tests/test_favicon.py
+++ b/archivebox/plugins/favicon/tests/test_favicon.py
@@ -20,15 +20,11 @@ from pathlib import Path

 import pytest

-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_plugin_dir,
-    get_hook_script,
-    parse_jsonl_output,
-)
+from archivebox.plugins.chrome.tests.chrome_test_helpers import parse_jsonl_output


-PLUGIN_DIR = get_plugin_dir(__file__)
-FAVICON_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_favicon.*')
+PLUGIN_DIR = Path(__file__).parent.parent
+FAVICON_HOOK = min(PLUGIN_DIR.glob('on_Snapshot__*_favicon.*'), default=None)  # min(): stable pick if several files match (glob order is arbitrary)
 TEST_URL = 'https://example.com'


--- a/archivebox/plugins/mercury/tests/test_mercury.py
+++ b/archivebox/plugins/mercury/tests/test_mercury.py
@@ -18,15 +18,11 @@ import tempfile
 from pathlib import Path
 import pytest

-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_plugin_dir,
-    get_hook_script,
-    PLUGINS_ROOT,
-)
+from archivebox.plugins.chrome.tests.chrome_test_helpers import PLUGINS_ROOT


-PLUGIN_DIR = get_plugin_dir(__file__)
-MERCURY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_mercury.*')
+PLUGIN_DIR = Path(__file__).parent.parent
+MERCURY_HOOK = min(PLUGIN_DIR.glob('on_Snapshot__*_mercury.*'), default=None)  # min(): stable pick if several files match (glob order is arbitrary)
 TEST_URL = 'https://example.com'

 def test_hook_script_exists():
--- a/archivebox/plugins/pdf/tests/test_pdf.py
+++ b/archivebox/plugins/pdf/tests/test_pdf.py
@@ -23,17 +23,15 @@ import pytest

 from archivebox.plugins.chrome.tests.chrome_test_helpers import (
    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
+    get_lib_dir,
+    get_node_modules_dir,
    run_hook_and_parse,
-    LIB_DIR,
-    NODE_MODULES_DIR,
    PLUGINS_ROOT,
 )


-PLUGIN_DIR = get_plugin_dir(__file__)
-PDF_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_pdf.*')
+PLUGIN_DIR = Path(__file__).parent.parent
+PDF_HOOK = min(PLUGIN_DIR.glob('on_Snapshot__*_pdf.*'), default=None)  # min(): stable pick if several files match (glob order is arbitrary)
 NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Binary__install_using_npm_provider.py'
 TEST_URL = 'https://example.com'

--- a/archivebox/plugins/readability/tests/test_readability.py
+++ b/archivebox/plugins/readability/tests/test_readability.py
@@ -17,15 +17,11 @@ from pathlib import Path

 import pytest

-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_plugin_dir,
-    get_hook_script,
-    PLUGINS_ROOT,
-)
+from archivebox.plugins.chrome.tests.chrome_test_helpers import PLUGINS_ROOT


-PLUGIN_DIR = get_plugin_dir(__file__)
-READABILITY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_readability.*')
+PLUGIN_DIR = Path(__file__).parent.parent
+READABILITY_HOOK = min(PLUGIN_DIR.glob('on_Snapshot__*_readability.*'), default=None)  # min(): stable pick if several files match (glob order is arbitrary)
 TEST_URL = 'https://example.com'


--- a/archivebox/plugins/screenshot/tests/test_screenshot.py
+++ b/archivebox/plugins/screenshot/tests/test_screenshot.py
@@ -22,16 +22,14 @@ import pytest

 from archivebox.plugins.chrome.tests.chrome_test_helpers import (
    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
+    get_lib_dir,
+    get_node_modules_dir,
    run_hook_and_parse,
-    LIB_DIR,
-    NODE_MODULES_DIR,
 )


-PLUGIN_DIR = get_plugin_dir(__file__)
-SCREENSHOT_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_screenshot.*')
+PLUGIN_DIR = Path(__file__).parent.parent
+SCREENSHOT_HOOK = min(PLUGIN_DIR.glob('on_Snapshot__*_screenshot.*'), default=None)  # min(): stable pick if several files match (glob order is arbitrary)
 TEST_URL = 'https://example.com'


--- a/archivebox/plugins/singlefile/tests/test_singlefile.py
+++ b/archivebox/plugins/singlefile/tests/test_singlefile.py
@@ -20,15 +20,13 @@ import pytest

 from archivebox.plugins.chrome.tests.chrome_test_helpers import (
    get_test_env,
-    get_plugin_dir,
-    get_hook_script,
    setup_chrome_session,
    cleanup_chrome,
 )


-PLUGIN_DIR = get_plugin_dir(__file__)
-SNAPSHOT_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_singlefile.py')
+PLUGIN_DIR = Path(__file__).parent.parent
+SNAPSHOT_HOOK = min(PLUGIN_DIR.glob('on_Snapshot__*_singlefile.py'), default=None)  # min(): stable pick if several files match (glob order is arbitrary)
 TEST_URL = "https://example.com"


--- a/archivebox/plugins/title/tests/test_title.py
+++ b/archivebox/plugins/title/tests/test_title.py
@@ -19,15 +19,11 @@ from pathlib import Path

 import pytest

-from archivebox.plugins.chrome.tests.chrome_test_helpers import (
-    get_plugin_dir,
-    get_hook_script,
-    parse_jsonl_output,
-)
+from archivebox.plugins.chrome.tests.chrome_test_helpers import parse_jsonl_output


-PLUGIN_DIR = get_plugin_dir(__file__)
-TITLE_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_title.*')
+PLUGIN_DIR = Path(__file__).parent.parent
+TITLE_HOOK = min(PLUGIN_DIR.glob('on_Snapshot__*_title.*'), default=None)  # min(): stable pick if several files match (glob order is arbitrary)
 TEST_URL = 'https://example.com'