From 1cfb77a35598994b4898060a2177f63f40c85d7f Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 31 Dec 2025 09:23:47 +0000 Subject: [PATCH] Rename Python helpers to match JS function names in snake_case - get_machine_type() matches JS getMachineType() - get_lib_dir() matches JS getLibDir() - get_node_modules_dir() matches JS getNodeModulesDir() - get_extensions_dir() matches JS getExtensionsDir() - find_chromium() matches JS findChromium() - kill_chrome() matches JS killChrome() - get_test_env() matches JS getTestEnv() All functions now try JS first (single source of truth) with Python fallback. Added backward compatibility aliases for old names. --- .../chrome/tests/chrome_test_helpers.py | 358 ++++++++++-------- 1 file changed, 193 insertions(+), 165 deletions(-) diff --git a/archivebox/plugins/chrome/tests/chrome_test_helpers.py b/archivebox/plugins/chrome/tests/chrome_test_helpers.py index ee28cf4d..7e8c2d5e 100644 --- a/archivebox/plugins/chrome/tests/chrome_test_helpers.py +++ b/archivebox/plugins/chrome/tests/chrome_test_helpers.py @@ -2,17 +2,37 @@ Shared Chrome test helpers for plugin integration tests. This module provides common utilities for Chrome-based plugin tests, reducing -duplication across test files. It uses the JavaScript utilities from chrome_utils.js -where appropriate. +duplication across test files. Functions delegate to chrome_utils.js (the single +source of truth) with Python fallbacks. + +Function names match the JS equivalents in snake_case: + JS: getMachineType() -> Python: get_machine_type() + JS: getLibDir() -> Python: get_lib_dir() + JS: getNodeModulesDir() -> Python: get_node_modules_dir() + JS: getExtensionsDir() -> Python: get_extensions_dir() + JS: findChromium() -> Python: find_chromium() + JS: killChrome() -> Python: kill_chrome() + JS: getTestEnv() -> Python: get_test_env() Usage: - # Simplest - just import what you need: + # Path helpers (delegate to chrome_utils.js): from archivebox.plugins.chrome.tests.chrome_test_helpers import ( get_test_env, # env dict with LIB_DIR, NODE_MODULES_DIR, MACHINE_TYPE + get_machine_type, # e.g., 'x86_64-linux', 'arm64-darwin' + get_lib_dir, # Path to lib dir + get_node_modules_dir, # Path to node_modules + get_extensions_dir, # Path to chrome extensions + find_chromium, # Find Chrome/Chromium binary + kill_chrome, # Kill Chrome process by PID + ) + + # Test file helpers: + from archivebox.plugins.chrome.tests.chrome_test_helpers import ( get_plugin_dir, # get_plugin_dir(__file__) -> plugin dir Path + get_hook_script, # Find hook script by glob pattern + PLUGINS_ROOT, # Path to plugins root LIB_DIR, # Path to lib dir (lazy-loaded) NODE_MODULES_DIR, # Path to node_modules (lazy-loaded) - PLUGINS_ROOT, # Path to plugins root ) # For Chrome session tests: @@ -61,10 +81,37 @@ CHROME_UTILS = CHROME_PLUGIN_DIR / 'chrome_utils.js' # ============================================================================= -# Path Helpers - use these to avoid boilerplate in test files +# Path Helpers - delegates to chrome_utils.js with Python fallback +# Function names match JS: getMachineType -> get_machine_type, etc. # ============================================================================= +def _call_chrome_utils(command: str, *args: str, env: Optional[dict] = None) -> Tuple[int, str, str]: + """Call chrome_utils.js CLI command (internal helper). + + This is the central dispatch for calling the JS utilities from Python. + All path calculations and Chrome operations are centralized in chrome_utils.js + to ensure consistency between Python and JavaScript code. + + Args: + command: The CLI command (e.g., 'findChromium', 'getTestEnv') + *args: Additional command arguments + env: Environment dict (default: current env) + + Returns: + Tuple of (returncode, stdout, stderr) + """ + cmd = ['node', str(CHROME_UTILS), command] + list(args) + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=30, + env=env or os.environ.copy() + ) + return result.returncode, result.stdout, result.stderr + + def get_plugin_dir(test_file: str) -> Path: """Get the plugin directory from a test file path. @@ -97,39 +144,19 @@ def get_hook_script(plugin_dir: Path, pattern: str) -> Optional[Path]: return matches[0] if matches else None -def get_lib_dir() -> Path: - """Get LIB_DIR for tests, checking env first then ArchiveBox config. - - Returns the path to the lib directory, checking: - 1. LIB_DIR environment variable - 2. ArchiveBox config STORAGE_CONFIG.LIB_DIR - """ - if os.environ.get('LIB_DIR'): - return Path(os.environ['LIB_DIR']) - from archivebox.config.common import STORAGE_CONFIG - return Path(str(STORAGE_CONFIG.LIB_DIR)) - - -def get_node_modules_dir() -> Path: - """Get NODE_MODULES_DIR for tests, checking env first. - - Returns the path to the node_modules directory, checking: - 1. NODE_MODULES_DIR environment variable - 2. Computed from LIB_DIR - """ - if os.environ.get('NODE_MODULES_DIR'): - return Path(os.environ['NODE_MODULES_DIR']) - lib_dir = get_lib_dir() - return lib_dir / 'npm' / 'node_modules' - - def get_machine_type() -> str: """Get machine type string (e.g., 'x86_64-linux', 'arm64-darwin'). - Returns the machine type, checking: - 1. MACHINE_TYPE environment variable - 2. Computed from platform.machine() and platform.system() + Matches JS: getMachineType() + + Tries chrome_utils.js first, falls back to Python computation. """ + # Try JS first (single source of truth) + returncode, stdout, stderr = _call_chrome_utils('getMachineType') + if returncode == 0 and stdout.strip(): + return stdout.strip() + + # Fallback to Python computation if os.environ.get('MACHINE_TYPE'): return os.environ['MACHINE_TYPE'] @@ -142,13 +169,132 @@ def get_machine_type() -> str: return f"{machine}-{system}" -def get_test_env() -> dict: - """Get environment dict with NODE_MODULES_DIR, LIB_DIR, and MACHINE_TYPE set correctly for tests. +def get_lib_dir() -> Path: + """Get LIB_DIR path for platform-specific binaries. - Returns a copy of os.environ with NODE_MODULES_DIR, LIB_DIR, and MACHINE_TYPE added/updated. - Use this for all subprocess calls in simple plugin tests (screenshot, dom, pdf). + Matches JS: getLibDir() + + Tries chrome_utils.js first, falls back to Python computation. + """ + # Try JS first + returncode, stdout, stderr = _call_chrome_utils('getLibDir') + if returncode == 0 and stdout.strip(): + return Path(stdout.strip()) + + # Fallback to Python + if os.environ.get('LIB_DIR'): + return Path(os.environ['LIB_DIR']) + from archivebox.config.common import STORAGE_CONFIG + return Path(str(STORAGE_CONFIG.LIB_DIR)) + + +def get_node_modules_dir() -> Path: + """Get NODE_MODULES_DIR path for npm packages. + + Matches JS: getNodeModulesDir() + + Tries chrome_utils.js first, falls back to Python computation. + """ + # Try JS first + returncode, stdout, stderr = _call_chrome_utils('getNodeModulesDir') + if returncode == 0 and stdout.strip(): + return Path(stdout.strip()) + + # Fallback to Python + if os.environ.get('NODE_MODULES_DIR'): + return Path(os.environ['NODE_MODULES_DIR']) + lib_dir = get_lib_dir() + return lib_dir / 'npm' / 'node_modules' + + +def get_extensions_dir() -> str: + """Get the Chrome extensions directory path. + + Matches JS: getExtensionsDir() + + Tries chrome_utils.js first, falls back to Python computation. + """ + returncode, stdout, stderr = _call_chrome_utils('getExtensionsDir') + if returncode == 0 and stdout.strip(): + return stdout.strip() + + # Fallback to default computation if JS call fails + data_dir = os.environ.get('DATA_DIR', './data') + persona = os.environ.get('ACTIVE_PERSONA', 'Default') + return str(Path(data_dir) / 'personas' / persona / 'chrome_extensions') + + +def find_chromium(data_dir: Optional[str] = None) -> Optional[str]: + """Find the Chromium binary path. + + Matches JS: findChromium() + + Uses chrome_utils.js which checks: + - CHROME_BINARY env var + - @puppeteer/browsers install locations + - System Chromium locations + - Falls back to Chrome (with warning) + + Args: + data_dir: Optional DATA_DIR override + + Returns: + Path to Chromium binary or None if not found """ env = os.environ.copy() + if data_dir: + env['DATA_DIR'] = str(data_dir) + returncode, stdout, stderr = _call_chrome_utils('findChromium', env=env) + if returncode == 0 and stdout.strip(): + return stdout.strip() + return None + + +def kill_chrome(pid: int, output_dir: Optional[str] = None) -> bool: + """Kill a Chrome process by PID. + + Matches JS: killChrome() + + Uses chrome_utils.js which handles: + - SIGTERM then SIGKILL + - Process group killing + - Zombie process cleanup + + Args: + pid: Process ID to kill + output_dir: Optional chrome output directory for PID file cleanup + + Returns: + True if the kill command succeeded + """ + args = [str(pid)] + if output_dir: + args.append(str(output_dir)) + returncode, stdout, stderr = _call_chrome_utils('killChrome', *args) + return returncode == 0 + + +def get_test_env() -> dict: + """Get environment dict with all paths set correctly for tests. + + Matches JS: getTestEnv() + + Tries chrome_utils.js first for path values, builds env dict. + Use this for all subprocess calls in plugin tests. + """ + env = os.environ.copy() + + # Try to get all paths from JS (single source of truth) + returncode, stdout, stderr = _call_chrome_utils('getTestEnv') + if returncode == 0 and stdout.strip(): + try: + js_env = json.loads(stdout) + env.update(js_env) + return env + except json.JSONDecodeError: + pass + + # Fallback to Python computation lib_dir = get_lib_dir() env['LIB_DIR'] = str(lib_dir) env['NODE_MODULES_DIR'] = str(get_node_modules_dir()) @@ -156,6 +302,13 @@ def get_test_env() -> dict: return env +# Backward compatibility aliases (deprecated, use new names) +find_chromium_binary = find_chromium +kill_chrome_via_js = kill_chrome +get_machine_type_from_js = get_machine_type +get_test_env_from_js = get_test_env + + # ============================================================================= # Module-level constants (lazy-loaded on first access) # Import these directly: from chrome_test_helpers import LIB_DIR, NODE_MODULES_DIR @@ -321,131 +474,6 @@ def run_hook_and_parse( return returncode, result, stderr -def call_chrome_utils(command: str, *args: str, env: Optional[dict] = None) -> Tuple[int, str, str]: - """Call chrome_utils.js CLI command. - - This is the central dispatch for calling the JS utilities from Python. - All path calculations and Chrome operations are centralized in chrome_utils.js - to ensure consistency between Python and JavaScript code. - - Args: - command: The CLI command (e.g., 'findChromium', 'getTestEnv') - *args: Additional command arguments - env: Environment dict (default: current env) - - Returns: - Tuple of (returncode, stdout, stderr) - """ - cmd = ['node', str(CHROME_UTILS), command] + list(args) - result = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=30, - env=env or os.environ.copy() - ) - return result.returncode, result.stdout, result.stderr - - -def get_test_env_from_js() -> Optional[Dict[str, str]]: - """Get test environment paths from chrome_utils.js getTestEnv(). - - This is the single source of truth for path calculations. - Python calls JS to get all paths to avoid duplicating logic. - - Returns: - Dict with DATA_DIR, MACHINE_TYPE, LIB_DIR, NODE_MODULES_DIR, etc. - or None if the JS call fails - """ - returncode, stdout, stderr = call_chrome_utils('getTestEnv') - if returncode == 0 and stdout.strip(): - try: - return json.loads(stdout) - except json.JSONDecodeError: - pass - return None - - -def find_chromium_binary(data_dir: Optional[str] = None) -> Optional[str]: - """Find the Chromium binary using chrome_utils.js findChromium(). - - This uses the centralized findChromium() function which checks: - - CHROME_BINARY env var - - @puppeteer/browsers install locations - - System Chromium locations - - Falls back to Chrome (with warning) - - Args: - data_dir: Directory where chromium was installed (contains chromium/ subdir) - - Returns: - Path to Chromium binary or None if not found - """ - env = os.environ.copy() - if data_dir: - env['DATA_DIR'] = str(data_dir) - returncode, stdout, stderr = call_chrome_utils('findChromium', env=env) - if returncode == 0 and stdout.strip(): - return stdout.strip() - return None - - -def get_extensions_dir() -> str: - """Get the Chrome extensions directory using chrome_utils.js getExtensionsDir(). - - This uses the centralized path calculation from chrome_utils.js which checks: - - CHROME_EXTENSIONS_DIR env var - - DATA_DIR/personas/ACTIVE_PERSONA/chrome_extensions - - Returns: - Path to extensions directory - """ - returncode, stdout, stderr = call_chrome_utils('getExtensionsDir') - if returncode == 0 and stdout.strip(): - return stdout.strip() - # Fallback to default computation if JS call fails - data_dir = os.environ.get('DATA_DIR', './data') - persona = os.environ.get('ACTIVE_PERSONA', 'Default') - return str(Path(data_dir) / 'personas' / persona / 'chrome_extensions') - - -def get_machine_type_from_js() -> Optional[str]: - """Get machine type from chrome_utils.js getMachineType(). - - This is the single source of truth for machine type calculation. - Returns values like 'x86_64-linux', 'arm64-darwin'. - - Returns: - Machine type string or None if the JS call fails - """ - returncode, stdout, stderr = call_chrome_utils('getMachineType') - if returncode == 0 and stdout.strip(): - return stdout.strip() - return None - - -def kill_chrome_via_js(pid: int, output_dir: Optional[str] = None) -> bool: - """Kill a Chrome process using chrome_utils.js killChrome(). - - This uses the centralized kill logic which handles: - - SIGTERM then SIGKILL - - Process group killing - - Zombie process cleanup - - Args: - pid: Process ID to kill - output_dir: Optional chrome output directory for PID file cleanup - - Returns: - True if the kill command succeeded - """ - args = [str(pid)] - if output_dir: - args.append(str(output_dir)) - returncode, stdout, stderr = call_chrome_utils('killChrome', *args) - return returncode == 0 - - # ============================================================================= # Extension Test Helpers # Used by extension tests (ublock, istilldontcareaboutcookies, twocaptcha) @@ -626,7 +654,7 @@ def kill_chromium_session(chrome_launch_process: subprocess.Popen, chrome_dir: P if chrome_pid_file.exists(): try: chrome_pid = int(chrome_pid_file.read_text().strip()) - kill_chrome_via_js(chrome_pid, str(chrome_dir)) + kill_chrome(chrome_pid, str(chrome_dir)) except (ValueError, FileNotFoundError): pass @@ -782,7 +810,7 @@ def cleanup_chrome(chrome_launch_process: subprocess.Popen, chrome_pid: int, chr pass # Use JS to kill Chrome with proper process group handling - kill_chrome_via_js(chrome_pid, str(chrome_dir) if chrome_dir else None) + kill_chrome(chrome_pid, str(chrome_dir) if chrome_dir else None) @contextmanager