From ef92a99c4ac854fac29a228119ecfd176ddd1860 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 31 Dec 2025 08:34:35 +0000 Subject: [PATCH] Refactor test_chrome.py to use shared helpers - Add get_machine_type() to chrome_test_helpers.py - Update get_test_env() to include MACHINE_TYPE - Refactor test_chrome.py to import from shared helpers - Removes ~50 lines of duplicate code --- .../chrome/tests/chrome_test_helpers.py | 26 ++++++- .../plugins/chrome/tests/test_chrome.py | 71 ++++--------------- 2 files changed, 36 insertions(+), 61 deletions(-) diff --git a/archivebox/plugins/chrome/tests/chrome_test_helpers.py b/archivebox/plugins/chrome/tests/chrome_test_helpers.py index bccc3bac..935081d5 100644 --- a/archivebox/plugins/chrome/tests/chrome_test_helpers.py +++ b/archivebox/plugins/chrome/tests/chrome_test_helpers.py @@ -78,16 +78,36 @@ def get_node_modules_dir() -> Path: return lib_dir / 'npm' / 'node_modules' -def get_test_env() -> dict: - """Get environment dict with NODE_MODULES_DIR and LIB_DIR set correctly for tests. +def get_machine_type() -> str: + """Get machine type string (e.g., 'x86_64-linux', 'arm64-darwin'). - Returns a copy of os.environ with NODE_MODULES_DIR and LIB_DIR added/updated. + Returns the machine type, checking: + 1. MACHINE_TYPE environment variable + 2. Computed from platform.machine() and platform.system() + """ + if os.environ.get('MACHINE_TYPE'): + return os.environ['MACHINE_TYPE'] + + machine = platform.machine().lower() + system = platform.system().lower() + if machine in ('arm64', 'aarch64'): + machine = 'arm64' + elif machine in ('x86_64', 'amd64'): + machine = 'x86_64' + return f"{machine}-{system}" + + +def get_test_env() -> dict: + """Get environment dict with NODE_MODULES_DIR, LIB_DIR, and MACHINE_TYPE set correctly for tests. + + Returns a copy of os.environ with NODE_MODULES_DIR, LIB_DIR, and MACHINE_TYPE added/updated. Use this for all subprocess calls in simple plugin tests (screenshot, dom, pdf). """ env = os.environ.copy() lib_dir = get_lib_dir() env['LIB_DIR'] = str(lib_dir) env['NODE_MODULES_DIR'] = str(get_node_modules_dir()) + env['MACHINE_TYPE'] = get_machine_type() return env diff --git a/archivebox/plugins/chrome/tests/test_chrome.py b/archivebox/plugins/chrome/tests/test_chrome.py index ca8ad874..d455ba41 100644 --- a/archivebox/plugins/chrome/tests/test_chrome.py +++ b/archivebox/plugins/chrome/tests/test_chrome.py @@ -28,70 +28,25 @@ import tempfile import shutil import platform -PLUGIN_DIR = Path(__file__).parent.parent -CHROME_LAUNCH_HOOK = PLUGIN_DIR / 'on_Crawl__30_chrome_launch.bg.js' -CHROME_TAB_HOOK = PLUGIN_DIR / 'on_Snapshot__20_chrome_tab.bg.js' -CHROME_NAVIGATE_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_chrome_navigate.*'), None) +from archivebox.plugins.chrome.tests.chrome_test_helpers import ( + get_test_env, + get_lib_dir, + get_node_modules_dir, + find_chromium_binary, + CHROME_PLUGIN_DIR as PLUGIN_DIR, + CHROME_LAUNCH_HOOK, + CHROME_TAB_HOOK, + CHROME_NAVIGATE_HOOK, +) -# Get LIB_DIR and MACHINE_TYPE from environment or compute them -def get_lib_dir_and_machine_type(): - """Get or compute LIB_DIR and MACHINE_TYPE for tests.""" - from archivebox.config.paths import get_machine_type - from archivebox.config.common import STORAGE_CONFIG - - lib_dir = os.environ.get('LIB_DIR') or str(STORAGE_CONFIG.LIB_DIR) - machine_type = os.environ.get('MACHINE_TYPE') or get_machine_type() - - return Path(lib_dir), machine_type - -# Setup NODE_MODULES_DIR to find npm packages -LIB_DIR, MACHINE_TYPE = get_lib_dir_and_machine_type() -# Note: LIB_DIR already includes machine_type (e.g., data/lib/arm64-darwin) -NODE_MODULES_DIR = LIB_DIR / 'npm' / 'node_modules' +# Get LIB_DIR and NODE_MODULES_DIR from shared helpers +LIB_DIR = get_lib_dir() +NODE_MODULES_DIR = get_node_modules_dir() NPM_PREFIX = LIB_DIR / 'npm' # Chromium install location (relative to DATA_DIR) CHROMIUM_INSTALL_DIR = Path(os.environ.get('DATA_DIR', '.')).resolve() / 'chromium' -def get_test_env(): - """Get environment with NODE_MODULES_DIR and CHROME_BINARY set correctly.""" - env = os.environ.copy() - env['NODE_MODULES_DIR'] = str(NODE_MODULES_DIR) - env['LIB_DIR'] = str(LIB_DIR) - env['MACHINE_TYPE'] = MACHINE_TYPE - # Ensure CHROME_BINARY is set to Chromium - if 'CHROME_BINARY' not in env: - chromium = find_chromium_binary() - if chromium: - env['CHROME_BINARY'] = chromium - return env - - -def find_chromium_binary(data_dir=None): - """Find the Chromium binary using chrome_utils.js findChromium(). - - This uses the centralized findChromium() function which checks: - - CHROME_BINARY env var - - @puppeteer/browsers install locations (in data_dir/chromium) - - System Chromium locations - - Falls back to Chrome (with warning) - - Args: - data_dir: Directory where chromium was installed (contains chromium/ subdir) - """ - chrome_utils = PLUGIN_DIR / 'chrome_utils.js' - # Use provided data_dir, or fall back to env var, or current dir - search_dir = data_dir or os.environ.get('DATA_DIR', '.') - result = subprocess.run( - ['node', str(chrome_utils), 'findChromium', str(search_dir)], - capture_output=True, - text=True, - timeout=10 - ) - if result.returncode == 0 and result.stdout.strip(): - return result.stdout.strip() - return None - @pytest.fixture(scope="session", autouse=True) def ensure_chromium_and_puppeteer_installed():