""" Tests for chrome_test_helpers.py functions. These tests verify the Python helper functions used across Chrome plugin tests. """ import os import pytest import tempfile from pathlib import Path from archivebox.plugins.chrome.tests.chrome_test_helpers import ( get_test_env, get_machine_type, get_lib_dir, get_node_modules_dir, get_extensions_dir, find_chromium_binary, get_plugin_dir, get_hook_script, parse_jsonl_output, ) def test_get_machine_type(): """Test get_machine_type() returns valid format.""" machine_type = get_machine_type() assert isinstance(machine_type, str) assert '-' in machine_type, "Machine type should be in format: arch-os" # Should be one of the expected formats assert any(x in machine_type for x in ['arm64', 'x86_64']), "Should contain valid architecture" assert any(x in machine_type for x in ['darwin', 'linux', 'win32']), "Should contain valid OS" def test_get_lib_dir_with_env_var(): """Test get_lib_dir() respects LIB_DIR env var.""" with tempfile.TemporaryDirectory() as tmpdir: custom_lib = Path(tmpdir) / 'custom_lib' custom_lib.mkdir() old_lib_dir = os.environ.get('LIB_DIR') try: os.environ['LIB_DIR'] = str(custom_lib) lib_dir = get_lib_dir() assert lib_dir == custom_lib finally: if old_lib_dir: os.environ['LIB_DIR'] = old_lib_dir else: os.environ.pop('LIB_DIR', None) def test_get_node_modules_dir_with_env_var(): """Test get_node_modules_dir() respects NODE_MODULES_DIR env var.""" with tempfile.TemporaryDirectory() as tmpdir: custom_nm = Path(tmpdir) / 'node_modules' custom_nm.mkdir() old_nm_dir = os.environ.get('NODE_MODULES_DIR') try: os.environ['NODE_MODULES_DIR'] = str(custom_nm) nm_dir = get_node_modules_dir() assert nm_dir == custom_nm finally: if old_nm_dir: os.environ['NODE_MODULES_DIR'] = old_nm_dir else: os.environ.pop('NODE_MODULES_DIR', None) def test_get_extensions_dir_default(): """Test get_extensions_dir() returns expected path format.""" ext_dir = get_extensions_dir() assert isinstance(ext_dir, str) assert 'personas' in ext_dir assert 'chrome_extensions' in ext_dir def test_get_extensions_dir_with_custom_persona(): """Test get_extensions_dir() respects ACTIVE_PERSONA env var.""" old_persona = os.environ.get('ACTIVE_PERSONA') old_data_dir = os.environ.get('DATA_DIR') try: os.environ['ACTIVE_PERSONA'] = 'TestPersona' os.environ['DATA_DIR'] = '/tmp/test' ext_dir = get_extensions_dir() assert 'TestPersona' in ext_dir assert '/tmp/test' in ext_dir finally: if old_persona: os.environ['ACTIVE_PERSONA'] = old_persona else: os.environ.pop('ACTIVE_PERSONA', None) if old_data_dir: os.environ['DATA_DIR'] = old_data_dir else: os.environ.pop('DATA_DIR', None) def test_get_test_env_returns_dict(): """Test get_test_env() returns properly formatted environment dict.""" env = get_test_env() assert isinstance(env, dict) # Should include key paths assert 'MACHINE_TYPE' in env assert 'LIB_DIR' in env assert 'NODE_MODULES_DIR' in env assert 'NODE_PATH' in env # Critical for module resolution assert 'NPM_BIN_DIR' in env assert 'CHROME_EXTENSIONS_DIR' in env # Verify NODE_PATH equals NODE_MODULES_DIR (for Node.js module resolution) assert env['NODE_PATH'] == env['NODE_MODULES_DIR'] def test_get_test_env_paths_are_absolute(): """Test that get_test_env() returns absolute paths.""" env = get_test_env() # All path-like values should be absolute assert Path(env['LIB_DIR']).is_absolute() assert Path(env['NODE_MODULES_DIR']).is_absolute() assert Path(env['NODE_PATH']).is_absolute() def test_find_chromium_binary(): """Test find_chromium_binary() returns a path or None.""" binary = find_chromium_binary() if binary: assert isinstance(binary, str) # Should be an absolute path if found assert os.path.isabs(binary) def test_get_plugin_dir(): """Test get_plugin_dir() finds correct plugin directory.""" # Use this test file's path test_file = __file__ plugin_dir = get_plugin_dir(test_file) assert plugin_dir.exists() assert plugin_dir.is_dir() # Should be the chrome plugin directory assert plugin_dir.name == 'chrome' assert (plugin_dir.parent.name == 'plugins') def test_get_hook_script_finds_existing_hook(): """Test get_hook_script() can find an existing hook.""" from archivebox.plugins.chrome.tests.chrome_test_helpers import CHROME_PLUGIN_DIR # Try to find the chrome launch hook hook = get_hook_script(CHROME_PLUGIN_DIR, 'on_Crawl__*_chrome_launch.*') if hook: # May not exist in all test environments assert hook.exists() assert hook.is_file() assert 'chrome_launch' in hook.name def test_get_hook_script_returns_none_for_missing(): """Test get_hook_script() returns None for non-existent hooks.""" from archivebox.plugins.chrome.tests.chrome_test_helpers import CHROME_PLUGIN_DIR hook = get_hook_script(CHROME_PLUGIN_DIR, 'nonexistent_hook_*_pattern.*') assert hook is None def test_parse_jsonl_output_valid(): """Test parse_jsonl_output() parses valid JSONL.""" jsonl_output = '''{"type": "ArchiveResult", "status": "succeeded", "output": "test1"} {"type": "ArchiveResult", "status": "failed", "error": "test2"} ''' # Returns first match only result = parse_jsonl_output(jsonl_output) assert result is not None assert result['type'] == 'ArchiveResult' assert result['status'] == 'succeeded' assert result['output'] == 'test1' def test_parse_jsonl_output_with_non_json_lines(): """Test parse_jsonl_output() skips non-JSON lines.""" mixed_output = '''Some non-JSON output {"type": "ArchiveResult", "status": "succeeded"} More non-JSON {"type": "ArchiveResult", "status": "failed"} ''' result = parse_jsonl_output(mixed_output) assert result is not None assert result['type'] == 'ArchiveResult' assert result['status'] == 'succeeded' def test_parse_jsonl_output_empty(): """Test parse_jsonl_output() handles empty input.""" result = parse_jsonl_output('') assert result is None def test_parse_jsonl_output_filters_by_type(): """Test parse_jsonl_output() can filter by record type.""" jsonl_output = '''{"type": "LogEntry", "data": "log1"} {"type": "ArchiveResult", "data": "result1"} {"type": "ArchiveResult", "data": "result2"} ''' # Should return first ArchiveResult, not LogEntry result = parse_jsonl_output(jsonl_output, record_type='ArchiveResult') assert result is not None assert result['type'] == 'ArchiveResult' assert result['data'] == 'result1' # First ArchiveResult def test_parse_jsonl_output_filters_custom_type(): """Test parse_jsonl_output() can filter by custom record type.""" jsonl_output = '''{"type": "ArchiveResult", "data": "result1"} {"type": "LogEntry", "data": "log1"} {"type": "ArchiveResult", "data": "result2"} ''' result = parse_jsonl_output(jsonl_output, record_type='LogEntry') assert result is not None assert result['type'] == 'LogEntry' assert result['data'] == 'log1' def test_machine_type_consistency(): """Test that machine type is consistent across calls.""" mt1 = get_machine_type() mt2 = get_machine_type() assert mt1 == mt2, "Machine type should be stable across calls" def test_lib_dir_is_directory(): """Test that lib_dir points to an actual directory when DATA_DIR is set.""" with tempfile.TemporaryDirectory() as tmpdir: old_data_dir = os.environ.get('DATA_DIR') try: os.environ['DATA_DIR'] = tmpdir # Create the expected directory structure machine_type = get_machine_type() lib_dir = Path(tmpdir) / 'lib' / machine_type lib_dir.mkdir(parents=True, exist_ok=True) result = get_lib_dir() # Should return a Path object assert isinstance(result, Path) finally: if old_data_dir: os.environ['DATA_DIR'] = old_data_dir else: os.environ.pop('DATA_DIR', None) if __name__ == '__main__': pytest.main([__file__, '-v'])