mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-03 01:15:57 +10:00
Add simpler Chrome test helpers and update test files
New helpers in chrome_test_helpers.py: - get_plugin_dir(__file__) - get plugin dir from test file path - get_hook_script(dir, pattern) - find hook script by glob pattern - run_hook() - run hook script and return (returncode, stdout, stderr) - parse_jsonl_output() - parse JSONL from hook output - run_hook_and_parse() - convenience combo of above two - LIB_DIR, NODE_MODULES_DIR - lazy-loaded module constants - _LazyPath class for deferred path resolution Updated test files to use simpler patterns: - screenshot/tests/test_screenshot.py - dom/tests/test_dom.py - pdf/tests/test_pdf.py - singlefile/tests/test_singlefile.py Before: PLUGIN_DIR = Path(__file__).parent.parent After: PLUGIN_DIR = get_plugin_dir(__file__) Before: LIB_DIR = get_lib_dir(); NODE_MODULES_DIR = LIB_DIR / 'npm' / 'node_modules' After: from chrome_test_helpers import LIB_DIR, NODE_MODULES_DIR
This commit is contained in:
@@ -6,25 +6,33 @@ duplication across test files. It uses the JavaScript utilities from chrome_util
|
||||
where appropriate.
|
||||
|
||||
Usage:
|
||||
# For simple tests (screenshot, dom, pdf, etc.):
|
||||
# Simplest - just import what you need:
|
||||
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||
get_test_env,
|
||||
get_lib_dir,
|
||||
find_chromium_binary,
|
||||
get_test_env, # env dict with LIB_DIR, NODE_MODULES_DIR, MACHINE_TYPE
|
||||
get_plugin_dir, # get_plugin_dir(__file__) -> plugin dir Path
|
||||
LIB_DIR, # Path to lib dir (lazy-loaded)
|
||||
NODE_MODULES_DIR, # Path to node_modules (lazy-loaded)
|
||||
PLUGINS_ROOT, # Path to plugins root
|
||||
)
|
||||
|
||||
# For extension tests (ublock, istilldontcareaboutcookies, twocaptcha):
|
||||
# For Chrome session tests:
|
||||
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||
setup_test_env,
|
||||
launch_chromium_session,
|
||||
kill_chromium_session,
|
||||
setup_chrome_session, # Full Chrome + tab setup
|
||||
cleanup_chrome, # Cleanup by PID
|
||||
chrome_session, # Context manager
|
||||
)
|
||||
|
||||
# For tab-based tests (infiniscroll, modalcloser):
|
||||
# For extension tests:
|
||||
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||
setup_chrome_session,
|
||||
cleanup_chrome,
|
||||
chrome_session,
|
||||
setup_test_env, # Full dir structure + Chrome install
|
||||
launch_chromium_session, # Launch Chrome, return CDP URL
|
||||
kill_chromium_session, # Cleanup Chrome
|
||||
)
|
||||
|
||||
# Run hooks and parse JSONL:
|
||||
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||
run_hook, # Run hook, return (returncode, stdout, stderr)
|
||||
parse_jsonl_output, # Parse JSONL from stdout
|
||||
)
|
||||
"""
|
||||
|
||||
@@ -36,7 +44,7 @@ import subprocess
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Tuple, Optional
|
||||
from typing import Tuple, Optional, List, Dict, Any
|
||||
from contextlib import contextmanager
|
||||
|
||||
|
||||
@@ -52,6 +60,43 @@ CHROME_NAVIGATE_HOOK = next(CHROME_PLUGIN_DIR.glob('on_Snapshot__*_chrome_naviga
|
||||
CHROME_UTILS = CHROME_PLUGIN_DIR / 'chrome_utils.js'
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Path Helpers - use these to avoid boilerplate in test files
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def get_plugin_dir(test_file: str) -> Path:
|
||||
"""Get the plugin directory from a test file path.
|
||||
|
||||
Usage:
|
||||
PLUGIN_DIR = get_plugin_dir(__file__)
|
||||
|
||||
Args:
|
||||
test_file: The __file__ of the test module (e.g., test_screenshot.py)
|
||||
|
||||
Returns:
|
||||
Path to the plugin directory (e.g., plugins/screenshot/)
|
||||
"""
|
||||
return Path(test_file).parent.parent
|
||||
|
||||
|
||||
def get_hook_script(plugin_dir: Path, pattern: str) -> Optional[Path]:
|
||||
"""Find a hook script in a plugin directory by pattern.
|
||||
|
||||
Usage:
|
||||
HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_screenshot.*')
|
||||
|
||||
Args:
|
||||
plugin_dir: Path to the plugin directory
|
||||
pattern: Glob pattern to match
|
||||
|
||||
Returns:
|
||||
Path to the hook script or None if not found
|
||||
"""
|
||||
matches = list(plugin_dir.glob(pattern))
|
||||
return matches[0] if matches else None
|
||||
|
||||
|
||||
def get_lib_dir() -> Path:
|
||||
"""Get LIB_DIR for tests, checking env first then ArchiveBox config.
|
||||
|
||||
@@ -111,6 +156,171 @@ def get_test_env() -> dict:
|
||||
return env
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Module-level constants (lazy-loaded on first access)
|
||||
# Import these directly: from chrome_test_helpers import LIB_DIR, NODE_MODULES_DIR
|
||||
# =============================================================================
|
||||
|
||||
# These are computed once when first accessed
|
||||
_LIB_DIR: Optional[Path] = None
|
||||
_NODE_MODULES_DIR: Optional[Path] = None
|
||||
|
||||
|
||||
def _get_lib_dir_cached() -> Path:
|
||||
global _LIB_DIR
|
||||
if _LIB_DIR is None:
|
||||
_LIB_DIR = get_lib_dir()
|
||||
return _LIB_DIR
|
||||
|
||||
|
||||
def _get_node_modules_dir_cached() -> Path:
|
||||
global _NODE_MODULES_DIR
|
||||
if _NODE_MODULES_DIR is None:
|
||||
_NODE_MODULES_DIR = get_node_modules_dir()
|
||||
return _NODE_MODULES_DIR
|
||||
|
||||
|
||||
# Module-level constants that can be imported directly
|
||||
# Usage: from chrome_test_helpers import LIB_DIR, NODE_MODULES_DIR
|
||||
class _LazyPath:
|
||||
"""Lazy path that computes value on first access."""
|
||||
def __init__(self, getter):
|
||||
self._getter = getter
|
||||
self._value = None
|
||||
|
||||
def __fspath__(self):
|
||||
if self._value is None:
|
||||
self._value = self._getter()
|
||||
return str(self._value)
|
||||
|
||||
def __truediv__(self, other):
|
||||
if self._value is None:
|
||||
self._value = self._getter()
|
||||
return self._value / other
|
||||
|
||||
def __str__(self):
|
||||
return self.__fspath__()
|
||||
|
||||
def __repr__(self):
|
||||
return f"<LazyPath: {self.__fspath__()}>"
|
||||
|
||||
|
||||
LIB_DIR = _LazyPath(_get_lib_dir_cached)
|
||||
NODE_MODULES_DIR = _LazyPath(_get_node_modules_dir_cached)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Hook Execution Helpers
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def run_hook(
|
||||
hook_script: Path,
|
||||
url: str,
|
||||
snapshot_id: str,
|
||||
cwd: Optional[Path] = None,
|
||||
env: Optional[dict] = None,
|
||||
timeout: int = 60,
|
||||
extra_args: Optional[List[str]] = None,
|
||||
) -> Tuple[int, str, str]:
|
||||
"""Run a hook script and return (returncode, stdout, stderr).
|
||||
|
||||
Usage:
|
||||
returncode, stdout, stderr = run_hook(
|
||||
HOOK_SCRIPT, 'https://example.com', 'test-snap-123',
|
||||
cwd=tmpdir, env=get_test_env()
|
||||
)
|
||||
|
||||
Args:
|
||||
hook_script: Path to the hook script
|
||||
url: URL to process
|
||||
snapshot_id: Snapshot ID
|
||||
cwd: Working directory (default: current dir)
|
||||
env: Environment dict (default: get_test_env())
|
||||
timeout: Timeout in seconds
|
||||
extra_args: Additional arguments to pass
|
||||
|
||||
Returns:
|
||||
Tuple of (returncode, stdout, stderr)
|
||||
"""
|
||||
if env is None:
|
||||
env = get_test_env()
|
||||
|
||||
# Determine interpreter based on file extension
|
||||
if hook_script.suffix == '.py':
|
||||
cmd = ['python', str(hook_script)]
|
||||
elif hook_script.suffix == '.js':
|
||||
cmd = ['node', str(hook_script)]
|
||||
else:
|
||||
cmd = [str(hook_script)]
|
||||
|
||||
cmd.extend([f'--url={url}', f'--snapshot-id={snapshot_id}'])
|
||||
if extra_args:
|
||||
cmd.extend(extra_args)
|
||||
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
timeout=timeout
|
||||
)
|
||||
return result.returncode, result.stdout, result.stderr
|
||||
|
||||
|
||||
def parse_jsonl_output(stdout: str, record_type: str = 'ArchiveResult') -> Optional[Dict[str, Any]]:
|
||||
"""Parse JSONL output from hook stdout and return the specified record type.
|
||||
|
||||
Usage:
|
||||
result = parse_jsonl_output(stdout)
|
||||
if result and result['status'] == 'succeeded':
|
||||
print("Success!")
|
||||
|
||||
Args:
|
||||
stdout: The stdout from a hook execution
|
||||
record_type: The 'type' field to look for (default: 'ArchiveResult')
|
||||
|
||||
Returns:
|
||||
The parsed JSON dict or None if not found
|
||||
"""
|
||||
for line in stdout.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if not line.startswith('{'):
|
||||
continue
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get('type') == record_type:
|
||||
return record
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def run_hook_and_parse(
|
||||
hook_script: Path,
|
||||
url: str,
|
||||
snapshot_id: str,
|
||||
cwd: Optional[Path] = None,
|
||||
env: Optional[dict] = None,
|
||||
timeout: int = 60,
|
||||
extra_args: Optional[List[str]] = None,
|
||||
) -> Tuple[int, Optional[Dict[str, Any]], str]:
|
||||
"""Run a hook and parse its JSONL output.
|
||||
|
||||
Convenience function combining run_hook() and parse_jsonl_output().
|
||||
|
||||
Returns:
|
||||
Tuple of (returncode, parsed_result_or_none, stderr)
|
||||
"""
|
||||
returncode, stdout, stderr = run_hook(
|
||||
hook_script, url, snapshot_id,
|
||||
cwd=cwd, env=env, timeout=timeout, extra_args=extra_args
|
||||
)
|
||||
result = parse_jsonl_output(stdout)
|
||||
return returncode, result, stderr
|
||||
|
||||
|
||||
def find_chromium_binary(data_dir: Optional[str] = None) -> Optional[str]:
|
||||
"""Find the Chromium binary using chrome_utils.js findChromium().
|
||||
|
||||
|
||||
@@ -22,19 +22,20 @@ import pytest
|
||||
|
||||
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||
get_test_env,
|
||||
get_lib_dir,
|
||||
get_plugin_dir,
|
||||
get_hook_script,
|
||||
run_hook_and_parse,
|
||||
LIB_DIR,
|
||||
NODE_MODULES_DIR,
|
||||
PLUGINS_ROOT,
|
||||
)
|
||||
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
DOM_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_dom.*'), None)
|
||||
NPM_PROVIDER_HOOK = next((PLUGINS_ROOT / 'npm').glob('on_Binary__install_using_npm_provider.py'), None)
|
||||
PLUGIN_DIR = get_plugin_dir(__file__)
|
||||
DOM_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_dom.*')
|
||||
NPM_PROVIDER_HOOK = get_hook_script(PLUGINS_ROOT / 'npm', 'on_Binary__install_using_npm_provider.py')
|
||||
TEST_URL = 'https://example.com'
|
||||
|
||||
LIB_DIR = get_lib_dir()
|
||||
NODE_MODULES_DIR = LIB_DIR / 'npm' / 'node_modules'
|
||||
|
||||
|
||||
def test_hook_script_exists():
|
||||
"""Verify on_Snapshot hook exists."""
|
||||
|
||||
@@ -23,19 +23,20 @@ import pytest
|
||||
|
||||
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||
get_test_env,
|
||||
get_lib_dir,
|
||||
get_plugin_dir,
|
||||
get_hook_script,
|
||||
run_hook_and_parse,
|
||||
LIB_DIR,
|
||||
NODE_MODULES_DIR,
|
||||
PLUGINS_ROOT,
|
||||
)
|
||||
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
PDF_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_pdf.*'), None)
|
||||
PLUGIN_DIR = get_plugin_dir(__file__)
|
||||
PDF_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_pdf.*')
|
||||
NPM_PROVIDER_HOOK = PLUGINS_ROOT / 'npm' / 'on_Binary__install_using_npm_provider.py'
|
||||
TEST_URL = 'https://example.com'
|
||||
|
||||
LIB_DIR = get_lib_dir()
|
||||
NODE_MODULES_DIR = LIB_DIR / 'npm' / 'node_modules'
|
||||
|
||||
|
||||
def test_hook_script_exists():
|
||||
"""Verify on_Snapshot hook exists."""
|
||||
|
||||
@@ -22,18 +22,18 @@ import pytest
|
||||
|
||||
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||
get_test_env,
|
||||
get_lib_dir,
|
||||
get_plugin_dir,
|
||||
get_hook_script,
|
||||
run_hook_and_parse,
|
||||
LIB_DIR,
|
||||
NODE_MODULES_DIR,
|
||||
)
|
||||
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
SCREENSHOT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_screenshot.*'), None)
|
||||
PLUGIN_DIR = get_plugin_dir(__file__)
|
||||
SCREENSHOT_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_screenshot.*')
|
||||
TEST_URL = 'https://example.com'
|
||||
|
||||
LIB_DIR = get_lib_dir()
|
||||
NODE_MODULES_DIR = LIB_DIR / 'npm' / 'node_modules'
|
||||
|
||||
|
||||
def test_hook_script_exists():
|
||||
"""Verify on_Snapshot hook exists."""
|
||||
|
||||
@@ -20,15 +20,15 @@ import pytest
|
||||
|
||||
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||
get_test_env,
|
||||
get_plugin_dir,
|
||||
get_hook_script,
|
||||
setup_chrome_session,
|
||||
cleanup_chrome,
|
||||
CHROME_PLUGIN_DIR,
|
||||
)
|
||||
|
||||
|
||||
PLUGIN_DIR = Path(__file__).parent.parent
|
||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
||||
SNAPSHOT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_singlefile.py'), None)
|
||||
PLUGIN_DIR = get_plugin_dir(__file__)
|
||||
SNAPSHOT_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_singlefile.py')
|
||||
TEST_URL = "https://example.com"
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user