mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
Add JS-Python path delegation to reduce Chrome-related duplication
- Add getMachineType, getLibDir, getNodeModulesDir, getTestEnv CLI commands to chrome_utils.js These are now the single source of truth for path calculations - Update chrome_test_helpers.py with call_chrome_utils() dispatcher - Add get_test_env_from_js(), get_machine_type_from_js(), kill_chrome_via_js() helpers - Update cleanup_chrome and kill_chromium_session to use JS killChrome - Remove unused Chrome binary search lists from singlefile hook (~25 lines) - Update readability, mercury, favicon, title tests to use shared helpers
This commit is contained in:
@@ -1333,6 +1333,83 @@ function getExtensionsDir() {
|
|||||||
path.join(dataDir, 'personas', persona, 'chrome_extensions');
|
path.join(dataDir, 'personas', persona, 'chrome_extensions');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get machine type string for platform-specific paths.
|
||||||
|
* Matches Python's archivebox.config.paths.get_machine_type()
|
||||||
|
*
|
||||||
|
* @returns {string} - Machine type (e.g., 'x86_64-linux', 'arm64-darwin')
|
||||||
|
*/
|
||||||
|
function getMachineType() {
|
||||||
|
if (process.env.MACHINE_TYPE) {
|
||||||
|
return process.env.MACHINE_TYPE;
|
||||||
|
}
|
||||||
|
|
||||||
|
let machine = process.arch;
|
||||||
|
const system = process.platform;
|
||||||
|
|
||||||
|
// Normalize machine type to match Python's convention
|
||||||
|
if (machine === 'arm64' || machine === 'aarch64') {
|
||||||
|
machine = 'arm64';
|
||||||
|
} else if (machine === 'x64' || machine === 'x86_64' || machine === 'amd64') {
|
||||||
|
machine = 'x86_64';
|
||||||
|
} else if (machine === 'ia32' || machine === 'x86') {
|
||||||
|
machine = 'x86';
|
||||||
|
}
|
||||||
|
|
||||||
|
return `${machine}-${system}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get LIB_DIR path for platform-specific binaries.
|
||||||
|
* Returns DATA_DIR/lib/MACHINE_TYPE/
|
||||||
|
*
|
||||||
|
* @returns {string} - Absolute path to lib directory
|
||||||
|
*/
|
||||||
|
function getLibDir() {
|
||||||
|
if (process.env.LIB_DIR) {
|
||||||
|
return process.env.LIB_DIR;
|
||||||
|
}
|
||||||
|
const dataDir = getEnv('DATA_DIR', './data');
|
||||||
|
const machineType = getMachineType();
|
||||||
|
return path.join(dataDir, 'lib', machineType);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get NODE_MODULES_DIR path for npm packages.
|
||||||
|
* Returns LIB_DIR/npm/node_modules/
|
||||||
|
*
|
||||||
|
* @returns {string} - Absolute path to node_modules directory
|
||||||
|
*/
|
||||||
|
function getNodeModulesDir() {
|
||||||
|
if (process.env.NODE_MODULES_DIR) {
|
||||||
|
return process.env.NODE_MODULES_DIR;
|
||||||
|
}
|
||||||
|
return path.join(getLibDir(), 'npm', 'node_modules');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get all test environment paths as a JSON object.
|
||||||
|
* This is the single source of truth for path calculations - Python calls this
|
||||||
|
* to avoid duplicating path logic.
|
||||||
|
*
|
||||||
|
* @returns {Object} - Object with all test environment paths
|
||||||
|
*/
|
||||||
|
function getTestEnv() {
|
||||||
|
const dataDir = getEnv('DATA_DIR', './data');
|
||||||
|
const machineType = getMachineType();
|
||||||
|
const libDir = getLibDir();
|
||||||
|
const nodeModulesDir = getNodeModulesDir();
|
||||||
|
|
||||||
|
return {
|
||||||
|
DATA_DIR: dataDir,
|
||||||
|
MACHINE_TYPE: machineType,
|
||||||
|
LIB_DIR: libDir,
|
||||||
|
NODE_MODULES_DIR: nodeModulesDir,
|
||||||
|
NPM_BIN_DIR: path.join(libDir, 'npm', '.bin'),
|
||||||
|
CHROME_EXTENSIONS_DIR: getExtensionsDir(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Install a Chrome extension with caching support.
|
* Install a Chrome extension with caching support.
|
||||||
*
|
*
|
||||||
@@ -1442,8 +1519,13 @@ module.exports = {
|
|||||||
getExtensionPaths,
|
getExtensionPaths,
|
||||||
waitForExtensionTarget,
|
waitForExtensionTarget,
|
||||||
getExtensionTargets,
|
getExtensionTargets,
|
||||||
// Shared extension installer utilities
|
// Shared path utilities (single source of truth for Python/JS)
|
||||||
|
getMachineType,
|
||||||
|
getLibDir,
|
||||||
|
getNodeModulesDir,
|
||||||
getExtensionsDir,
|
getExtensionsDir,
|
||||||
|
getTestEnv,
|
||||||
|
// Shared extension installer utilities
|
||||||
installExtensionWithCache,
|
installExtensionWithCache,
|
||||||
// Deprecated - use enableExtensions option instead
|
// Deprecated - use enableExtensions option instead
|
||||||
getExtensionLaunchArgs,
|
getExtensionLaunchArgs,
|
||||||
@@ -1457,18 +1539,31 @@ if (require.main === module) {
|
|||||||
console.log('Usage: chrome_utils.js <command> [args...]');
|
console.log('Usage: chrome_utils.js <command> [args...]');
|
||||||
console.log('');
|
console.log('');
|
||||||
console.log('Commands:');
|
console.log('Commands:');
|
||||||
console.log(' findChromium');
|
console.log(' findChromium Find Chrome/Chromium binary');
|
||||||
console.log(' installChromium');
|
console.log(' installChromium Install Chromium via @puppeteer/browsers');
|
||||||
console.log(' installPuppeteerCore [npm_prefix]');
|
console.log(' installPuppeteerCore Install puppeteer-core npm package');
|
||||||
console.log(' launchChromium [output_dir] [extension_paths_json]');
|
console.log(' launchChromium Launch Chrome with CDP debugging');
|
||||||
console.log(' killChrome <pid> [output_dir]');
|
console.log(' killChrome <pid> Kill Chrome process by PID');
|
||||||
console.log(' killZombieChrome [data_dir]');
|
console.log(' killZombieChrome Clean up zombie Chrome processes');
|
||||||
console.log(' getExtensionId <path>');
|
console.log('');
|
||||||
console.log(' loadExtensionManifest <path>');
|
console.log(' getMachineType Get machine type (e.g., x86_64-linux)');
|
||||||
console.log(' getExtensionLaunchArgs <extensions_json>');
|
console.log(' getLibDir Get LIB_DIR path');
|
||||||
console.log(' loadOrInstallExtension <webstore_id> <name> [extensions_dir]');
|
console.log(' getNodeModulesDir Get NODE_MODULES_DIR path');
|
||||||
console.log(' getExtensionsDir');
|
console.log(' getExtensionsDir Get Chrome extensions directory');
|
||||||
console.log(' installExtensionWithCache <webstore_id> <name>');
|
console.log(' getTestEnv Get all paths as JSON (for tests)');
|
||||||
|
console.log('');
|
||||||
|
console.log(' getExtensionId <path> Get extension ID from unpacked path');
|
||||||
|
console.log(' loadExtensionManifest Load extension manifest.json');
|
||||||
|
console.log(' loadOrInstallExtension Load or install an extension');
|
||||||
|
console.log(' installExtensionWithCache Install extension with caching');
|
||||||
|
console.log('');
|
||||||
|
console.log('Environment variables:');
|
||||||
|
console.log(' DATA_DIR Base data directory');
|
||||||
|
console.log(' LIB_DIR Library directory (computed if not set)');
|
||||||
|
console.log(' MACHINE_TYPE Machine type override');
|
||||||
|
console.log(' NODE_MODULES_DIR Node modules directory');
|
||||||
|
console.log(' CHROME_BINARY Chrome binary path');
|
||||||
|
console.log(' CHROME_EXTENSIONS_DIR Extensions directory');
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1581,11 +1676,31 @@ if (require.main === module) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case 'getMachineType': {
|
||||||
|
console.log(getMachineType());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 'getLibDir': {
|
||||||
|
console.log(getLibDir());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 'getNodeModulesDir': {
|
||||||
|
console.log(getNodeModulesDir());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case 'getExtensionsDir': {
|
case 'getExtensionsDir': {
|
||||||
console.log(getExtensionsDir());
|
console.log(getExtensionsDir());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case 'getTestEnv': {
|
||||||
|
console.log(JSON.stringify(getTestEnv(), null, 2));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case 'installExtensionWithCache': {
|
case 'installExtensionWithCache': {
|
||||||
const [webstore_id, name] = commandArgs;
|
const [webstore_id, name] = commandArgs;
|
||||||
if (!webstore_id || !name) {
|
if (!webstore_id || !name) {
|
||||||
|
|||||||
@@ -321,6 +321,51 @@ def run_hook_and_parse(
|
|||||||
return returncode, result, stderr
|
return returncode, result, stderr
|
||||||
|
|
||||||
|
|
||||||
|
def call_chrome_utils(command: str, *args: str, env: Optional[dict] = None) -> Tuple[int, str, str]:
|
||||||
|
"""Call chrome_utils.js CLI command.
|
||||||
|
|
||||||
|
This is the central dispatch for calling the JS utilities from Python.
|
||||||
|
All path calculations and Chrome operations are centralized in chrome_utils.js
|
||||||
|
to ensure consistency between Python and JavaScript code.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
command: The CLI command (e.g., 'findChromium', 'getTestEnv')
|
||||||
|
*args: Additional command arguments
|
||||||
|
env: Environment dict (default: current env)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (returncode, stdout, stderr)
|
||||||
|
"""
|
||||||
|
cmd = ['node', str(CHROME_UTILS), command] + list(args)
|
||||||
|
result = subprocess.run(
|
||||||
|
cmd,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=30,
|
||||||
|
env=env or os.environ.copy()
|
||||||
|
)
|
||||||
|
return result.returncode, result.stdout, result.stderr
|
||||||
|
|
||||||
|
|
||||||
|
def get_test_env_from_js() -> Optional[Dict[str, str]]:
|
||||||
|
"""Get test environment paths from chrome_utils.js getTestEnv().
|
||||||
|
|
||||||
|
This is the single source of truth for path calculations.
|
||||||
|
Python calls JS to get all paths to avoid duplicating logic.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with DATA_DIR, MACHINE_TYPE, LIB_DIR, NODE_MODULES_DIR, etc.
|
||||||
|
or None if the JS call fails
|
||||||
|
"""
|
||||||
|
returncode, stdout, stderr = call_chrome_utils('getTestEnv')
|
||||||
|
if returncode == 0 and stdout.strip():
|
||||||
|
try:
|
||||||
|
return json.loads(stdout)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def find_chromium_binary(data_dir: Optional[str] = None) -> Optional[str]:
|
def find_chromium_binary(data_dir: Optional[str] = None) -> Optional[str]:
|
||||||
"""Find the Chromium binary using chrome_utils.js findChromium().
|
"""Find the Chromium binary using chrome_utils.js findChromium().
|
||||||
|
|
||||||
@@ -336,15 +381,12 @@ def find_chromium_binary(data_dir: Optional[str] = None) -> Optional[str]:
|
|||||||
Returns:
|
Returns:
|
||||||
Path to Chromium binary or None if not found
|
Path to Chromium binary or None if not found
|
||||||
"""
|
"""
|
||||||
search_dir = data_dir or os.environ.get('DATA_DIR', '.')
|
env = os.environ.copy()
|
||||||
result = subprocess.run(
|
if data_dir:
|
||||||
['node', str(CHROME_UTILS), 'findChromium', str(search_dir)],
|
env['DATA_DIR'] = str(data_dir)
|
||||||
capture_output=True,
|
returncode, stdout, stderr = call_chrome_utils('findChromium', env=env)
|
||||||
text=True,
|
if returncode == 0 and stdout.strip():
|
||||||
timeout=10
|
return stdout.strip()
|
||||||
)
|
|
||||||
if result.returncode == 0 and result.stdout.strip():
|
|
||||||
return result.stdout.strip()
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -358,21 +400,52 @@ def get_extensions_dir() -> str:
|
|||||||
Returns:
|
Returns:
|
||||||
Path to extensions directory
|
Path to extensions directory
|
||||||
"""
|
"""
|
||||||
result = subprocess.run(
|
returncode, stdout, stderr = call_chrome_utils('getExtensionsDir')
|
||||||
['node', str(CHROME_UTILS), 'getExtensionsDir'],
|
if returncode == 0 and stdout.strip():
|
||||||
capture_output=True,
|
return stdout.strip()
|
||||||
text=True,
|
|
||||||
timeout=10,
|
|
||||||
env=get_test_env()
|
|
||||||
)
|
|
||||||
if result.returncode == 0 and result.stdout.strip():
|
|
||||||
return result.stdout.strip()
|
|
||||||
# Fallback to default computation if JS call fails
|
# Fallback to default computation if JS call fails
|
||||||
data_dir = os.environ.get('DATA_DIR', './data')
|
data_dir = os.environ.get('DATA_DIR', './data')
|
||||||
persona = os.environ.get('ACTIVE_PERSONA', 'Default')
|
persona = os.environ.get('ACTIVE_PERSONA', 'Default')
|
||||||
return str(Path(data_dir) / 'personas' / persona / 'chrome_extensions')
|
return str(Path(data_dir) / 'personas' / persona / 'chrome_extensions')
|
||||||
|
|
||||||
|
|
||||||
|
def get_machine_type_from_js() -> Optional[str]:
|
||||||
|
"""Get machine type from chrome_utils.js getMachineType().
|
||||||
|
|
||||||
|
This is the single source of truth for machine type calculation.
|
||||||
|
Returns values like 'x86_64-linux', 'arm64-darwin'.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Machine type string or None if the JS call fails
|
||||||
|
"""
|
||||||
|
returncode, stdout, stderr = call_chrome_utils('getMachineType')
|
||||||
|
if returncode == 0 and stdout.strip():
|
||||||
|
return stdout.strip()
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def kill_chrome_via_js(pid: int, output_dir: Optional[str] = None) -> bool:
|
||||||
|
"""Kill a Chrome process using chrome_utils.js killChrome().
|
||||||
|
|
||||||
|
This uses the centralized kill logic which handles:
|
||||||
|
- SIGTERM then SIGKILL
|
||||||
|
- Process group killing
|
||||||
|
- Zombie process cleanup
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pid: Process ID to kill
|
||||||
|
output_dir: Optional chrome output directory for PID file cleanup
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if the kill command succeeded
|
||||||
|
"""
|
||||||
|
args = [str(pid)]
|
||||||
|
if output_dir:
|
||||||
|
args.append(str(output_dir))
|
||||||
|
returncode, stdout, stderr = call_chrome_utils('killChrome', *args)
|
||||||
|
return returncode == 0
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Extension Test Helpers
|
# Extension Test Helpers
|
||||||
# Used by extension tests (ublock, istilldontcareaboutcookies, twocaptcha)
|
# Used by extension tests (ublock, istilldontcareaboutcookies, twocaptcha)
|
||||||
@@ -535,21 +608,26 @@ def launch_chromium_session(env: dict, chrome_dir: Path, crawl_id: str) -> Tuple
|
|||||||
def kill_chromium_session(chrome_launch_process: subprocess.Popen, chrome_dir: Path) -> None:
|
def kill_chromium_session(chrome_launch_process: subprocess.Popen, chrome_dir: Path) -> None:
|
||||||
"""Clean up Chromium process launched by launch_chromium_session.
|
"""Clean up Chromium process launched by launch_chromium_session.
|
||||||
|
|
||||||
|
Uses chrome_utils.js killChrome for proper process group handling.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
chrome_launch_process: The Popen object from launch_chromium_session
|
chrome_launch_process: The Popen object from launch_chromium_session
|
||||||
chrome_dir: The chrome directory containing chrome.pid
|
chrome_dir: The chrome directory containing chrome.pid
|
||||||
"""
|
"""
|
||||||
|
# First try to terminate the launch process gracefully
|
||||||
try:
|
try:
|
||||||
chrome_launch_process.send_signal(signal.SIGTERM)
|
chrome_launch_process.send_signal(signal.SIGTERM)
|
||||||
chrome_launch_process.wait(timeout=5)
|
chrome_launch_process.wait(timeout=5)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Read PID and use JS to kill with proper cleanup
|
||||||
chrome_pid_file = chrome_dir / 'chrome.pid'
|
chrome_pid_file = chrome_dir / 'chrome.pid'
|
||||||
if chrome_pid_file.exists():
|
if chrome_pid_file.exists():
|
||||||
try:
|
try:
|
||||||
chrome_pid = int(chrome_pid_file.read_text().strip())
|
chrome_pid = int(chrome_pid_file.read_text().strip())
|
||||||
os.kill(chrome_pid, signal.SIGKILL)
|
kill_chrome_via_js(chrome_pid, str(chrome_dir))
|
||||||
except (OSError, ValueError):
|
except (ValueError, FileNotFoundError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@@ -683,25 +761,28 @@ def setup_chrome_session(
|
|||||||
return chrome_launch_process, chrome_pid, snapshot_chrome_dir
|
return chrome_launch_process, chrome_pid, snapshot_chrome_dir
|
||||||
|
|
||||||
|
|
||||||
def cleanup_chrome(chrome_launch_process: subprocess.Popen, chrome_pid: int) -> None:
|
def cleanup_chrome(chrome_launch_process: subprocess.Popen, chrome_pid: int, chrome_dir: Optional[Path] = None) -> None:
|
||||||
"""Clean up Chrome processes.
|
"""Clean up Chrome processes using chrome_utils.js killChrome.
|
||||||
|
|
||||||
Sends SIGTERM to the chrome_launch_process and SIGKILL to the Chrome PID.
|
Uses the centralized kill logic from chrome_utils.js which handles:
|
||||||
Ignores errors if processes are already dead.
|
- SIGTERM then SIGKILL
|
||||||
|
- Process group killing
|
||||||
|
- Zombie process cleanup
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
chrome_launch_process: The Popen object for the chrome launch hook
|
chrome_launch_process: The Popen object for the chrome launch hook
|
||||||
chrome_pid: The PID of the Chrome process
|
chrome_pid: The PID of the Chrome process
|
||||||
|
chrome_dir: Optional path to chrome output directory
|
||||||
"""
|
"""
|
||||||
|
# First try to terminate the launch process gracefully
|
||||||
try:
|
try:
|
||||||
chrome_launch_process.send_signal(signal.SIGTERM)
|
chrome_launch_process.send_signal(signal.SIGTERM)
|
||||||
chrome_launch_process.wait(timeout=5)
|
chrome_launch_process.wait(timeout=5)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
try:
|
|
||||||
os.kill(chrome_pid, signal.SIGKILL)
|
# Use JS to kill Chrome with proper process group handling
|
||||||
except OSError:
|
kill_chrome_via_js(chrome_pid, str(chrome_dir) if chrome_dir else None)
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
Integration tests for favicon plugin
|
Integration tests for favicon plugin
|
||||||
|
|
||||||
Tests verify:
|
Tests verify:
|
||||||
pass
|
|
||||||
1. Plugin script exists
|
1. Plugin script exists
|
||||||
2. requests library is available
|
2. requests library is available
|
||||||
3. Favicon extraction works for real example.com
|
3. Favicon extraction works for real example.com
|
||||||
@@ -21,9 +20,15 @@ from pathlib import Path
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||||
|
get_plugin_dir,
|
||||||
|
get_hook_script,
|
||||||
|
parse_jsonl_output,
|
||||||
|
)
|
||||||
|
|
||||||
PLUGIN_DIR = Path(__file__).parent.parent
|
|
||||||
FAVICON_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_favicon.*'), None)
|
PLUGIN_DIR = get_plugin_dir(__file__)
|
||||||
|
FAVICON_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_favicon.*')
|
||||||
TEST_URL = 'https://example.com'
|
TEST_URL = 'https://example.com'
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
Integration tests for mercury plugin
|
Integration tests for mercury plugin
|
||||||
|
|
||||||
Tests verify:
|
Tests verify:
|
||||||
pass
|
|
||||||
1. Hook script exists
|
1. Hook script exists
|
||||||
2. Dependencies installed via validation hooks
|
2. Dependencies installed via validation hooks
|
||||||
3. Verify deps with abx-pkg
|
3. Verify deps with abx-pkg
|
||||||
@@ -19,9 +18,15 @@ import tempfile
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
PLUGIN_DIR = Path(__file__).parent.parent
|
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
get_plugin_dir,
|
||||||
MERCURY_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_mercury.*'), None)
|
get_hook_script,
|
||||||
|
PLUGINS_ROOT,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
PLUGIN_DIR = get_plugin_dir(__file__)
|
||||||
|
MERCURY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_mercury.*')
|
||||||
TEST_URL = 'https://example.com'
|
TEST_URL = 'https://example.com'
|
||||||
|
|
||||||
def test_hook_script_exists():
|
def test_hook_script_exists():
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
Integration tests for readability plugin
|
Integration tests for readability plugin
|
||||||
|
|
||||||
Tests verify:
|
Tests verify:
|
||||||
pass
|
|
||||||
1. Validate hook checks for readability-extractor binary
|
1. Validate hook checks for readability-extractor binary
|
||||||
2. Verify deps with abx-pkg
|
2. Verify deps with abx-pkg
|
||||||
3. Plugin reports missing dependency correctly
|
3. Plugin reports missing dependency correctly
|
||||||
@@ -18,10 +17,15 @@ from pathlib import Path
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||||
|
get_plugin_dir,
|
||||||
|
get_hook_script,
|
||||||
|
PLUGINS_ROOT,
|
||||||
|
)
|
||||||
|
|
||||||
PLUGIN_DIR = Path(__file__).parent.parent
|
|
||||||
PLUGINS_ROOT = PLUGIN_DIR.parent
|
PLUGIN_DIR = get_plugin_dir(__file__)
|
||||||
READABILITY_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_readability.*'))
|
READABILITY_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_readability.*')
|
||||||
TEST_URL = 'https://example.com'
|
TEST_URL = 'https://example.com'
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -77,27 +77,9 @@ def has_staticfile_output() -> bool:
|
|||||||
return staticfile_dir.exists() and any(staticfile_dir.iterdir())
|
return staticfile_dir.exists() and any(staticfile_dir.iterdir())
|
||||||
|
|
||||||
|
|
||||||
# Chrome binary search paths
|
# Chrome session directory (relative to extractor output dir)
|
||||||
CHROMIUM_BINARY_NAMES_LINUX = [
|
# Note: Chrome binary is obtained via CHROME_BINARY env var, not searched for.
|
||||||
'chromium', 'chromium-browser', 'chromium-browser-beta',
|
# The centralized Chrome binary search is in chrome_utils.js findChromium().
|
||||||
'chromium-browser-unstable', 'chromium-browser-canary', 'chromium-browser-dev',
|
|
||||||
]
|
|
||||||
CHROME_BINARY_NAMES_LINUX = [
|
|
||||||
'google-chrome', 'google-chrome-stable', 'google-chrome-beta',
|
|
||||||
'google-chrome-canary', 'google-chrome-unstable', 'google-chrome-dev', 'chrome',
|
|
||||||
]
|
|
||||||
CHROME_BINARY_NAMES_MACOS = [
|
|
||||||
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
|
||||||
'/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
|
|
||||||
]
|
|
||||||
CHROMIUM_BINARY_NAMES_MACOS = ['/Applications/Chromium.app/Contents/MacOS/Chromium']
|
|
||||||
|
|
||||||
ALL_CHROME_BINARIES = (
|
|
||||||
CHROME_BINARY_NAMES_LINUX + CHROMIUM_BINARY_NAMES_LINUX +
|
|
||||||
CHROME_BINARY_NAMES_MACOS + CHROMIUM_BINARY_NAMES_MACOS
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
CHROME_SESSION_DIR = '../chrome'
|
CHROME_SESSION_DIR = '../chrome'
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
Integration tests for title plugin
|
Integration tests for title plugin
|
||||||
|
|
||||||
Tests verify:
|
Tests verify:
|
||||||
pass
|
|
||||||
1. Plugin script exists
|
1. Plugin script exists
|
||||||
2. Node.js is available
|
2. Node.js is available
|
||||||
3. Title extraction works for real example.com
|
3. Title extraction works for real example.com
|
||||||
@@ -20,9 +19,15 @@ from pathlib import Path
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
|
||||||
|
get_plugin_dir,
|
||||||
|
get_hook_script,
|
||||||
|
parse_jsonl_output,
|
||||||
|
)
|
||||||
|
|
||||||
PLUGIN_DIR = Path(__file__).parent.parent
|
|
||||||
TITLE_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_title.*'), None)
|
PLUGIN_DIR = get_plugin_dir(__file__)
|
||||||
|
TITLE_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_title.*')
|
||||||
TEST_URL = 'https://example.com'
|
TEST_URL = 'https://example.com'
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user