mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-04 01:46:54 +10:00
Reduce code duplication between Chrome utilities (#1737)
This change consolidates duplicated logic between chrome_utils.js and the extension installer hooks, as well as between the Python plugin tests.

JavaScript changes:
- Add getExtensionsDir() to centralize extension directory path calculation
- Add installExtensionWithCache() to handle the extension install + cache workflow
- Add CLI commands for the new utilities
- Refactor all 3 extension installers (ublock, istilldontcareaboutcookies, twocaptcha) to use the shared utilities, reducing each from ~115 lines to ~60
- Update the chrome_launch hook to use getExtensionsDir()

Python test changes:
- Add chrome_test_helpers.py with shared Chrome session management utilities
- Refactor the infiniscroll and modalcloser tests to use the shared helpers
- Centralize setup_chrome_session(), cleanup_chrome(), and get_test_env()
- Add a chrome_session() context manager for automatic cleanup

Net result: ~208 lines of code removed while maintaining the same functionality.
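To make the before/after concrete, here is a rough sketch of what each installer hook body reduces to once it delegates to the shared utilities. This is a sketch only: the webstore_id, the run-if-main boilerplate, and the exact log lines are elided or abbreviated here; see the per-extension diffs further down for the real code.

```js
// Sketch only: the general shape of a refactored installer hook
// (see the ublock / istilldontcareaboutcookies / twocaptcha diffs below).
const { installExtensionWithCache } = require('../chrome/chrome_utils.js');

const EXTENSION = {
    webstore_id: '<webstore-id-elided>',   // real ID lives in the unchanged part of each hook
    name: 'ublock',
};

async function main() {
    // One call now covers the cache check, the install, and the cache write
    // that each hook previously reimplemented on its own.
    const extension = await installExtensionWithCache(EXTENSION);
    if (extension) {
        console.log('[+] Ads and trackers will be blocked during archiving');
    }
    return extension;
}

module.exports = { EXTENSION };
```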
@@ -1312,6 +1312,99 @@ function findChromium() {
    return null;
}

// ============================================================================
// Shared Extension Installer Utilities
// ============================================================================

/**
 * Get the extensions directory path.
 * Centralized path calculation used by extension installers and chrome launch.
 *
 * Path is derived from environment variables in this priority:
 *   1. CHROME_EXTENSIONS_DIR (explicit override)
 *   2. DATA_DIR/personas/ACTIVE_PERSONA/chrome_extensions (default)
 *
 * @returns {string} - Absolute path to extensions directory
 */
function getExtensionsDir() {
    const dataDir = getEnv('DATA_DIR', './data');
    const persona = getEnv('ACTIVE_PERSONA', 'Default');
    return getEnv('CHROME_EXTENSIONS_DIR') ||
        path.join(dataDir, 'personas', persona, 'chrome_extensions');
}

/**
 * Install a Chrome extension with caching support.
 *
 * This is the main entry point for extension installer hooks. It handles:
 *   - Checking for cached extension metadata
 *   - Installing the extension if not cached
 *   - Writing cache file for future runs
 *
 * @param {Object} extension - Extension metadata object
 * @param {string} extension.webstore_id - Chrome Web Store extension ID
 * @param {string} extension.name - Human-readable extension name (used for cache file)
 * @param {Object} [options] - Options
 * @param {string} [options.extensionsDir] - Override extensions directory
 * @param {boolean} [options.quiet=false] - Suppress info logging
 * @returns {Promise<Object|null>} - Installed extension metadata or null on failure
 */
async function installExtensionWithCache(extension, options = {}) {
    const {
        extensionsDir = getExtensionsDir(),
        quiet = false,
    } = options;

    const cacheFile = path.join(extensionsDir, `${extension.name}.extension.json`);

    // Check if extension is already cached and valid
    if (fs.existsSync(cacheFile)) {
        try {
            const cached = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
            const manifestPath = path.join(cached.unpacked_path, 'manifest.json');

            if (fs.existsSync(manifestPath)) {
                if (!quiet) {
                    console.log(`[*] ${extension.name} extension already installed (using cache)`);
                }
                return cached;
            }
        } catch (e) {
            // Cache file corrupted, re-install
            console.warn(`[⚠️] Extension cache corrupted for ${extension.name}, re-installing...`);
        }
    }

    // Install extension
    if (!quiet) {
        console.log(`[*] Installing ${extension.name} extension...`);
    }

    const installedExt = await loadOrInstallExtension(extension, extensionsDir);

    if (!installedExt) {
        console.error(`[❌] Failed to install ${extension.name} extension`);
        return null;
    }

    // Write cache file
    try {
        await fs.promises.mkdir(extensionsDir, { recursive: true });
        await fs.promises.writeFile(cacheFile, JSON.stringify(installedExt, null, 2));
        if (!quiet) {
            console.log(`[+] Extension metadata written to ${cacheFile}`);
        }
    } catch (e) {
        console.warn(`[⚠️] Failed to write cache file: ${e.message}`);
    }

    if (!quiet) {
        console.log(`[+] ${extension.name} extension installed`);
    }

    return installedExt;
}

// Export all functions
module.exports = {
    // Environment helpers

@@ -1349,6 +1442,9 @@ module.exports = {
    getExtensionPaths,
    waitForExtensionTarget,
    getExtensionTargets,
    // Shared extension installer utilities
    getExtensionsDir,
    installExtensionWithCache,
    // Deprecated - use enableExtensions option instead
    getExtensionLaunchArgs,
};

@@ -1371,6 +1467,8 @@ if (require.main === module) {
    console.log(' loadExtensionManifest <path>');
    console.log(' getExtensionLaunchArgs <extensions_json>');
    console.log(' loadOrInstallExtension <webstore_id> <name> [extensions_dir]');
    console.log(' getExtensionsDir');
    console.log(' installExtensionWithCache <webstore_id> <name>');
    process.exit(1);
}

@@ -1483,6 +1581,26 @@ if (require.main === module) {
            break;
        }

        case 'getExtensionsDir': {
            console.log(getExtensionsDir());
            break;
        }

        case 'installExtensionWithCache': {
            const [webstore_id, name] = commandArgs;
            if (!webstore_id || !name) {
                console.error('Usage: installExtensionWithCache <webstore_id> <name>');
                process.exit(1);
            }
            const ext = await installExtensionWithCache({ webstore_id, name });
            if (ext) {
                console.log(JSON.stringify(ext, null, 2));
            } else {
                process.exit(1);
            }
            break;
        }

        default:
            console.error(`Unknown command: ${command}`);
            process.exit(1);

@@ -38,6 +38,7 @@ const {
    killChrome,
    getEnv,
    writePidWithMtime,
    getExtensionsDir,
} = require('./chrome_utils.js');

// Extractor metadata

@@ -115,8 +116,7 @@ async function main() {
    if (version) console.error(`[*] Version: ${version}`);

    // Load installed extensions
    const extensionsDir = getEnv('CHROME_EXTENSIONS_DIR') ||
        path.join(getEnv('DATA_DIR', '.'), 'personas', getEnv('ACTIVE_PERSONA', 'Default'), 'chrome_extensions');
    const extensionsDir = getExtensionsDir();
    const userDataDir = getEnv('CHROME_USER_DATA_DIR');

    if (userDataDir) {
archivebox/plugins/chrome/tests/chrome_test_helpers.py (new file, 276 lines)
@@ -0,0 +1,276 @@
"""
Shared Chrome test helpers for plugin integration tests.

This module provides common utilities for Chrome-based plugin tests, reducing
duplication across test files. It uses the JavaScript utilities from chrome_utils.js
where appropriate.

Usage:
    from archivebox.plugins.chrome.tests.chrome_test_helpers import (
        get_test_env,
        setup_chrome_session,
        cleanup_chrome,
        find_chromium_binary,
        get_node_modules_dir,
    )
"""

import os
import signal
import subprocess
import time
from pathlib import Path
from typing import Tuple, Optional
from contextlib import contextmanager


# Plugin directory locations
CHROME_PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = CHROME_PLUGIN_DIR.parent

# Hook script locations
CHROME_LAUNCH_HOOK = CHROME_PLUGIN_DIR / 'on_Crawl__30_chrome_launch.bg.js'
CHROME_TAB_HOOK = CHROME_PLUGIN_DIR / 'on_Snapshot__20_chrome_tab.bg.js'
CHROME_NAVIGATE_HOOK = next(CHROME_PLUGIN_DIR.glob('on_Snapshot__*_chrome_navigate.*'), None)
CHROME_UTILS = CHROME_PLUGIN_DIR / 'chrome_utils.js'


def get_node_modules_dir() -> Path:
    """Get NODE_MODULES_DIR for tests, checking env first.

    Returns the path to the node_modules directory, checking:
    1. NODE_MODULES_DIR environment variable
    2. Computed from LIB_DIR via ArchiveBox config
    """
    if os.environ.get('NODE_MODULES_DIR'):
        return Path(os.environ['NODE_MODULES_DIR'])
    # Otherwise compute from LIB_DIR
    from archivebox.config.common import STORAGE_CONFIG
    lib_dir = Path(os.environ.get('LIB_DIR') or str(STORAGE_CONFIG.LIB_DIR))
    return lib_dir / 'npm' / 'node_modules'


def get_test_env() -> dict:
    """Get environment dict with NODE_MODULES_DIR set correctly for tests.

    Returns a copy of os.environ with NODE_MODULES_DIR added/updated.
    Use this for all subprocess calls in plugin tests.
    """
    env = os.environ.copy()
    env['NODE_MODULES_DIR'] = str(get_node_modules_dir())
    return env


def find_chromium_binary(data_dir: Optional[str] = None) -> Optional[str]:
    """Find the Chromium binary using chrome_utils.js findChromium().

    This uses the centralized findChromium() function which checks:
    - CHROME_BINARY env var
    - @puppeteer/browsers install locations
    - System Chromium locations
    - Falls back to Chrome (with warning)

    Args:
        data_dir: Directory where chromium was installed (contains chromium/ subdir)

    Returns:
        Path to Chromium binary or None if not found
    """
    search_dir = data_dir or os.environ.get('DATA_DIR', '.')
    result = subprocess.run(
        ['node', str(CHROME_UTILS), 'findChromium', str(search_dir)],
        capture_output=True,
        text=True,
        timeout=10
    )
    if result.returncode == 0 and result.stdout.strip():
        return result.stdout.strip()
    return None


def get_extensions_dir() -> str:
    """Get the Chrome extensions directory using chrome_utils.js getExtensionsDir().

    This uses the centralized path calculation from chrome_utils.js which checks:
    - CHROME_EXTENSIONS_DIR env var
    - DATA_DIR/personas/ACTIVE_PERSONA/chrome_extensions

    Returns:
        Path to extensions directory
    """
    result = subprocess.run(
        ['node', str(CHROME_UTILS), 'getExtensionsDir'],
        capture_output=True,
        text=True,
        timeout=10,
        env=get_test_env()
    )
    if result.returncode == 0 and result.stdout.strip():
        return result.stdout.strip()
    # Fallback to default computation if JS call fails
    data_dir = os.environ.get('DATA_DIR', './data')
    persona = os.environ.get('ACTIVE_PERSONA', 'Default')
    return str(Path(data_dir) / 'personas' / persona / 'chrome_extensions')


def setup_chrome_session(
    tmpdir: Path,
    crawl_id: str = 'test-crawl',
    snapshot_id: str = 'test-snapshot',
    test_url: str = 'about:blank',
    navigate: bool = True,
    timeout: int = 15,
) -> Tuple[subprocess.Popen, int, Path]:
    """Set up a Chrome session with tab and optional navigation.

    Creates the directory structure, launches Chrome, creates a tab,
    and optionally navigates to the test URL.

    Args:
        tmpdir: Temporary directory for test files
        crawl_id: ID to use for the crawl
        snapshot_id: ID to use for the snapshot
        test_url: URL to navigate to (if navigate=True)
        navigate: Whether to navigate to the URL after creating tab
        timeout: Seconds to wait for Chrome to start

    Returns:
        Tuple of (chrome_launch_process, chrome_pid, snapshot_chrome_dir)

    Raises:
        RuntimeError: If Chrome fails to start or tab creation fails
    """
    crawl_dir = Path(tmpdir) / 'crawl'
    crawl_dir.mkdir(exist_ok=True)
    chrome_dir = crawl_dir / 'chrome'
    chrome_dir.mkdir(exist_ok=True)

    env = get_test_env()
    env['CHROME_HEADLESS'] = 'true'

    # Launch Chrome at crawl level
    chrome_launch_process = subprocess.Popen(
        ['node', str(CHROME_LAUNCH_HOOK), f'--crawl-id={crawl_id}'],
        cwd=str(chrome_dir),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        env=env
    )

    # Wait for Chrome to launch
    for i in range(timeout):
        if chrome_launch_process.poll() is not None:
            stdout, stderr = chrome_launch_process.communicate()
            raise RuntimeError(f"Chrome launch failed:\nStdout: {stdout}\nStderr: {stderr}")
        if (chrome_dir / 'cdp_url.txt').exists():
            break
        time.sleep(1)

    if not (chrome_dir / 'cdp_url.txt').exists():
        raise RuntimeError(f"Chrome CDP URL not found after {timeout}s")

    chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())

    # Create snapshot directory structure
    snapshot_dir = Path(tmpdir) / 'snapshot'
    snapshot_dir.mkdir(exist_ok=True)
    snapshot_chrome_dir = snapshot_dir / 'chrome'
    snapshot_chrome_dir.mkdir(exist_ok=True)

    # Create tab
    tab_env = env.copy()
    tab_env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
    result = subprocess.run(
        ['node', str(CHROME_TAB_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}', f'--crawl-id={crawl_id}'],
        cwd=str(snapshot_chrome_dir),
        capture_output=True,
        text=True,
        timeout=60,
        env=tab_env
    )
    if result.returncode != 0:
        cleanup_chrome(chrome_launch_process, chrome_pid)
        raise RuntimeError(f"Tab creation failed: {result.stderr}")

    # Navigate to URL if requested
    if navigate and CHROME_NAVIGATE_HOOK and test_url != 'about:blank':
        result = subprocess.run(
            ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
            cwd=str(snapshot_chrome_dir),
            capture_output=True,
            text=True,
            timeout=120,
            env=env
        )
        if result.returncode != 0:
            cleanup_chrome(chrome_launch_process, chrome_pid)
            raise RuntimeError(f"Navigation failed: {result.stderr}")

    return chrome_launch_process, chrome_pid, snapshot_chrome_dir


def cleanup_chrome(chrome_launch_process: subprocess.Popen, chrome_pid: int) -> None:
    """Clean up Chrome processes.

    Sends SIGTERM to the chrome_launch_process and SIGKILL to the Chrome PID.
    Ignores errors if processes are already dead.

    Args:
        chrome_launch_process: The Popen object for the chrome launch hook
        chrome_pid: The PID of the Chrome process
    """
    try:
        chrome_launch_process.send_signal(signal.SIGTERM)
        chrome_launch_process.wait(timeout=5)
    except Exception:
        pass
    try:
        os.kill(chrome_pid, signal.SIGKILL)
    except OSError:
        pass


@contextmanager
def chrome_session(
    tmpdir: Path,
    crawl_id: str = 'test-crawl',
    snapshot_id: str = 'test-snapshot',
    test_url: str = 'about:blank',
    navigate: bool = True,
    timeout: int = 15,
):
    """Context manager for Chrome sessions with automatic cleanup.

    Usage:
        with chrome_session(tmpdir, test_url='https://example.com') as (process, pid, chrome_dir):
            # Run tests with chrome session
            pass
        # Chrome automatically cleaned up

    Args:
        tmpdir: Temporary directory for test files
        crawl_id: ID to use for the crawl
        snapshot_id: ID to use for the snapshot
        test_url: URL to navigate to (if navigate=True)
        navigate: Whether to navigate to the URL after creating tab
        timeout: Seconds to wait for Chrome to start

    Yields:
        Tuple of (chrome_launch_process, chrome_pid, snapshot_chrome_dir)
    """
    chrome_launch_process = None
    chrome_pid = None
    try:
        chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(
            tmpdir=tmpdir,
            crawl_id=crawl_id,
            snapshot_id=snapshot_id,
            test_url=test_url,
            navigate=navigate,
            timeout=timeout,
        )
        yield chrome_launch_process, chrome_pid, snapshot_chrome_dir
    finally:
        if chrome_launch_process and chrome_pid:
            cleanup_chrome(chrome_launch_process, chrome_pid)
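As a usage illustration (not part of this PR's diff), a plugin test can now manage the whole browser lifecycle with the chrome_session() context manager from the module above; the test name and assertion below are hypothetical, only the helper API comes from chrome_test_helpers.py.

```python
# Hypothetical usage sketch for the helpers above (not part of this PR's diff).
import tempfile
from pathlib import Path

from archivebox.plugins.chrome.tests.chrome_test_helpers import chrome_session


def test_example_uses_chrome_session():
    """Launch Chrome, create a tab, and rely on automatic cleanup."""
    with tempfile.TemporaryDirectory() as tmpdir:
        with chrome_session(Path(tmpdir), navigate=False) as (process, pid, chrome_dir):
            # chrome_dir is the snapshot-level chrome/ output directory created by the helper
            assert chrome_dir.exists()
        # cleanup_chrome() has been called by the time the with-block exits
```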
@@ -14,7 +14,6 @@ Tests verify:
import json
import os
import re
import signal
import subprocess
import time
import tempfile

@@ -22,37 +21,19 @@ from pathlib import Path

import pytest

# Import shared Chrome test helpers
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
    get_test_env,
    setup_chrome_session,
    cleanup_chrome,
)


PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
INFINISCROLL_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_infiniscroll.*'), None)
CHROME_LAUNCH_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Crawl__30_chrome_launch.bg.js'
CHROME_TAB_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Snapshot__20_chrome_tab.bg.js'
CHROME_NAVIGATE_HOOK = next((PLUGINS_ROOT / 'chrome').glob('on_Snapshot__*_chrome_navigate.*'), None)
TEST_URL = 'https://www.singsing.movie/'


def get_node_modules_dir():
    """Get NODE_MODULES_DIR for tests, checking env first."""
    # Check if NODE_MODULES_DIR is already set in environment
    if os.environ.get('NODE_MODULES_DIR'):
        return Path(os.environ['NODE_MODULES_DIR'])
    # Otherwise compute from LIB_DIR
    from archivebox.config.common import STORAGE_CONFIG
    lib_dir = Path(os.environ.get('LIB_DIR') or str(STORAGE_CONFIG.LIB_DIR))
    return lib_dir / 'npm' / 'node_modules'


NODE_MODULES_DIR = get_node_modules_dir()


def get_test_env():
    """Get environment with NODE_MODULES_DIR set correctly."""
    env = os.environ.copy()
    env['NODE_MODULES_DIR'] = str(NODE_MODULES_DIR)
    return env


def test_hook_script_exists():
    """Verify on_Snapshot hook exists."""
    assert INFINISCROLL_HOOK is not None, "Infiniscroll hook not found"

@@ -117,95 +98,18 @@ def test_fails_gracefully_without_chrome_session():
        f"Should mention chrome/CDP/puppeteer in error: {result.stderr}"


def setup_chrome_session(tmpdir):
    """Helper to set up Chrome session with tab and navigation."""
    crawl_dir = Path(tmpdir) / 'crawl'
    crawl_dir.mkdir()
    chrome_dir = crawl_dir / 'chrome'
    chrome_dir.mkdir()

    env = get_test_env()
    env['CHROME_HEADLESS'] = 'true'

    # Launch Chrome at crawl level
    chrome_launch_process = subprocess.Popen(
        ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-infiniscroll'],
        cwd=str(chrome_dir),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        env=env
    )

    # Wait for Chrome to launch
    for i in range(15):
        if chrome_launch_process.poll() is not None:
            stdout, stderr = chrome_launch_process.communicate()
            raise RuntimeError(f"Chrome launch failed:\nStdout: {stdout}\nStderr: {stderr}")
        if (chrome_dir / 'cdp_url.txt').exists():
            break
        time.sleep(1)

    if not (chrome_dir / 'cdp_url.txt').exists():
        raise RuntimeError("Chrome CDP URL not found after 15s")

    chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())

    # Create snapshot directory structure
    snapshot_dir = Path(tmpdir) / 'snapshot'
    snapshot_dir.mkdir()
    snapshot_chrome_dir = snapshot_dir / 'chrome'
    snapshot_chrome_dir.mkdir()

    # Create tab
    tab_env = env.copy()
    tab_env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
    result = subprocess.run(
        ['node', str(CHROME_TAB_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-infiniscroll', '--crawl-id=test-infiniscroll'],
        cwd=str(snapshot_chrome_dir),
        capture_output=True,
        text=True,
        timeout=60,
        env=tab_env
    )
    if result.returncode != 0:
        raise RuntimeError(f"Tab creation failed: {result.stderr}")

    # Navigate to URL
    result = subprocess.run(
        ['node', str(CHROME_NAVIGATE_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-infiniscroll'],
        cwd=str(snapshot_chrome_dir),
        capture_output=True,
        text=True,
        timeout=120,
        env=env
    )
    if result.returncode != 0:
        raise RuntimeError(f"Navigation failed: {result.stderr}")

    return chrome_launch_process, chrome_pid, snapshot_chrome_dir


def cleanup_chrome(chrome_launch_process, chrome_pid):
    """Helper to clean up Chrome processes."""
    try:
        chrome_launch_process.send_signal(signal.SIGTERM)
        chrome_launch_process.wait(timeout=5)
    except:
        pass
    try:
        os.kill(chrome_pid, signal.SIGKILL)
    except OSError:
        pass


def test_scrolls_page_and_outputs_stats():
    """Integration test: scroll page and verify JSONL output format."""
    with tempfile.TemporaryDirectory() as tmpdir:
        chrome_launch_process = None
        chrome_pid = None
        try:
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(
                Path(tmpdir),
                crawl_id='test-infiniscroll',
                snapshot_id='snap-infiniscroll',
                test_url=TEST_URL,
            )

            # Create infiniscroll output directory (sibling to chrome)
            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'

@@ -265,7 +169,12 @@ def test_config_scroll_limit_honored():
        chrome_launch_process = None
        chrome_pid = None
        try:
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(
                Path(tmpdir),
                crawl_id='test-scroll-limit',
                snapshot_id='snap-limit',
                test_url=TEST_URL,
            )

            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
            infiniscroll_dir.mkdir()

@@ -317,7 +226,12 @@ def test_config_timeout_honored():
        chrome_launch_process = None
        chrome_pid = None
        try:
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(
                Path(tmpdir),
                crawl_id='test-timeout',
                snapshot_id='snap-timeout',
                test_url=TEST_URL,
            )

            infiniscroll_dir = snapshot_chrome_dir.parent / 'infiniscroll'
            infiniscroll_dir.mkdir()
@@ -17,11 +17,8 @@
 * - Works on thousands of websites out of the box
 */

const path = require('path');
const fs = require('fs');

// Import extension utilities
const extensionUtils = require('../chrome/chrome_utils.js');
const { installExtensionWithCache } = require('../chrome/chrome_utils.js');

// Extension metadata
const EXTENSION = {

@@ -29,69 +26,17 @@ const EXTENSION = {
    name: 'istilldontcareaboutcookies',
};

// Get extensions directory from environment or use default
const EXTENSIONS_DIR = process.env.CHROME_EXTENSIONS_DIR ||
    path.join(process.env.DATA_DIR || './data', 'personas', process.env.ACTIVE_PERSONA || 'Default', 'chrome_extensions');

/**
 * Install the I Still Don't Care About Cookies extension
 */
async function installCookiesExtension() {
    console.log('[*] Installing I Still Don\'t Care About Cookies extension...');

    // Install the extension
    const extension = await extensionUtils.loadOrInstallExtension(EXTENSION, EXTENSIONS_DIR);

    if (!extension) {
        console.error('[❌] Failed to install I Still Don\'t Care About Cookies extension');
        return null;
    }

    console.log('[+] I Still Don\'t Care About Cookies extension installed');
    console.log('[+] Cookie banners will be automatically dismissed during archiving');

    return extension;
}

/**
 * Main entry point - install extension before archiving
 *
 * Note: This extension works out of the box with no configuration needed.
 * It automatically detects and dismisses cookie banners on page load.
 */

/**
 * Main entry point - install extension before archiving
 */
async function main() {
    // Check if extension is already cached
    const cacheFile = path.join(EXTENSIONS_DIR, 'istilldontcareaboutcookies.extension.json');
    const extension = await installExtensionWithCache(EXTENSION);

    if (fs.existsSync(cacheFile)) {
        try {
            const cached = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
            const manifestPath = path.join(cached.unpacked_path, 'manifest.json');

            if (fs.existsSync(manifestPath)) {
                console.log('[*] I Still Don\'t Care About Cookies extension already installed (using cache)');
                return cached;
            }
        } catch (e) {
            // Cache file corrupted, re-install
            console.warn('[⚠️] Extension cache corrupted, re-installing...');
        }
    }

    // Install extension
    const extension = await installCookiesExtension();

    // Export extension metadata for chrome plugin to load
    if (extension) {
        // Write extension info to a cache file that chrome plugin can read
        await fs.promises.mkdir(EXTENSIONS_DIR, { recursive: true });
        await fs.promises.writeFile(
            cacheFile,
            JSON.stringify(extension, null, 2)
        );
        console.log(`[+] Extension metadata written to ${cacheFile}`);
        console.log('[+] Cookie banners will be automatically dismissed during archiving');
    }

    return extension;

@@ -100,7 +45,6 @@ async function main() {
// Export functions for use by other plugins
module.exports = {
    EXTENSION,
    installCookiesExtension,
};

// Run if executed directly
@@ -22,38 +22,20 @@ from pathlib import Path
import pytest

# Import shared Chrome test helpers
from archivebox.plugins.chrome.tests.chrome_test_helpers import (
    get_test_env,
    setup_chrome_session,
    cleanup_chrome,
)


PLUGIN_DIR = Path(__file__).parent.parent
PLUGINS_ROOT = PLUGIN_DIR.parent
MODALCLOSER_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_modalcloser.*'), None)
CHROME_LAUNCH_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Crawl__30_chrome_launch.bg.js'
CHROME_TAB_HOOK = PLUGINS_ROOT / 'chrome' / 'on_Snapshot__20_chrome_tab.bg.js'
CHROME_NAVIGATE_HOOK = next((PLUGINS_ROOT / 'chrome').glob('on_Snapshot__*_chrome_navigate.*'), None)
TEST_URL = 'https://www.singsing.movie/'
COOKIE_CONSENT_TEST_URL = 'https://www.filmin.es/'


def get_node_modules_dir():
    """Get NODE_MODULES_DIR for tests, checking env first."""
    # Check if NODE_MODULES_DIR is already set in environment
    if os.environ.get('NODE_MODULES_DIR'):
        return Path(os.environ['NODE_MODULES_DIR'])
    # Otherwise compute from LIB_DIR
    from archivebox.config.common import STORAGE_CONFIG
    lib_dir = Path(os.environ.get('LIB_DIR') or str(STORAGE_CONFIG.LIB_DIR))
    return lib_dir / 'npm' / 'node_modules'


NODE_MODULES_DIR = get_node_modules_dir()


def get_test_env():
    """Get environment with NODE_MODULES_DIR set correctly."""
    env = os.environ.copy()
    env['NODE_MODULES_DIR'] = str(NODE_MODULES_DIR)
    return env


def test_hook_script_exists():
    """Verify on_Snapshot hook exists."""
    assert MODALCLOSER_HOOK is not None, "Modalcloser hook not found"

@@ -118,76 +100,6 @@ def test_fails_gracefully_without_chrome_session():
        f"Should mention chrome/CDP/puppeteer in error: {result.stderr}"


def setup_chrome_session(tmpdir):
    """Helper to set up Chrome session with tab."""
    crawl_dir = Path(tmpdir) / 'crawl'
    crawl_dir.mkdir()
    chrome_dir = crawl_dir / 'chrome'
    chrome_dir.mkdir()

    env = get_test_env()
    env['CHROME_HEADLESS'] = 'true'

    # Launch Chrome at crawl level
    chrome_launch_process = subprocess.Popen(
        ['node', str(CHROME_LAUNCH_HOOK), '--crawl-id=test-modalcloser'],
        cwd=str(chrome_dir),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        env=env
    )

    # Wait for Chrome to launch
    for i in range(15):
        if chrome_launch_process.poll() is not None:
            stdout, stderr = chrome_launch_process.communicate()
            raise RuntimeError(f"Chrome launch failed:\nStdout: {stdout}\nStderr: {stderr}")
        if (chrome_dir / 'cdp_url.txt').exists():
            break
        time.sleep(1)

    if not (chrome_dir / 'cdp_url.txt').exists():
        raise RuntimeError("Chrome CDP URL not found after 15s")

    chrome_pid = int((chrome_dir / 'chrome.pid').read_text().strip())

    # Create snapshot directory structure
    snapshot_dir = Path(tmpdir) / 'snapshot'
    snapshot_dir.mkdir()
    snapshot_chrome_dir = snapshot_dir / 'chrome'
    snapshot_chrome_dir.mkdir()

    # Create tab
    tab_env = env.copy()
    tab_env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
    result = subprocess.run(
        ['node', str(CHROME_TAB_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-modalcloser', '--crawl-id=test-modalcloser'],
        cwd=str(snapshot_chrome_dir),
        capture_output=True,
        text=True,
        timeout=60,
        env=tab_env
    )
    if result.returncode != 0:
        raise RuntimeError(f"Tab creation failed: {result.stderr}")

    return chrome_launch_process, chrome_pid, snapshot_chrome_dir


def cleanup_chrome(chrome_launch_process, chrome_pid):
    """Helper to clean up Chrome processes."""
    try:
        chrome_launch_process.send_signal(signal.SIGTERM)
        chrome_launch_process.wait(timeout=5)
    except:
        pass
    try:
        os.kill(chrome_pid, signal.SIGKILL)
    except OSError:
        pass


def test_background_script_handles_sigterm():
    """Test that background script runs and handles SIGTERM correctly."""
    with tempfile.TemporaryDirectory() as tmpdir:

@@ -195,7 +107,12 @@ def test_background_script_handles_sigterm():
        chrome_pid = None
        modalcloser_process = None
        try:
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(
                Path(tmpdir),
                crawl_id='test-modalcloser',
                snapshot_id='snap-modalcloser',
                test_url=TEST_URL,
            )

            # Create modalcloser output directory (sibling to chrome)
            modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'

@@ -265,7 +182,12 @@ def test_dialog_handler_logs_dialogs():
        chrome_pid = None
        modalcloser_process = None
        try:
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(
                Path(tmpdir),
                crawl_id='test-dialog',
                snapshot_id='snap-dialog',
                test_url=TEST_URL,
            )

            modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'
            modalcloser_dir.mkdir()

@@ -313,7 +235,12 @@ def test_config_poll_interval():
        chrome_pid = None
        modalcloser_process = None
        try:
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(tmpdir)
            chrome_launch_process, chrome_pid, snapshot_chrome_dir = setup_chrome_session(
                Path(tmpdir),
                crawl_id='test-poll',
                snapshot_id='snap-poll',
                test_url=TEST_URL,
            )

            modalcloser_dir = snapshot_chrome_dir.parent / 'modalcloser'
            modalcloser_dir.mkdir()
@@ -16,11 +16,8 @@
 * - Extension will automatically solve reCAPTCHA, hCaptcha, Cloudflare Turnstile, etc.
 */

const path = require('path');
const fs = require('fs');

// Import extension utilities
const extensionUtils = require('../chrome/chrome_utils.js');
const { installExtensionWithCache } = require('../chrome/chrome_utils.js');

// Extension metadata
const EXTENSION = {

@@ -28,76 +25,25 @@ const EXTENSION = {
    name: 'twocaptcha',
};

// Get extensions directory from environment or use default
const EXTENSIONS_DIR = process.env.CHROME_EXTENSIONS_DIR ||
    path.join(process.env.DATA_DIR || './data', 'personas', process.env.ACTIVE_PERSONA || 'Default', 'chrome_extensions');

/**
 * Install and configure the 2captcha extension
 */
async function installCaptchaExtension() {
    console.log('[*] Installing 2captcha extension...');

    // Install the extension
    const extension = await extensionUtils.loadOrInstallExtension(EXTENSION, EXTENSIONS_DIR);

    if (!extension) {
        console.error('[❌] Failed to install 2captcha extension');
        return null;
    }

    // Check if API key is configured
    const apiKey = process.env.TWOCAPTCHA_API_KEY || process.env.API_KEY_2CAPTCHA;
    if (!apiKey || apiKey === 'YOUR_API_KEY_HERE') {
        console.warn('[⚠️] 2captcha extension installed but TWOCAPTCHA_API_KEY not configured');
        console.warn('[⚠️] Set TWOCAPTCHA_API_KEY environment variable to enable automatic CAPTCHA solving');
    } else {
        console.log('[+] 2captcha extension installed and API key configured');
    }

    return extension;
}

/**
 * Note: 2captcha configuration is now handled by chrome plugin
 * Main entry point - install extension before archiving
 *
 * Note: 2captcha configuration is handled by on_Crawl__25_configure_twocaptcha_extension_options.js
 * during first-time browser setup to avoid repeated configuration on every snapshot.
 * The API key is injected via chrome.storage API once per browser session.
 */

/**
 * Main entry point - install extension before archiving
 */
async function main() {
    // Check if extension is already cached
    const cacheFile = path.join(EXTENSIONS_DIR, 'twocaptcha.extension.json');
    const extension = await installExtensionWithCache(EXTENSION);

    if (fs.existsSync(cacheFile)) {
        try {
            const cached = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
            const manifestPath = path.join(cached.unpacked_path, 'manifest.json');

            if (fs.existsSync(manifestPath)) {
                console.log('[*] 2captcha extension already installed (using cache)');
                return cached;
            }
        } catch (e) {
            // Cache file corrupted, re-install
            console.warn('[⚠️] Extension cache corrupted, re-installing...');
        }
    }

    // Install extension
    const extension = await installCaptchaExtension();

    // Export extension metadata for chrome plugin to load
    if (extension) {
        // Write extension info to a cache file that chrome plugin can read
        await fs.promises.mkdir(EXTENSIONS_DIR, { recursive: true });
        await fs.promises.writeFile(
            cacheFile,
            JSON.stringify(extension, null, 2)
        );
        console.log(`[+] Extension metadata written to ${cacheFile}`);
        // Check if API key is configured
        const apiKey = process.env.TWOCAPTCHA_API_KEY || process.env.API_KEY_2CAPTCHA;
        if (!apiKey || apiKey === 'YOUR_API_KEY_HERE') {
            console.warn('[⚠️] 2captcha extension installed but TWOCAPTCHA_API_KEY not configured');
            console.warn('[⚠️] Set TWOCAPTCHA_API_KEY environment variable to enable automatic CAPTCHA solving');
        } else {
            console.log('[+] 2captcha extension installed and API key configured');
        }
    }

    return extension;

@@ -106,7 +52,6 @@ async function main() {
// Export functions for use by other plugins
module.exports = {
    EXTENSION,
    installCaptchaExtension,
};

// Run if executed directly
@@ -18,11 +18,8 @@
 * - Uses efficient blocking with filter lists
 */

const path = require('path');
const fs = require('fs');

// Import extension utilities
const extensionUtils = require('../chrome/chrome_utils.js');
const { installExtensionWithCache } = require('../chrome/chrome_utils.js');

// Extension metadata
const EXTENSION = {

@@ -30,69 +27,17 @@ const EXTENSION = {
    name: 'ublock',
};

// Get extensions directory from environment or use default
const EXTENSIONS_DIR = process.env.CHROME_EXTENSIONS_DIR ||
    path.join(process.env.DATA_DIR || './data', 'personas', process.env.ACTIVE_PERSONA || 'Default', 'chrome_extensions');

/**
 * Install the uBlock Origin extension
 */
async function installUblockExtension() {
    console.log('[*] Installing uBlock Origin extension...');

    // Install the extension
    const extension = await extensionUtils.loadOrInstallExtension(EXTENSION, EXTENSIONS_DIR);

    if (!extension) {
        console.error('[❌] Failed to install uBlock Origin extension');
        return null;
    }

    console.log('[+] uBlock Origin extension installed');
    console.log('[+] Ads and trackers will be blocked during archiving');

    return extension;
}

/**
 * Main entry point - install extension before archiving
 *
 * Note: uBlock Origin works automatically with default filter lists.
 * No configuration needed - blocks ads, trackers, and malware domains out of the box.
 */

/**
 * Main entry point - install extension before archiving
 */
async function main() {
    // Check if extension is already cached
    const cacheFile = path.join(EXTENSIONS_DIR, 'ublock.extension.json');
    const extension = await installExtensionWithCache(EXTENSION);

    if (fs.existsSync(cacheFile)) {
        try {
            const cached = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
            const manifestPath = path.join(cached.unpacked_path, 'manifest.json');

            if (fs.existsSync(manifestPath)) {
                console.log('[*] uBlock Origin extension already installed (using cache)');
                return cached;
            }
        } catch (e) {
            // Cache file corrupted, re-install
            console.warn('[⚠️] Extension cache corrupted, re-installing...');
        }
    }

    // Install extension
    const extension = await installUblockExtension();

    // Export extension metadata for chrome plugin to load
    if (extension) {
        // Write extension info to a cache file that chrome plugin can read
        await fs.promises.mkdir(EXTENSIONS_DIR, { recursive: true });
        await fs.promises.writeFile(
            cacheFile,
            JSON.stringify(extension, null, 2)
        );
        console.log(`[+] Extension metadata written to ${cacheFile}`);
        console.log('[+] Ads and trackers will be blocked during archiving');
    }

    return extension;

@@ -101,7 +46,6 @@ async function main() {
// Export functions for use by other plugins
module.exports = {
    EXTENSION,
    installUblockExtension,
};

// Run if executed directly