mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
Move Chrome default args to config.json CHROME_ARGS
- Add comprehensive default CHROME_ARGS in config.json with 55+ flags for deterministic rendering, security, performance, and UI suppression
- Update chrome_utils.js launchChromium() to read CHROME_ARGS and CHROME_ARGS_EXTRA from environment variables (set by get_config())
- Add getEnvArray() helper to parse JSON arrays or comma-separated strings from environment variables
- Separate args into three categories:
  1. baseArgs: Static flags from CHROME_ARGS config (configurable)
  2. dynamicArgs: Runtime-computed flags (port, sandbox, headless, etc.)
  3. extraArgs: User overrides from CHROME_ARGS_EXTRA
- Add CHROME_SANDBOX config option to control --no-sandbox flag

Args are now configurable via:
- config.json defaults
- ArchiveBox.conf file
- Environment variables
- Per-crawl/snapshot config overrides
This commit is contained in:
@@ -56,6 +56,36 @@ function getEnvInt(name, defaultValue = 0) {
|
||||
return isNaN(val) ? defaultValue : val;
|
||||
}
|
||||
|
||||
/**
 * Get array environment variable (JSON array or comma-separated string).
 *
 * Parsing is attempted in this order:
 *   1. If the value starts with '[', it is parsed as JSON and returned as-is
 *      when the result is an array (elements are not coerced or validated).
 *   2. If the value contains a comma followed (optionally after whitespace)
 *      by '--', it is split only at those commas, so flags whose own value
 *      contains commas (e.g. "--lang=en-US,en;q=0.9") stay intact.
 *   3. Otherwise it is split on every comma.
 * In cases 2 and 3 each item is trimmed and empty items are dropped.
 *
 * @param {string} name - Environment variable name
 * @param {string[]} [defaultValue=[]] - Returned unchanged when the variable is unset or empty
 * @returns {string[]} - Array of strings
 */
function getEnvArray(name, defaultValue = []) {
    const val = getEnv(name, '');
    if (!val) return defaultValue;

    // Try parsing as JSON array first
    if (val.startsWith('[')) {
        try {
            const parsed = JSON.parse(val);
            if (Array.isArray(parsed)) return parsed;
        } catch (e) {
            // Not valid JSON: deliberately fall through to comma-separated parsing
        }
    }

    // Chrome-arg style list: split only on commas that introduce the next
    // "--flag". Whitespace between the comma and "--" is tolerated so that
    // "--a=1,2, --b" is handled the same way as "--a=1,2,--b".
    if (/,\s*--/.test(val)) {
        return val.split(/,\s*(?=--)/).map(s => s.trim()).filter(Boolean);
    }

    // Simple comma-separated
    return val.split(',').map(s => s.trim()).filter(Boolean);
}
|
||||
|
||||
/**
|
||||
* Parse resolution string into width/height.
|
||||
* @param {string} resolution - Resolution string like "1440,2000"
|
||||
@@ -298,6 +328,7 @@ function killZombieChrome(dataDir = null) {
|
||||
* @param {string} [options.userDataDir] - Chrome user data directory for persistent sessions
|
||||
* @param {string} [options.resolution='1440,2000'] - Window resolution
|
||||
* @param {boolean} [options.headless=true] - Run in headless mode
|
||||
* @param {boolean} [options.sandbox=true] - Enable Chrome sandbox
|
||||
* @param {boolean} [options.checkSsl=true] - Check SSL certificates
|
||||
* @param {string[]} [options.extensionPaths=[]] - Paths to unpacked extensions
|
||||
* @param {boolean} [options.killZombies=true] - Kill zombie processes first
|
||||
@@ -310,6 +341,7 @@ async function launchChromium(options = {}) {
|
||||
userDataDir = getEnv('CHROME_USER_DATA_DIR'),
|
||||
resolution = getEnv('CHROME_RESOLUTION') || getEnv('RESOLUTION', '1440,2000'),
|
||||
headless = getEnvBool('CHROME_HEADLESS', true),
|
||||
sandbox = getEnvBool('CHROME_SANDBOX', true),
|
||||
checkSsl = getEnvBool('CHROME_CHECK_SSL_VALIDITY', getEnvBool('CHECK_SSL_VALIDITY', true)),
|
||||
extensionPaths = [],
|
||||
killZombies = true,
|
||||
@@ -353,38 +385,43 @@ async function launchChromium(options = {}) {
|
||||
const debugPort = await findFreePort();
|
||||
console.error(`[*] Using debug port: ${debugPort}`);
|
||||
|
||||
// Build Chrome arguments
|
||||
const chromiumArgs = [
|
||||
// Get base Chrome args from config (static flags from CHROME_ARGS env var)
|
||||
// These come from config.json defaults, merged by get_config() in Python
|
||||
const baseArgs = getEnvArray('CHROME_ARGS', []);
|
||||
|
||||
// Get extra user-provided args
|
||||
const extraArgs = getEnvArray('CHROME_ARGS_EXTRA', []);
|
||||
|
||||
// Build dynamic Chrome arguments (these must be computed at runtime)
|
||||
const dynamicArgs = [
|
||||
// Remote debugging setup
|
||||
`--remote-debugging-port=${debugPort}`,
|
||||
'--remote-debugging-address=127.0.0.1',
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
|
||||
// Sandbox settings (disable in Docker)
|
||||
...(sandbox ? [] : ['--no-sandbox', '--disable-setuid-sandbox']),
|
||||
|
||||
// Docker-specific workarounds
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-gpu',
|
||||
'--disable-sync',
|
||||
'--no-first-run',
|
||||
'--no-default-browser-check',
|
||||
'--disable-default-apps',
|
||||
'--disable-infobars',
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
'--disable-component-update',
|
||||
'--disable-domain-reliability',
|
||||
'--disable-breakpad',
|
||||
'--disable-background-networking',
|
||||
'--disable-background-timer-throttling',
|
||||
'--disable-backgrounding-occluded-windows',
|
||||
'--disable-renderer-backgrounding',
|
||||
'--disable-ipc-flooding-protection',
|
||||
'--password-store=basic',
|
||||
'--use-mock-keychain',
|
||||
'--font-render-hinting=none',
|
||||
'--force-color-profile=srgb',
|
||||
|
||||
// Window size
|
||||
`--window-size=${width},${height}`,
|
||||
|
||||
// User data directory (for persistent sessions with persona)
|
||||
...(userDataDir ? [`--user-data-dir=${userDataDir}`] : []),
|
||||
|
||||
// Headless mode
|
||||
...(headless ? ['--headless=new'] : []),
|
||||
|
||||
// SSL certificate checking
|
||||
...(checkSsl ? [] : ['--ignore-certificate-errors']),
|
||||
];
|
||||
|
||||
// Combine all args: base (from config) + dynamic (runtime) + extra (user overrides)
|
||||
// Dynamic args come after base so they can override if needed
|
||||
const chromiumArgs = [...baseArgs, ...dynamicArgs, ...extraArgs];
|
||||
|
||||
// Add extension loading flags
|
||||
if (extensionPaths.length > 0) {
|
||||
const extPathsArg = extensionPaths.join(',');
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
"CHROME_USER_DATA_DIR": {
|
||||
"type": "string",
|
||||
"default": "",
|
||||
"description": "Path to Chrome user data directory for persistent sessions"
|
||||
"description": "Path to Chrome user data directory for persistent sessions (derived from ACTIVE_PERSONA if not set)"
|
||||
},
|
||||
"CHROME_USER_AGENT": {
|
||||
"type": "string",
|
||||
@@ -53,16 +53,74 @@
|
||||
"CHROME_ARGS": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"default": [],
|
||||
"default": [
|
||||
"--no-first-run",
|
||||
"--no-default-browser-check",
|
||||
"--disable-default-apps",
|
||||
"--disable-sync",
|
||||
"--disable-infobars",
|
||||
"--disable-blink-features=AutomationControlled",
|
||||
"--disable-component-update",
|
||||
"--disable-domain-reliability",
|
||||
"--disable-breakpad",
|
||||
"--disable-client-side-phishing-detection",
|
||||
"--disable-hang-monitor",
|
||||
"--disable-speech-synthesis-api",
|
||||
"--disable-speech-api",
|
||||
"--disable-print-preview",
|
||||
"--disable-notifications",
|
||||
"--disable-desktop-notifications",
|
||||
"--disable-popup-blocking",
|
||||
"--disable-prompt-on-repost",
|
||||
"--disable-external-intent-requests",
|
||||
"--disable-session-crashed-bubble",
|
||||
"--disable-search-engine-choice-screen",
|
||||
"--disable-datasaver-prompt",
|
||||
"--ash-no-nudges",
|
||||
"--hide-crash-restore-bubble",
|
||||
"--suppress-message-center-popups",
|
||||
"--noerrdialogs",
|
||||
"--no-pings",
|
||||
"--silent-debugger-extension-api",
|
||||
"--deny-permission-prompts",
|
||||
"--safebrowsing-disable-auto-update",
|
||||
"--metrics-recording-only",
|
||||
"--password-store=basic",
|
||||
"--use-mock-keychain",
|
||||
"--disable-cookie-encryption",
|
||||
"--font-render-hinting=none",
|
||||
"--force-color-profile=srgb",
|
||||
"--disable-partial-raster",
|
||||
"--disable-skia-runtime-opts",
|
||||
"--disable-2d-canvas-clip-aa",
|
||||
"--enable-webgl",
|
||||
"--hide-scrollbars",
|
||||
"--export-tagged-pdf",
|
||||
"--generate-pdf-document-outline",
|
||||
"--disable-lazy-loading",
|
||||
"--disable-renderer-backgrounding",
|
||||
"--disable-background-networking",
|
||||
"--disable-background-timer-throttling",
|
||||
"--disable-backgrounding-occluded-windows",
|
||||
"--disable-ipc-flooding-protection",
|
||||
"--disable-extensions-http-throttling",
|
||||
"--disable-field-trial-config",
|
||||
"--disable-back-forward-cache",
|
||||
"--autoplay-policy=no-user-gesture-required",
|
||||
"--disable-gesture-requirement-for-media-playback",
|
||||
"--lang=en-US,en;q=0.9",
|
||||
"--log-level=2",
|
||||
"--enable-logging=stderr"
|
||||
],
|
||||
"x-aliases": ["CHROME_DEFAULT_ARGS"],
|
||||
"description": "Default Chrome command-line arguments"
|
||||
"description": "Default Chrome command-line arguments (static flags only, dynamic args like --user-data-dir are added at runtime)"
|
||||
},
|
||||
"CHROME_ARGS_EXTRA": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"default": [],
|
||||
"x-aliases": ["CHROME_EXTRA_ARGS"],
|
||||
"description": "Extra arguments to append to Chrome command"
|
||||
"description": "Extra arguments to append to Chrome command (for user customization)"
|
||||
},
|
||||
"CHROME_PAGELOAD_TIMEOUT": {
|
||||
"type": "integer",
|
||||
|
||||
Reference in New Issue
Block a user