mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-04 09:55:33 +10:00
fix cubic comments
This commit is contained in:
@@ -621,18 +621,6 @@ class Process(ModelWithHealthStats):
|
|||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def is_alive(self) -> bool:
|
|
||||||
"""Check if this process is still running."""
|
|
||||||
from archivebox.misc.process_utils import validate_pid_file
|
|
||||||
|
|
||||||
if self.status == self.StatusChoices.EXITED:
|
|
||||||
return False
|
|
||||||
|
|
||||||
if not self.pid:
|
|
||||||
return False
|
|
||||||
|
|
||||||
return validate_pid_file(self.pid_file, self.cmd_file)
|
|
||||||
|
|
||||||
def kill(self, signal_num: int = 15) -> bool:
|
def kill(self, signal_num: int = 15) -> bool:
|
||||||
"""
|
"""
|
||||||
Kill this process and update status.
|
Kill this process and update status.
|
||||||
@@ -712,7 +700,7 @@ class Process(ModelWithHealthStats):
|
|||||||
Wait for process to exit, polling periodically.
|
Wait for process to exit, polling periodically.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
timeout: Max seconds to wait (None = use self.timeout)
|
timeout: Max seconds to wait (None = use self.timeout, or config.TIMEOUT * 5 if that's also None)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
exit_code
|
exit_code
|
||||||
@@ -721,8 +709,10 @@ class Process(ModelWithHealthStats):
|
|||||||
TimeoutError if process doesn't exit in time
|
TimeoutError if process doesn't exit in time
|
||||||
"""
|
"""
|
||||||
import time
|
import time
|
||||||
|
from archivebox import config
|
||||||
|
|
||||||
timeout = timeout or self.timeout
|
# Require a timeout - default to config.TIMEOUT * 5 (typically 300s)
|
||||||
|
timeout = timeout or self.timeout or (config.TIMEOUT * 5)
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@@ -120,6 +120,7 @@ class BaseConfigSet(BaseSettings):
|
|||||||
def get_config(
|
def get_config(
|
||||||
scope: str = "global",
|
scope: str = "global",
|
||||||
defaults: Optional[Dict] = None,
|
defaults: Optional[Dict] = None,
|
||||||
|
persona: Any = None,
|
||||||
user: Any = None,
|
user: Any = None,
|
||||||
crawl: Any = None,
|
crawl: Any = None,
|
||||||
snapshot: Any = None,
|
snapshot: Any = None,
|
||||||
@@ -131,14 +132,16 @@ def get_config(
|
|||||||
1. Per-snapshot config (snapshot.config JSON field)
|
1. Per-snapshot config (snapshot.config JSON field)
|
||||||
2. Per-crawl config (crawl.config JSON field)
|
2. Per-crawl config (crawl.config JSON field)
|
||||||
3. Per-user config (user.config JSON field)
|
3. Per-user config (user.config JSON field)
|
||||||
4. Environment variables
|
4. Per-persona config (persona.get_derived_config() - includes CHROME_USER_DATA_DIR etc.)
|
||||||
5. Config file (ArchiveBox.conf)
|
5. Environment variables
|
||||||
6. Plugin schema defaults (config.json)
|
6. Config file (ArchiveBox.conf)
|
||||||
7. Core config defaults
|
7. Plugin schema defaults (config.json)
|
||||||
|
8. Core config defaults
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
scope: Config scope ('global', 'crawl', 'snapshot', etc.)
|
scope: Config scope ('global', 'crawl', 'snapshot', etc.)
|
||||||
defaults: Default values to start with
|
defaults: Default values to start with
|
||||||
|
persona: Persona object (provides derived paths like CHROME_USER_DATA_DIR)
|
||||||
user: User object with config JSON field
|
user: User object with config JSON field
|
||||||
crawl: Crawl object with config JSON field
|
crawl: Crawl object with config JSON field
|
||||||
snapshot: Snapshot object with config JSON field
|
snapshot: Snapshot object with config JSON field
|
||||||
@@ -205,6 +208,10 @@ def get_config(
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Apply persona config overrides (includes derived paths like CHROME_USER_DATA_DIR)
|
||||||
|
if persona and hasattr(persona, "get_derived_config"):
|
||||||
|
config.update(persona.get_derived_config())
|
||||||
|
|
||||||
# Apply user config overrides
|
# Apply user config overrides
|
||||||
if user and hasattr(user, "config") and user.config:
|
if user and hasattr(user, "config") and user.config:
|
||||||
config.update(user.config)
|
config.update(user.config)
|
||||||
|
|||||||
@@ -480,12 +480,39 @@ for url_str, num_urls in _test_url_strs.items():
|
|||||||
|
|
||||||
def chrome_cleanup():
|
def chrome_cleanup():
|
||||||
"""
|
"""
|
||||||
Cleans up any state or runtime files that chrome leaves behind when killed by
|
Cleans up any state or runtime files that Chrome leaves behind when killed by
|
||||||
a timeout or other error
|
a timeout or other error. Handles:
|
||||||
|
- All persona chrome_user_data directories (via Persona.cleanup_chrome_all())
|
||||||
|
- Explicit CHROME_USER_DATA_DIR from config
|
||||||
|
- Legacy Docker chromium path
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
from archivebox.config.permissions import IN_DOCKER
|
from archivebox.config.permissions import IN_DOCKER
|
||||||
|
|
||||||
|
# Clean up all persona chrome directories using Persona class
|
||||||
|
try:
|
||||||
|
from archivebox.personas.models import Persona
|
||||||
|
|
||||||
|
# Clean up all personas
|
||||||
|
Persona.cleanup_chrome_all()
|
||||||
|
|
||||||
|
# Also clean up the active persona's explicit CHROME_USER_DATA_DIR if set
|
||||||
|
# (in case it's a custom path not under PERSONAS_DIR)
|
||||||
|
from archivebox.config.configset import get_config
|
||||||
|
config = get_config()
|
||||||
|
chrome_user_data_dir = config.get('CHROME_USER_DATA_DIR')
|
||||||
|
if chrome_user_data_dir:
|
||||||
|
singleton_lock = Path(chrome_user_data_dir) / 'SingletonLock'
|
||||||
|
if os.path.lexists(singleton_lock):
|
||||||
|
try:
|
||||||
|
singleton_lock.unlink()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
pass # Persona/config not available during early startup
|
||||||
|
|
||||||
|
# Legacy Docker cleanup (for backwards compatibility)
|
||||||
if IN_DOCKER:
|
if IN_DOCKER:
|
||||||
singleton_lock = "/home/archivebox/.config/chromium/SingletonLock"
|
singleton_lock = "/home/archivebox/.config/chromium/SingletonLock"
|
||||||
if os.path.lexists(singleton_lock):
|
if os.path.lexists(singleton_lock):
|
||||||
|
|||||||
@@ -1,59 +1,155 @@
|
|||||||
# from django.db import models
|
"""
|
||||||
|
Persona management for ArchiveBox.
|
||||||
|
|
||||||
# from django.conf import settings
|
A Persona represents a browser profile/identity used for archiving.
|
||||||
|
Each persona has its own:
|
||||||
|
- Chrome user data directory (for cookies, localStorage, extensions, etc.)
|
||||||
|
- Chrome extensions directory
|
||||||
|
- Cookies file
|
||||||
|
- Config overrides
|
||||||
|
"""
|
||||||
|
|
||||||
|
__package__ = 'archivebox.personas'
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING, Iterator
|
||||||
|
|
||||||
|
from django.db import models
|
||||||
|
from django.conf import settings
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from archivebox.base_models.models import ModelWithConfig, get_or_create_system_user_pk
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from django.db.models import QuerySet
|
||||||
|
|
||||||
|
|
||||||
# class Persona(models.Model):
|
class Persona(ModelWithConfig):
|
||||||
# """Aka a "SessionType", its a template for a crawler browsing session containing some config."""
|
"""
|
||||||
|
Browser persona/profile for archiving sessions.
|
||||||
|
|
||||||
# id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
|
Each persona provides:
|
||||||
|
- CHROME_USER_DATA_DIR: Chrome profile directory
|
||||||
# created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False)
|
- CHROME_EXTENSIONS_DIR: Installed extensions directory
|
||||||
# created_at = AutoDateTimeField(default=None, null=False, db_index=True)
|
- COOKIES_FILE: Cookies file for wget/curl
|
||||||
# modified_at = models.DateTimeField(auto_now=True)
|
- config: JSON field with persona-specific config overrides
|
||||||
|
|
||||||
# name = models.CharField(max_length=100, blank=False, null=False, editable=False)
|
|
||||||
|
|
||||||
# persona_dir = models.FilePathField(path=settings.PERSONAS_DIR, allow_files=False, allow_folders=True, blank=True, null=False, editable=False)
|
|
||||||
# config = models.JSONField(default=dict)
|
|
||||||
# # e.g. {
|
|
||||||
# # USER_AGENT: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
|
||||||
# # COOKIES_TXT_FILE: '/path/to/cookies.txt',
|
|
||||||
# # CHROME_USER_DATA_DIR: '/path/to/chrome/user/data/dir',
|
|
||||||
# # CHECK_SSL_VALIDITY: False,
|
|
||||||
# # SAVE_ARCHIVEDOTORG: True,
|
|
||||||
# # CHROME_BINARY: 'chromium'
|
|
||||||
# # ...
|
|
||||||
# # }
|
|
||||||
# # domain_allowlist = models.CharField(max_length=1024, blank=True, null=False, default='')
|
|
||||||
# # domain_denylist = models.CharField(max_length=1024, blank=True, null=False, default='')
|
|
||||||
|
|
||||||
# class Meta:
|
|
||||||
# app_label = 'personas'
|
|
||||||
# verbose_name = 'Session Type'
|
|
||||||
# verbose_name_plural = 'Session Types'
|
|
||||||
# unique_together = (('created_by', 'name'),)
|
|
||||||
|
|
||||||
|
|
||||||
# def clean(self):
|
Usage:
|
||||||
# self.persona_dir = settings.PERSONAS_DIR / self.name
|
# Get persona and its derived config
|
||||||
# assert self.persona_dir == settings.PERSONAS_DIR / self.name, f'Persona dir {self.persona_dir} must match settings.PERSONAS_DIR / self.name'
|
config = get_config(persona=crawl.persona, crawl=crawl, snapshot=snapshot)
|
||||||
|
chrome_dir = config['CHROME_USER_DATA_DIR']
|
||||||
|
|
||||||
# # make sure config keys all exist in FLAT_CONFIG
|
# Or access directly from persona
|
||||||
# # make sure config values all match expected types
|
persona = Persona.objects.get(name='Default')
|
||||||
# pass
|
persona.CHROME_USER_DATA_DIR # -> Path to chrome_user_data
|
||||||
|
"""
|
||||||
# def save(self, *args, **kwargs):
|
|
||||||
# self.full_clean()
|
name = models.CharField(max_length=64, unique=True)
|
||||||
|
created_at = models.DateTimeField(default=timezone.now, db_index=True)
|
||||||
# # make sure basic file structure is present in persona_dir:
|
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
|
||||||
# # - PERSONAS_DIR / self.name /
|
|
||||||
# # - chrome_profile/
|
class Meta:
|
||||||
# # - chrome_downloads/
|
app_label = 'personas'
|
||||||
# # - chrome_extensions/
|
|
||||||
# # - cookies.txt
|
def __str__(self) -> str:
|
||||||
# # - auth.json
|
return self.name
|
||||||
# # - config.json # json dump of the model
|
|
||||||
|
@property
|
||||||
# super().save(*args, **kwargs)
|
def path(self) -> Path:
|
||||||
|
"""Path to persona directory under PERSONAS_DIR."""
|
||||||
|
from archivebox.config.constants import CONSTANTS
|
||||||
|
return CONSTANTS.PERSONAS_DIR / self.name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def CHROME_USER_DATA_DIR(self) -> str:
|
||||||
|
"""Derived path to Chrome user data directory for this persona."""
|
||||||
|
return str(self.path / 'chrome_user_data')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def CHROME_EXTENSIONS_DIR(self) -> str:
|
||||||
|
"""Derived path to Chrome extensions directory for this persona."""
|
||||||
|
return str(self.path / 'chrome_extensions')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def COOKIES_FILE(self) -> str:
|
||||||
|
"""Derived path to cookies.txt file for this persona (if exists)."""
|
||||||
|
cookies_path = self.path / 'cookies.txt'
|
||||||
|
return str(cookies_path) if cookies_path.exists() else ''
|
||||||
|
|
||||||
|
def get_derived_config(self) -> dict:
|
||||||
|
"""
|
||||||
|
Get config dict with derived paths filled in.
|
||||||
|
|
||||||
|
Returns dict with:
|
||||||
|
- All values from self.config JSONField
|
||||||
|
- CHROME_USER_DATA_DIR (derived from persona path)
|
||||||
|
- CHROME_EXTENSIONS_DIR (derived from persona path)
|
||||||
|
- COOKIES_FILE (derived from persona path, if file exists)
|
||||||
|
- ACTIVE_PERSONA (set to this persona's name)
|
||||||
|
"""
|
||||||
|
derived = dict(self.config or {})
|
||||||
|
|
||||||
|
# Add derived paths (don't override if explicitly set in config)
|
||||||
|
if 'CHROME_USER_DATA_DIR' not in derived:
|
||||||
|
derived['CHROME_USER_DATA_DIR'] = self.CHROME_USER_DATA_DIR
|
||||||
|
if 'CHROME_EXTENSIONS_DIR' not in derived:
|
||||||
|
derived['CHROME_EXTENSIONS_DIR'] = self.CHROME_EXTENSIONS_DIR
|
||||||
|
if 'COOKIES_FILE' not in derived and self.COOKIES_FILE:
|
||||||
|
derived['COOKIES_FILE'] = self.COOKIES_FILE
|
||||||
|
|
||||||
|
# Always set ACTIVE_PERSONA to this persona's name
|
||||||
|
derived['ACTIVE_PERSONA'] = self.name
|
||||||
|
|
||||||
|
return derived
|
||||||
|
|
||||||
|
def ensure_dirs(self) -> None:
|
||||||
|
"""Create persona directories if they don't exist."""
|
||||||
|
self.path.mkdir(parents=True, exist_ok=True)
|
||||||
|
(self.path / 'chrome_user_data').mkdir(parents=True, exist_ok=True)
|
||||||
|
(self.path / 'chrome_extensions').mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
def cleanup_chrome(self) -> bool:
|
||||||
|
"""
|
||||||
|
Clean up Chrome state files (SingletonLock, etc.) for this persona.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if cleanup was performed, False if no cleanup needed
|
||||||
|
"""
|
||||||
|
cleaned = False
|
||||||
|
chrome_dir = self.path / 'chrome_user_data'
|
||||||
|
|
||||||
|
if not chrome_dir.exists():
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Clean up SingletonLock files
|
||||||
|
for lock_file in chrome_dir.glob('**/SingletonLock'):
|
||||||
|
try:
|
||||||
|
lock_file.unlink()
|
||||||
|
cleaned = True
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Clean up SingletonSocket files
|
||||||
|
for socket_file in chrome_dir.glob('**/SingletonSocket'):
|
||||||
|
try:
|
||||||
|
socket_file.unlink()
|
||||||
|
cleaned = True
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return cleaned
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_or_create_default(cls) -> 'Persona':
|
||||||
|
"""Get or create the Default persona."""
|
||||||
|
persona, _ = cls.objects.get_or_create(name='Default')
|
||||||
|
return persona
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def cleanup_chrome_all(cls) -> int:
|
||||||
|
"""Clean up Chrome state files for all personas."""
|
||||||
|
cleaned = 0
|
||||||
|
for persona in cls.objects.all():
|
||||||
|
if persona.cleanup_chrome():
|
||||||
|
cleaned += 1
|
||||||
|
return cleaned
|
||||||
|
|||||||
@@ -56,6 +56,40 @@ function getEnvInt(name, defaultValue = 0) {
|
|||||||
return isNaN(val) ? defaultValue : val;
|
return isNaN(val) ? defaultValue : val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get array environment variable (JSON array or comma-separated string).
|
||||||
|
*
|
||||||
|
* Parsing strategy:
|
||||||
|
* - If value starts with '[', parse as JSON array
|
||||||
|
* - Otherwise, parse as comma-separated values
|
||||||
|
*
|
||||||
|
* This prevents incorrect splitting of arguments that contain internal commas.
|
||||||
|
* For arguments with commas, use JSON format:
|
||||||
|
* CHROME_ARGS='["--user-data-dir=/path/with,comma", "--window-size=1440,900"]'
|
||||||
|
*
|
||||||
|
* @param {string} name - Environment variable name
|
||||||
|
* @param {string[]} [defaultValue=[]] - Default value if not set
|
||||||
|
* @returns {string[]} - Array of strings
|
||||||
|
*/
|
||||||
|
function getEnvArray(name, defaultValue = []) {
|
||||||
|
const val = getEnv(name, '');
|
||||||
|
if (!val) return defaultValue;
|
||||||
|
|
||||||
|
// If starts with '[', parse as JSON array
|
||||||
|
if (val.startsWith('[')) {
|
||||||
|
try {
|
||||||
|
const parsed = JSON.parse(val);
|
||||||
|
if (Array.isArray(parsed)) return parsed;
|
||||||
|
} catch (e) {
|
||||||
|
console.error(`[!] Failed to parse ${name} as JSON array: ${e.message}`);
|
||||||
|
// Fall through to comma-separated parsing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse as comma-separated values
|
||||||
|
return val.split(',').map(s => s.trim()).filter(Boolean);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse resolution string into width/height.
|
* Parse resolution string into width/height.
|
||||||
* @param {string} resolution - Resolution string like "1440,2000"
|
* @param {string} resolution - Resolution string like "1440,2000"
|
||||||
@@ -257,6 +291,31 @@ function killZombieChrome(dataDir = null) {
|
|||||||
console.error('[+] No zombies found');
|
console.error('[+] No zombies found');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clean up stale SingletonLock files from persona chrome_user_data directories
|
||||||
|
const personasDir = path.join(dataDir, 'personas');
|
||||||
|
if (fs.existsSync(personasDir)) {
|
||||||
|
try {
|
||||||
|
const personas = fs.readdirSync(personasDir, { withFileTypes: true });
|
||||||
|
for (const persona of personas) {
|
||||||
|
if (!persona.isDirectory()) continue;
|
||||||
|
|
||||||
|
const userDataDir = path.join(personasDir, persona.name, 'chrome_user_data');
|
||||||
|
const singletonLock = path.join(userDataDir, 'SingletonLock');
|
||||||
|
|
||||||
|
if (fs.existsSync(singletonLock)) {
|
||||||
|
try {
|
||||||
|
fs.unlinkSync(singletonLock);
|
||||||
|
console.error(`[+] Removed stale SingletonLock: ${singletonLock}`);
|
||||||
|
} catch (e) {
|
||||||
|
// Ignore - may be in use by active Chrome
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
// Ignore errors scanning personas directory
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return killed;
|
return killed;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -270,8 +329,10 @@ function killZombieChrome(dataDir = null) {
|
|||||||
* @param {Object} options - Launch options
|
* @param {Object} options - Launch options
|
||||||
* @param {string} [options.binary] - Chrome binary path (auto-detected if not provided)
|
* @param {string} [options.binary] - Chrome binary path (auto-detected if not provided)
|
||||||
* @param {string} [options.outputDir='chrome'] - Directory for output files
|
* @param {string} [options.outputDir='chrome'] - Directory for output files
|
||||||
|
* @param {string} [options.userDataDir] - Chrome user data directory for persistent sessions
|
||||||
* @param {string} [options.resolution='1440,2000'] - Window resolution
|
* @param {string} [options.resolution='1440,2000'] - Window resolution
|
||||||
* @param {boolean} [options.headless=true] - Run in headless mode
|
* @param {boolean} [options.headless=true] - Run in headless mode
|
||||||
|
* @param {boolean} [options.sandbox=true] - Enable Chrome sandbox
|
||||||
* @param {boolean} [options.checkSsl=true] - Check SSL certificates
|
* @param {boolean} [options.checkSsl=true] - Check SSL certificates
|
||||||
* @param {string[]} [options.extensionPaths=[]] - Paths to unpacked extensions
|
* @param {string[]} [options.extensionPaths=[]] - Paths to unpacked extensions
|
||||||
* @param {boolean} [options.killZombies=true] - Kill zombie processes first
|
* @param {boolean} [options.killZombies=true] - Kill zombie processes first
|
||||||
@@ -281,8 +342,10 @@ async function launchChromium(options = {}) {
|
|||||||
const {
|
const {
|
||||||
binary = findChromium(),
|
binary = findChromium(),
|
||||||
outputDir = 'chrome',
|
outputDir = 'chrome',
|
||||||
|
userDataDir = getEnv('CHROME_USER_DATA_DIR'),
|
||||||
resolution = getEnv('CHROME_RESOLUTION') || getEnv('RESOLUTION', '1440,2000'),
|
resolution = getEnv('CHROME_RESOLUTION') || getEnv('RESOLUTION', '1440,2000'),
|
||||||
headless = getEnvBool('CHROME_HEADLESS', true),
|
headless = getEnvBool('CHROME_HEADLESS', true),
|
||||||
|
sandbox = getEnvBool('CHROME_SANDBOX', true),
|
||||||
checkSsl = getEnvBool('CHROME_CHECK_SSL_VALIDITY', getEnvBool('CHECK_SSL_VALIDITY', true)),
|
checkSsl = getEnvBool('CHROME_CHECK_SSL_VALIDITY', getEnvBool('CHECK_SSL_VALIDITY', true)),
|
||||||
extensionPaths = [],
|
extensionPaths = [],
|
||||||
killZombies = true,
|
killZombies = true,
|
||||||
@@ -304,41 +367,65 @@ async function launchChromium(options = {}) {
|
|||||||
fs.mkdirSync(outputDir, { recursive: true });
|
fs.mkdirSync(outputDir, { recursive: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create user data directory if specified and doesn't exist
|
||||||
|
if (userDataDir) {
|
||||||
|
if (!fs.existsSync(userDataDir)) {
|
||||||
|
fs.mkdirSync(userDataDir, { recursive: true });
|
||||||
|
console.error(`[*] Created user data directory: ${userDataDir}`);
|
||||||
|
}
|
||||||
|
// Clean up any stale SingletonLock file from previous crashed sessions
|
||||||
|
const singletonLock = path.join(userDataDir, 'SingletonLock');
|
||||||
|
if (fs.existsSync(singletonLock)) {
|
||||||
|
try {
|
||||||
|
fs.unlinkSync(singletonLock);
|
||||||
|
console.error(`[*] Removed stale SingletonLock: ${singletonLock}`);
|
||||||
|
} catch (e) {
|
||||||
|
console.error(`[!] Failed to remove SingletonLock: ${e.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Find a free port
|
// Find a free port
|
||||||
const debugPort = await findFreePort();
|
const debugPort = await findFreePort();
|
||||||
console.error(`[*] Using debug port: ${debugPort}`);
|
console.error(`[*] Using debug port: ${debugPort}`);
|
||||||
|
|
||||||
// Build Chrome arguments
|
// Get base Chrome args from config (static flags from CHROME_ARGS env var)
|
||||||
const chromiumArgs = [
|
// These come from config.json defaults, merged by get_config() in Python
|
||||||
|
const baseArgs = getEnvArray('CHROME_ARGS', []);
|
||||||
|
|
||||||
|
// Get extra user-provided args
|
||||||
|
const extraArgs = getEnvArray('CHROME_ARGS_EXTRA', []);
|
||||||
|
|
||||||
|
// Build dynamic Chrome arguments (these must be computed at runtime)
|
||||||
|
const dynamicArgs = [
|
||||||
|
// Remote debugging setup
|
||||||
`--remote-debugging-port=${debugPort}`,
|
`--remote-debugging-port=${debugPort}`,
|
||||||
'--remote-debugging-address=127.0.0.1',
|
'--remote-debugging-address=127.0.0.1',
|
||||||
'--no-sandbox',
|
|
||||||
'--disable-setuid-sandbox',
|
// Sandbox settings (disable in Docker)
|
||||||
|
...(sandbox ? [] : ['--no-sandbox', '--disable-setuid-sandbox']),
|
||||||
|
|
||||||
|
// Docker-specific workarounds
|
||||||
'--disable-dev-shm-usage',
|
'--disable-dev-shm-usage',
|
||||||
'--disable-gpu',
|
'--disable-gpu',
|
||||||
'--disable-sync',
|
|
||||||
'--no-first-run',
|
// Window size
|
||||||
'--no-default-browser-check',
|
|
||||||
'--disable-default-apps',
|
|
||||||
'--disable-infobars',
|
|
||||||
'--disable-blink-features=AutomationControlled',
|
|
||||||
'--disable-component-update',
|
|
||||||
'--disable-domain-reliability',
|
|
||||||
'--disable-breakpad',
|
|
||||||
'--disable-background-networking',
|
|
||||||
'--disable-background-timer-throttling',
|
|
||||||
'--disable-backgrounding-occluded-windows',
|
|
||||||
'--disable-renderer-backgrounding',
|
|
||||||
'--disable-ipc-flooding-protection',
|
|
||||||
'--password-store=basic',
|
|
||||||
'--use-mock-keychain',
|
|
||||||
'--font-render-hinting=none',
|
|
||||||
'--force-color-profile=srgb',
|
|
||||||
`--window-size=${width},${height}`,
|
`--window-size=${width},${height}`,
|
||||||
|
|
||||||
|
// User data directory (for persistent sessions with persona)
|
||||||
|
...(userDataDir ? [`--user-data-dir=${userDataDir}`] : []),
|
||||||
|
|
||||||
|
// Headless mode
|
||||||
...(headless ? ['--headless=new'] : []),
|
...(headless ? ['--headless=new'] : []),
|
||||||
|
|
||||||
|
// SSL certificate checking
|
||||||
...(checkSsl ? [] : ['--ignore-certificate-errors']),
|
...(checkSsl ? [] : ['--ignore-certificate-errors']),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// Combine all args: base (from config) + dynamic (runtime) + extra (user overrides)
|
||||||
|
// Dynamic args come after base so they can override if needed
|
||||||
|
const chromiumArgs = [...baseArgs, ...dynamicArgs, ...extraArgs];
|
||||||
|
|
||||||
// Add extension loading flags
|
// Add extension loading flags
|
||||||
if (extensionPaths.length > 0) {
|
if (extensionPaths.length > 0) {
|
||||||
const extPathsArg = extensionPaths.join(',');
|
const extPathsArg = extensionPaths.join(',');
|
||||||
@@ -1231,6 +1318,7 @@ module.exports = {
|
|||||||
getEnv,
|
getEnv,
|
||||||
getEnvBool,
|
getEnvBool,
|
||||||
getEnvInt,
|
getEnvInt,
|
||||||
|
getEnvArray,
|
||||||
parseResolution,
|
parseResolution,
|
||||||
// PID file management
|
// PID file management
|
||||||
writePidWithMtime,
|
writePidWithMtime,
|
||||||
|
|||||||
@@ -42,7 +42,7 @@
|
|||||||
"CHROME_USER_DATA_DIR": {
|
"CHROME_USER_DATA_DIR": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "",
|
"default": "",
|
||||||
"description": "Path to Chrome user data directory for persistent sessions"
|
"description": "Path to Chrome user data directory for persistent sessions (derived from ACTIVE_PERSONA if not set)"
|
||||||
},
|
},
|
||||||
"CHROME_USER_AGENT": {
|
"CHROME_USER_AGENT": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@@ -53,16 +53,74 @@
|
|||||||
"CHROME_ARGS": {
|
"CHROME_ARGS": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {"type": "string"},
|
"items": {"type": "string"},
|
||||||
"default": [],
|
"default": [
|
||||||
|
"--no-first-run",
|
||||||
|
"--no-default-browser-check",
|
||||||
|
"--disable-default-apps",
|
||||||
|
"--disable-sync",
|
||||||
|
"--disable-infobars",
|
||||||
|
"--disable-blink-features=AutomationControlled",
|
||||||
|
"--disable-component-update",
|
||||||
|
"--disable-domain-reliability",
|
||||||
|
"--disable-breakpad",
|
||||||
|
"--disable-client-side-phishing-detection",
|
||||||
|
"--disable-hang-monitor",
|
||||||
|
"--disable-speech-synthesis-api",
|
||||||
|
"--disable-speech-api",
|
||||||
|
"--disable-print-preview",
|
||||||
|
"--disable-notifications",
|
||||||
|
"--disable-desktop-notifications",
|
||||||
|
"--disable-popup-blocking",
|
||||||
|
"--disable-prompt-on-repost",
|
||||||
|
"--disable-external-intent-requests",
|
||||||
|
"--disable-session-crashed-bubble",
|
||||||
|
"--disable-search-engine-choice-screen",
|
||||||
|
"--disable-datasaver-prompt",
|
||||||
|
"--ash-no-nudges",
|
||||||
|
"--hide-crash-restore-bubble",
|
||||||
|
"--suppress-message-center-popups",
|
||||||
|
"--noerrdialogs",
|
||||||
|
"--no-pings",
|
||||||
|
"--silent-debugger-extension-api",
|
||||||
|
"--deny-permission-prompts",
|
||||||
|
"--safebrowsing-disable-auto-update",
|
||||||
|
"--metrics-recording-only",
|
||||||
|
"--password-store=basic",
|
||||||
|
"--use-mock-keychain",
|
||||||
|
"--disable-cookie-encryption",
|
||||||
|
"--font-render-hinting=none",
|
||||||
|
"--force-color-profile=srgb",
|
||||||
|
"--disable-partial-raster",
|
||||||
|
"--disable-skia-runtime-opts",
|
||||||
|
"--disable-2d-canvas-clip-aa",
|
||||||
|
"--enable-webgl",
|
||||||
|
"--hide-scrollbars",
|
||||||
|
"--export-tagged-pdf",
|
||||||
|
"--generate-pdf-document-outline",
|
||||||
|
"--disable-lazy-loading",
|
||||||
|
"--disable-renderer-backgrounding",
|
||||||
|
"--disable-background-networking",
|
||||||
|
"--disable-background-timer-throttling",
|
||||||
|
"--disable-backgrounding-occluded-windows",
|
||||||
|
"--disable-ipc-flooding-protection",
|
||||||
|
"--disable-extensions-http-throttling",
|
||||||
|
"--disable-field-trial-config",
|
||||||
|
"--disable-back-forward-cache",
|
||||||
|
"--autoplay-policy=no-user-gesture-required",
|
||||||
|
"--disable-gesture-requirement-for-media-playback",
|
||||||
|
"--lang=en-US,en;q=0.9",
|
||||||
|
"--log-level=2",
|
||||||
|
"--enable-logging=stderr"
|
||||||
|
],
|
||||||
"x-aliases": ["CHROME_DEFAULT_ARGS"],
|
"x-aliases": ["CHROME_DEFAULT_ARGS"],
|
||||||
"description": "Default Chrome command-line arguments"
|
"description": "Default Chrome command-line arguments (static flags only, dynamic args like --user-data-dir are added at runtime)"
|
||||||
},
|
},
|
||||||
"CHROME_ARGS_EXTRA": {
|
"CHROME_ARGS_EXTRA": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {"type": "string"},
|
"items": {"type": "string"},
|
||||||
"default": [],
|
"default": [],
|
||||||
"x-aliases": ["CHROME_EXTRA_ARGS"],
|
"x-aliases": ["CHROME_EXTRA_ARGS"],
|
||||||
"description": "Extra arguments to append to Chrome command"
|
"description": "Extra arguments to append to Chrome command (for user customization)"
|
||||||
},
|
},
|
||||||
"CHROME_PAGELOAD_TIMEOUT": {
|
"CHROME_PAGELOAD_TIMEOUT": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
|
|||||||
@@ -117,6 +117,11 @@ async function main() {
|
|||||||
// Load installed extensions
|
// Load installed extensions
|
||||||
const extensionsDir = getEnv('CHROME_EXTENSIONS_DIR') ||
|
const extensionsDir = getEnv('CHROME_EXTENSIONS_DIR') ||
|
||||||
path.join(getEnv('DATA_DIR', '.'), 'personas', getEnv('ACTIVE_PERSONA', 'Default'), 'chrome_extensions');
|
path.join(getEnv('DATA_DIR', '.'), 'personas', getEnv('ACTIVE_PERSONA', 'Default'), 'chrome_extensions');
|
||||||
|
const userDataDir = getEnv('CHROME_USER_DATA_DIR');
|
||||||
|
|
||||||
|
if (userDataDir) {
|
||||||
|
console.error(`[*] Using user data dir: ${userDataDir}`);
|
||||||
|
}
|
||||||
|
|
||||||
const installedExtensions = [];
|
const installedExtensions = [];
|
||||||
const extensionPaths = [];
|
const extensionPaths = [];
|
||||||
@@ -150,9 +155,11 @@ async function main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Launch Chromium using consolidated function
|
// Launch Chromium using consolidated function
|
||||||
|
// userDataDir is derived from ACTIVE_PERSONA by get_config() if not explicitly set
|
||||||
const result = await launchChromium({
|
const result = await launchChromium({
|
||||||
binary,
|
binary,
|
||||||
outputDir: OUTPUT_DIR,
|
outputDir: OUTPUT_DIR,
|
||||||
|
userDataDir,
|
||||||
extensionPaths,
|
extensionPaths,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user