mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2026-04-06 07:47:53 +10:00

Commit: fix extensions dir paths, add personas migration

CLAUDE.md (11 lines changed)
@@ -27,6 +27,17 @@ uv sync --dev --all-extras # Always use uv, never pip directly
 source .venv/bin/activate
 ```
 
+### Generate and Apply Migrations
+```bash
+# Generate migrations (run from the archivebox subdirectory)
+cd archivebox
+./manage.py makemigrations
+
+# Apply migrations to test database
+cd data/
+archivebox init
+```
+
 ## Running Tests
 
 ### CRITICAL: Never Run as Root
@@ -10,8 +10,8 @@ import archivebox.base_models.models
 
 def cleanup_extra_columns(apps, schema_editor):
     """
-    Remove extra columns that were needed for v0.7.2/v0.8.6rc0 migration but don't exist in final models.
-    The actual models use @property methods to access these values from the process FK.
+    Create Process records from old cmd/pwd/cmd_version columns and remove those columns.
+    This preserves the execution details by moving them to the Process model.
     """
     with schema_editor.connection.cursor() as cursor:
        # Check if cmd column exists (means we came from v0.7.2/v0.8.6rc0)
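Context for the removed docstring line: before this commit, ArchiveResult exposed the legacy columns through @property accessors reading off the process FK. A minimal sketch of that pattern (property names and model shape inferred from this diff, not copied from the source):

```python
# Sketch only: legacy cmd/pwd exposed via the process FK instead of columns.
from django.db import models


class ArchiveResult(models.Model):
    process = models.OneToOneField('machine.Process', on_delete=models.PROTECT,
                                   related_name='archiveresult')

    @property
    def cmd(self) -> list:
        return self.process.cmd  # stored on machine_process, not on this table

    @property
    def pwd(self) -> str:
        return self.process.pwd
```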
@@ -19,8 +19,41 @@ def cleanup_extra_columns(apps, schema_editor):
         has_cmd = cursor.fetchone()[0] > 0
 
         if has_cmd:
-            print(" Cleaning up temporary columns from core_archiveresult...")
-            # Rebuild table without the extra columns
+            print(" Migrating cmd/pwd/cmd_version data to Process records...")
+
+            # For each ArchiveResult, create a Process record with cmd/pwd data
+            # Note: cmd_version from old schema is not preserved (it's now derived from Binary)
+            cursor.execute("""
+                SELECT id, cmd, pwd, binary_id, iface_id, start_ts, end_ts, status
+                FROM core_archiveresult
+            """)
+            archive_results = cursor.fetchall()
+
+            from archivebox.uuid_compat import uuid7
+            from archivebox.base_models.models import get_or_create_system_user_pk
+
+            machine_id = cursor.execute("SELECT id FROM machine_machine LIMIT 1").fetchone()[0]
+
+            for ar_id, cmd, pwd, binary_id, iface_id, start_ts, end_ts, status in archive_results:
+                # Create Process record
+                process_id = str(uuid7())
+                cursor.execute("""
+                    INSERT INTO machine_process (
+                        id, created_at, modified_at,
+                        machine_id, binary_id, iface_id,
+                        pwd, cmd, env, timeout,
+                        pid, exit_code, stdout, stderr,
+                        started_at, ended_at, url, status, retry_at
+                    ) VALUES (?, datetime('now'), datetime('now'), ?, ?, ?, ?, ?, '{}', 120, NULL, NULL, '', '', ?, ?, '', ?, NULL)
+                """, (process_id, machine_id, binary_id, iface_id, pwd or '', cmd or '[]', start_ts, end_ts, status or 'queued'))
+
+                # Update ArchiveResult to point to new Process
+                cursor.execute("UPDATE core_archiveresult SET process_id = ? WHERE id = ?", (process_id, ar_id))
+
+            print(f" ✓ Created {len(archive_results)} Process records from ArchiveResult data")
+
+            # Now rebuild table without the extra columns
+            print(" Rebuilding core_archiveresult table...")
             cursor.execute("""
                 CREATE TABLE core_archiveresult_final (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
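The hunk opens just below the query that computes has_cmd, so the actual SQL is not visible here. On SQLite, a column-existence check that yields a count to compare against zero is commonly written against pragma_table_info; a sketch of what the elided query plausibly looks like (an assumption, not the project's confirmed query):

```python
# Hypothetical reconstruction of the elided column-existence check that
# feeds `has_cmd = cursor.fetchone()[0] > 0` above.
cursor.execute("""
    SELECT COUNT(*) FROM pragma_table_info('core_archiveresult')
    WHERE name = 'cmd'
""")
has_cmd = cursor.fetchone()[0] > 0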
@@ -48,14 +81,14 @@ def cleanup_extra_columns(apps, schema_editor):
                     num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                     num_uses_failed INTEGER NOT NULL DEFAULT 0,
 
-                    process_id TEXT,
+                    process_id TEXT NOT NULL,
 
                     FOREIGN KEY (snapshot_id) REFERENCES core_snapshot(id) ON DELETE CASCADE,
                     FOREIGN KEY (process_id) REFERENCES machine_process(id) ON DELETE RESTRICT
                 )
             """)
 
-            # Copy data (cmd, pwd, etc. are now accessed via process FK)
+            # Copy data (cmd, pwd, etc. are now in Process records)
             cursor.execute("""
                 INSERT INTO core_archiveresult_final SELECT
                     id, uuid, created_at, modified_at,
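The hunk ends inside the INSERT ... SELECT column list, so the rest of the rebuild is not shown. The standard SQLite pattern this migration follows (create a *_final table, copy rows, swap) would finish roughly like this (statements assumed, not visible in the diff):

```python
# Assumed tail of the SQLite "rebuild without columns" pattern; the actual
# migration's statements beyond this hunk are not shown.
cursor.execute("DROP TABLE core_archiveresult")
cursor.execute("ALTER TABLE core_archiveresult_final RENAME TO core_archiveresult")
```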
@@ -0,0 +1,108 @@
+# Generated by Django 6.0 on 2025-12-31 09:04
+
+import django.db.models.deletion
+import django.utils.timezone
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0026_final_field_adjustments'),
+        ('crawls', '0002_upgrade_to_0_9_0'),
+        ('machine', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='hook_name',
+            field=models.CharField(blank=True, db_index=True, default='', help_text='Full filename of the hook that executed (e.g., on_Snapshot__50_wget.py)', max_length=255),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='id',
+            field=models.AutoField(editable=False, primary_key=True, serialize=False),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='output_files',
+            field=models.JSONField(default=dict, help_text='Dict of {relative_path: {metadata}}'),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='output_json',
+            field=models.JSONField(blank=True, default=None, help_text='Structured metadata (headers, redirects, etc.)', null=True),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='output_mimetypes',
+            field=models.CharField(blank=True, default='', help_text='CSV of mimetypes sorted by size', max_length=512),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='output_size',
+            field=models.BigIntegerField(default=0, help_text='Total bytes of all output files'),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='output_str',
+            field=models.TextField(blank=True, default='', help_text='Human-readable output summary'),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='plugin',
+            field=models.CharField(db_index=True, default='', max_length=32),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='process',
+            field=models.OneToOneField(help_text='Process execution details for this archive result', on_delete=django.db.models.deletion.PROTECT, related_name='archiveresult', to='machine.process'),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='retry_at',
+            field=models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='status',
+            field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
+        ),
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='uuid',
+            field=models.UUIDField(blank=True, db_index=True, default=uuid.uuid7, null=True),
+        ),
+        migrations.AlterField(
+            model_name='snapshot',
+            name='config',
+            field=models.JSONField(default=dict),
+        ),
+        migrations.AlterField(
+            model_name='snapshot',
+            name='crawl',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to='crawls.crawl'),
+        ),
+        migrations.AlterField(
+            model_name='snapshot',
+            name='current_step',
+            field=models.PositiveSmallIntegerField(db_index=True, default=0, help_text='Current hook step being executed (0-9). Used for sequential hook execution.'),
+        ),
+        migrations.AlterField(
+            model_name='snapshot',
+            name='depth',
+            field=models.PositiveSmallIntegerField(db_index=True, default=0),
+        ),
+        migrations.AlterField(
+            model_name='snapshot',
+            name='id',
+            field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
+        ),
+        migrations.AlterField(
+            model_name='snapshottag',
+            name='id',
+            field=models.AutoField(primary_key=True, serialize=False),
+        ),
+    ]
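This generated migration defaults uuid and snapshot.id to uuid.uuid7, which exists in the stdlib only on Python 3.14+; the hand-written migration earlier in this commit imports uuid7 from archivebox.uuid_compat instead. A sketch of what such a compat shim can look like (the real archivebox.uuid_compat is not shown in this diff):

```python
# Hedged sketch of a uuid7 compat shim; not the project's actual module.
try:
    from uuid import uuid7  # stdlib on Python >= 3.14
except ImportError:
    import secrets
    import time
    import uuid

    def uuid7() -> uuid.UUID:
        # RFC 9562 layout: 48-bit unix ms | ver=7 | 12 rand | var=10 | 62 rand
        ts_ms = int(time.time() * 1000) & ((1 << 48) - 1)
        value = (
            (ts_ms << 80)
            | (0x7 << 76)
            | (secrets.randbits(12) << 64)
            | (0b10 << 62)
            | secrets.randbits(62)
        )
        return uuid.UUID(int=value)
```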
@@ -2321,7 +2321,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
     process = models.OneToOneField(
         'machine.Process',
         on_delete=models.PROTECT,
-        null=False,  # Required after migration 4
+        null=False,
         related_name='archiveresult',
         help_text='Process execution details for this archive result'
     )
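With on_delete=models.PROTECT, a Process row cannot be deleted while its ArchiveResult still points at it; Django raises ProtectedError instead. A small illustration (the instance name is hypothetical):

```python
# PROTECT semantics for the OneToOneField above; `ar` is a hypothetical
# ArchiveResult instance fetched elsewhere.
from django.db.models import ProtectedError

try:
    ar.process.delete()
except ProtectedError:
    ar.delete()  # delete the dependent ArchiveResult first, then the Process
```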
@@ -144,7 +144,7 @@ class BinaryAdmin(BaseModelAdmin):
 
 
 class ProcessAdmin(BaseModelAdmin):
-    list_display = ('id', 'created_at', 'machine_info', 'archiveresult_link', 'cmd_str', 'status', 'exit_code', 'pid', 'binary_info', 'health')
+    list_display = ('id', 'created_at', 'machine_info', 'archiveresult_link', 'cmd_str', 'status', 'exit_code', 'pid', 'binary_info')
     sort_fields = ('id', 'created_at', 'status', 'exit_code', 'pid')
     search_fields = ('id', 'machine__id', 'binary__name', 'cmd', 'pwd', 'stdout', 'stderr')
@@ -171,10 +171,6 @@ class ProcessAdmin(BaseModelAdmin):
             'fields': ('stdout', 'stderr'),
             'classes': ('card', 'wide', 'collapse'),
         }),
-        ('Usage', {
-            'fields': ('num_uses_succeeded', 'num_uses_failed'),
-            'classes': ('card',),
-        }),
         ('Timestamps', {
             'fields': ('created_at', 'modified_at'),
             'classes': ('card',),
@@ -234,8 +234,6 @@ class Migration(migrations.Migration):
                 ('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
                 ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
                 ('modified_at', models.DateTimeField(auto_now=True)),
-                ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
-                ('num_uses_failed', models.PositiveIntegerField(default=0)),
                 ('pwd', models.CharField(blank=True, default='', help_text='Working directory for process execution', max_length=512)),
                 ('cmd', models.JSONField(blank=True, default=list, help_text='Command as array of arguments')),
                 ('env', models.JSONField(blank=True, default=dict, help_text='Environment variables for process')),
@@ -625,7 +625,7 @@ class ProcessManager(models.Manager):
         return process
 
 
-class Process(ModelWithHealthStats):
+class Process(models.Model):
    """
    Tracks a single OS process execution.
 
archivebox/personas/migrations/0001_initial.py (new file, 29 lines)
@@ -0,0 +1,29 @@
+# Generated by Django 6.0 on 2025-12-31 09:06
+
+import archivebox.base_models.models
+import django.db.models.deletion
+import django.utils.timezone
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    initial = True
+
+    dependencies = [
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='Persona',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('config', models.JSONField(blank=True, default=dict, null=True)),
+                ('name', models.CharField(max_length=64, unique=True)),
+                ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
+                ('created_by', models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
+            ],
+        ),
+    ]
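Read back from the CreateModel above, the new Persona model is roughly the following (the class itself is an inference from the migration; Meta options, if any, are not visible here):

```python
# Persona model as implied by personas/migrations/0001_initial.py above;
# inferred from the migration, not copied from the actual source.
from django.conf import settings
from django.db import models
from django.utils import timezone

from archivebox.base_models.models import get_or_create_system_user_pk


class Persona(models.Model):
    config = models.JSONField(blank=True, null=True, default=dict)
    name = models.CharField(max_length=64, unique=True)
    created_at = models.DateTimeField(db_index=True, default=timezone.now)
    created_by = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        default=get_or_create_system_user_pk,
    )
```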
@@ -203,86 +203,115 @@ function waitForDebugPort(port, timeout = 30000) {
 
 /**
  * Kill zombie Chrome processes from stale crawls.
- * Scans DATA_DIR/crawls/<crawl_id>/chrome/<name>.pid for stale processes.
+ * Recursively scans DATA_DIR for any */chrome/*.pid files from stale crawls.
+ * Does not assume specific directory structure - works with nested paths.
  * @param {string} [dataDir] - Data directory (defaults to DATA_DIR env or '.')
  * @returns {number} - Number of zombies killed
  */
 function killZombieChrome(dataDir = null) {
   dataDir = dataDir || getEnv('DATA_DIR', '.');
-  const crawlsDir = path.join(dataDir, 'crawls');
   const now = Date.now();
   const fiveMinutesAgo = now - 300000;
   let killed = 0;
 
   console.error('[*] Checking for zombie Chrome processes...');
 
-  if (!fs.existsSync(crawlsDir)) {
-    console.error('[+] No crawls directory found');
+  if (!fs.existsSync(dataDir)) {
+    console.error('[+] No data directory found');
     return 0;
   }
 
+  /**
+   * Recursively find all chrome/*.pid files in directory tree
+   * @param {string} dir - Directory to search
+   * @param {number} depth - Current recursion depth (limit to 10)
+   * @returns {Array<{pidFile: string, crawlDir: string}>} - Array of PID file info
+   */
+  function findChromePidFiles(dir, depth = 0) {
+    if (depth > 10) return []; // Prevent infinite recursion
+
+    const results = [];
+    try {
+      const entries = fs.readdirSync(dir, { withFileTypes: true });
+
+      for (const entry of entries) {
+        if (!entry.isDirectory()) continue;
+
+        const fullPath = path.join(dir, entry.name);
+
+        // Found a chrome directory - check for .pid files
+        if (entry.name === 'chrome') {
+          try {
+            const pidFiles = fs.readdirSync(fullPath).filter(f => f.endsWith('.pid'));
+            const crawlDir = dir; // Parent of chrome/ is the crawl dir
+
+            for (const pidFileName of pidFiles) {
+              results.push({
+                pidFile: path.join(fullPath, pidFileName),
+                crawlDir: crawlDir,
+              });
+            }
+          } catch (e) {
+            // Skip if can't read chrome dir
+          }
+        } else {
+          // Recurse into subdirectory (skip hidden dirs and node_modules)
+          if (!entry.name.startsWith('.') && entry.name !== 'node_modules') {
+            results.push(...findChromePidFiles(fullPath, depth + 1));
+          }
+        }
+      }
+    } catch (e) {
+      // Skip if can't read directory
+    }
+    return results;
+  }
+
   try {
-    const crawls = fs.readdirSync(crawlsDir, { withFileTypes: true });
-
-    for (const crawl of crawls) {
-      if (!crawl.isDirectory()) continue;
-
-      const crawlDir = path.join(crawlsDir, crawl.name);
-      const chromeDir = path.join(crawlDir, 'chrome');
-
-      if (!fs.existsSync(chromeDir)) continue;
+    const chromePids = findChromePidFiles(dataDir);
 
+    for (const {pidFile, crawlDir} of chromePids) {
       // Check if crawl was modified recently (still active)
       try {
         const crawlStats = fs.statSync(crawlDir);
         if (crawlStats.mtimeMs > fiveMinutesAgo) {
-          continue;
+          continue; // Crawl is active, skip
         }
       } catch (e) {
         continue;
       }
 
-      // Crawl is stale, check for PIDs
+      // Crawl is stale, check PID
       try {
-        const pidFiles = fs.readdirSync(chromeDir).filter(f => f.endsWith('.pid'));
+        const pid = parseInt(fs.readFileSync(pidFile, 'utf8').trim(), 10);
+        if (isNaN(pid) || pid <= 0) continue;
 
-        for (const pidFileName of pidFiles) {
-          const pidFile = path.join(chromeDir, pidFileName);
+        // Check if process exists
+        try {
+          process.kill(pid, 0);
+        } catch (e) {
+          // Process dead, remove stale PID file
+          try { fs.unlinkSync(pidFile); } catch (e) {}
+          continue;
+        }
 
-          try {
-            const pid = parseInt(fs.readFileSync(pidFile, 'utf8').trim(), 10);
-            if (isNaN(pid) || pid <= 0) continue;
-
-            // Check if process exists
-            try {
-              process.kill(pid, 0);
-            } catch (e) {
-              // Process dead, remove stale PID file
-              try { fs.unlinkSync(pidFile); } catch (e) {}
-              continue;
-            }
-
-            // Process alive and crawl is stale - zombie!
-            console.error(`[!] Found zombie (PID ${pid}) from stale crawl ${crawl.name}`);
-
-            try {
-              try { process.kill(-pid, 'SIGKILL'); } catch (e) { process.kill(pid, 'SIGKILL'); }
-              killed++;
-              console.error(`[+] Killed zombie (PID ${pid})`);
-              try { fs.unlinkSync(pidFile); } catch (e) {}
-            } catch (e) {
-              console.error(`[!] Failed to kill PID ${pid}: ${e.message}`);
-            }
-          } catch (e) {
-            // Skip invalid PID files
-          }
-        }
+        // Process alive and crawl is stale - zombie!
+        console.error(`[!] Found zombie (PID ${pid}) from stale crawl ${path.basename(crawlDir)}`);
+
+        try {
+          try { process.kill(-pid, 'SIGKILL'); } catch (e) { process.kill(pid, 'SIGKILL'); }
+          killed++;
+          console.error(`[+] Killed zombie (PID ${pid})`);
+          try { fs.unlinkSync(pidFile); } catch (e) {}
+        } catch (e) {
+          console.error(`[!] Failed to kill PID ${pid}: ${e.message}`);
+        }
+      } catch (e) {
+        // Skip invalid PID files
+      }
     }
   } catch (e) {
-    console.error(`[!] Error scanning crawls: ${e.message}`);
+    console.error(`[!] Error scanning for Chrome processes: ${e.message}`);
   }
 
   if (killed > 0) {
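The liveness test above is the classic signal-0 probe: process.kill(pid, 0) throws if the PID no longer exists but delivers no signal when it does. For reference, the same idiom in Python (a sketch, not code from this repo):

```python
# Signal-0 liveness probe, mirroring process.kill(pid, 0) in chrome_utils.js.
import os


def pid_is_alive(pid: int) -> bool:
    try:
        os.kill(pid, 0)        # signal 0: existence check, nothing delivered
        return True
    except ProcessLookupError:
        return False           # gone: safe to unlink the stale .pid file
    except PermissionError:
        return True            # exists, but owned by another user
```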
@@ -1327,7 +1356,7 @@ function findChromium() {
  * @returns {string} - Absolute path to extensions directory
  */
 function getExtensionsDir() {
-  const dataDir = getEnv('DATA_DIR', './data');
+  const dataDir = getEnv('DATA_DIR', '.');
   const persona = getEnv('ACTIVE_PERSONA', 'Default');
   return getEnv('CHROME_EXTENSIONS_DIR') ||
     path.join(dataDir, 'personas', persona, 'chrome_extensions');
@@ -1459,7 +1488,7 @@ async function installExtensionWithCache(extension, options = {}) {
 
   const installedExt = await loadOrInstallExtension(extension, extensionsDir);
 
-  if (!installedExt) {
+  if (!installedExt?.version) {
     console.error(`[❌] Failed to install ${extension.name} extension`);
     return null;
   }
@@ -214,12 +214,15 @@ def get_extensions_dir() -> str:
 
     Tries chrome_utils.js first, falls back to Python computation.
     """
-    returncode, stdout, stderr = _call_chrome_utils('getExtensionsDir')
-    if returncode == 0 and stdout.strip():
-        return stdout.strip()
+    try:
+        returncode, stdout, stderr = _call_chrome_utils('getExtensionsDir')
+        if returncode == 0 and stdout.strip():
+            return stdout.strip()
+    except subprocess.TimeoutExpired:
+        pass  # Fall through to default computation
 
     # Fallback to default computation if JS call fails
-    data_dir = os.environ.get('DATA_DIR', './data')
+    data_dir = os.environ.get('DATA_DIR', '.')
     persona = os.environ.get('ACTIVE_PERSONA', 'Default')
     return str(Path(data_dir) / 'personas' / persona / 'chrome_extensions')
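After this change both implementations resolve the extensions dir identically: an explicit CHROME_EXTENSIONS_DIR wins, otherwise DATA_DIR (now defaulting to '.', matching chrome_utils.js) plus personas/<ACTIVE_PERSONA>/chrome_extensions. A quick sketch of the fallback path (the environment values are made up for illustration):

```python
# Fallback resolution implied by the diff; example env values are hypothetical.
from pathlib import Path

env = {'DATA_DIR': '/srv/archivebox', 'ACTIVE_PERSONA': 'Work'}  # hypothetical

data_dir = env.get('DATA_DIR', '.')
persona = env.get('ACTIVE_PERSONA', 'Default')
print(Path(data_dir) / 'personas' / persona / 'chrome_extensions')
# -> /srv/archivebox/personas/Work/chrome_extensions
```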
@@ -760,31 +763,39 @@ def setup_chrome_session(
     # Create tab
     tab_env = env.copy()
     tab_env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
-    result = subprocess.run(
-        ['node', str(CHROME_TAB_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}', f'--crawl-id={crawl_id}'],
-        cwd=str(snapshot_chrome_dir),
-        capture_output=True,
-        text=True,
-        timeout=60,
-        env=tab_env
-    )
-    if result.returncode != 0:
-        cleanup_chrome(chrome_launch_process, chrome_pid)
-        raise RuntimeError(f"Tab creation failed: {result.stderr}")
-
-    # Navigate to URL if requested
-    if navigate and CHROME_NAVIGATE_HOOK and test_url != 'about:blank':
-        try:
-            result = subprocess.run(
-                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+    try:
+        result = subprocess.run(
+            ['node', str(CHROME_TAB_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}', f'--crawl-id={crawl_id}'],
             cwd=str(snapshot_chrome_dir),
             capture_output=True,
             text=True,
-            timeout=120,
-            env=env
+            timeout=60,
+            env=tab_env
         )
         if result.returncode != 0:
             cleanup_chrome(chrome_launch_process, chrome_pid)
-            raise RuntimeError(f"Navigation failed: {result.stderr}")
+            raise RuntimeError(f"Tab creation failed: {result.stderr}")
+    except subprocess.TimeoutExpired:
+        cleanup_chrome(chrome_launch_process, chrome_pid)
+        raise RuntimeError("Tab creation timed out after 60s")
+
+    # Navigate to URL if requested
+    if navigate and CHROME_NAVIGATE_HOOK and test_url != 'about:blank':
+        try:
+            result = subprocess.run(
+                ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
+                cwd=str(snapshot_chrome_dir),
+                capture_output=True,
+                text=True,
+                timeout=120,
+                env=env
+            )
+            if result.returncode != 0:
+                cleanup_chrome(chrome_launch_process, chrome_pid)
+                raise RuntimeError(f"Navigation failed: {result.stderr}")
         except subprocess.TimeoutExpired:
             cleanup_chrome(chrome_launch_process, chrome_pid)
             raise RuntimeError("Navigation timed out after 120s")
 
     return chrome_launch_process, chrome_pid, snapshot_chrome_dir