centralize chrome pid and zombie logic in chrome_utils

This commit is contained in:
Nick Sweeting
2025-12-29 17:57:23 -08:00
parent 4ba3e8d120
commit b670612685
8 changed files with 75 additions and 213 deletions

View File

@@ -45,6 +45,17 @@ function getEnvBool(name, defaultValue = false) {
return defaultValue;
}
/**
* Get integer environment variable.
* @param {string} name - Environment variable name
* @param {number} [defaultValue=0] - Default value if not set
* @returns {number} - Integer value
*/
function getEnvInt(name, defaultValue = 0) {
const val = parseInt(getEnv(name, String(defaultValue)), 10);
return isNaN(val) ? defaultValue : val;
}
/**
* Parse resolution string into width/height.
* @param {string} resolution - Resolution string like "1440,2000"
@@ -1004,6 +1015,7 @@ module.exports = {
// Environment helpers
getEnv,
getEnvBool,
getEnvInt,
parseResolution,
// PID file management
writePidWithMtime,

View File

@@ -25,12 +25,18 @@
const fs = require('fs');
const path = require('path');
const { spawn } = require('child_process');
const http = require('http');
// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
const puppeteer = require('puppeteer-core');
const { findChromium } = require('./chrome_utils.js');
const {
findChromium,
getEnv,
getEnvBool,
parseResolution,
findFreePort,
waitForDebugPort,
} = require('./chrome_utils.js');
// Extractor metadata
const PLUGIN_NAME = 'chrome_tab';
@@ -50,18 +56,6 @@ function parseArgs() {
return args;
}
// Get environment variable with default
function getEnv(name, defaultValue = '') {
return (process.env[name] || defaultValue).trim();
}
function getEnvBool(name, defaultValue = false) {
const val = getEnv(name, '').toLowerCase();
if (['true', '1', 'yes', 'on'].includes(val)) return true;
if (['false', '0', 'no', 'off'].includes(val)) return false;
return defaultValue;
}
// Cleanup handler for SIGTERM - close this snapshot's tab
async function cleanup() {
try {
@@ -91,63 +85,6 @@ async function cleanup() {
process.on('SIGTERM', cleanup);
process.on('SIGINT', cleanup);
// Parse resolution string
function parseResolution(resolution) {
const [width, height] = resolution.split(',').map(x => parseInt(x.trim(), 10));
return { width: width || 1440, height: height || 2000 };
}
// Find a free port
function findFreePort() {
return new Promise((resolve, reject) => {
const server = require('net').createServer();
server.unref();
server.on('error', reject);
server.listen(0, () => {
const port = server.address().port;
server.close(() => resolve(port));
});
});
}
// Wait for Chrome's DevTools port to be ready
function waitForDebugPort(port, timeout = 30000) {
const startTime = Date.now();
return new Promise((resolve, reject) => {
const tryConnect = () => {
if (Date.now() - startTime > timeout) {
reject(new Error(`Timeout waiting for Chrome debug port ${port}`));
return;
}
const req = http.get(`http://127.0.0.1:${port}/json/version`, (res) => {
let data = '';
res.on('data', chunk => data += chunk);
res.on('end', () => {
try {
const info = JSON.parse(data);
resolve(info);
} catch (e) {
setTimeout(tryConnect, 100);
}
});
});
req.on('error', () => {
setTimeout(tryConnect, 100);
});
req.setTimeout(1000, () => {
req.destroy();
setTimeout(tryConnect, 100);
});
};
tryConnect();
});
}
// Try to find the crawl's Chrome session
function findCrawlChromeSession(crawlId) {
if (!crawlId) return null;

View File

@@ -95,7 +95,7 @@ def find_chromium_binary():
@pytest.fixture(scope="session", autouse=True)
def ensure_chromium_and_puppeteer_installed():
"""Ensure Chromium and puppeteer are installed before running tests."""
from abx_pkg import Binary, NpmProvider
from abx_pkg import Binary, NpmProvider, BinProviderOverrides
# Rebuild pydantic models
NpmProvider.model_rebuild()