#!/usr/bin/env node
/**
 * Navigate the Chrome browser to the target URL.
 *
 * This is a simple hook that ONLY navigates - nothing else.
 * Pre-load hooks (21-29) should set up their own CDP listeners.
 * Post-load hooks (31+) can then read from the loaded page.
 *
 * Usage: on_Snapshot__30_chrome_navigate.js --url=URL --snapshot-id=ID
 * Output: Writes final_url.txt and then the page_loaded.txt marker when
 *         navigation completes
 *
 * Environment variables:
 *   CHROME_PAGELOAD_TIMEOUT: Timeout in seconds (falls back to CHROME_TIMEOUT,
 *                            then TIMEOUT; default: 60)
 *   CHROME_DELAY_AFTER_LOAD: Extra delay after load in seconds (default: 0)
 *   CHROME_WAIT_FOR: Wait condition (default: networkidle2)
 */

const fs = require('fs');
const path = require('path');
const puppeteer = require('puppeteer-core');

const EXTRACTOR_NAME = 'chrome_navigate';
const CHROME_SESSION_DIR = '../chrome_session';
const OUTPUT_DIR = '.';

/**
 * Parse `--key=value` / `--flag` command-line arguments.
 *
 * Dashes in keys become underscores (`--snapshot-id` -> `snapshot_id`).
 * A flag with no value, or with an empty value, is stored as `true`.
 * Arguments that do not start with `--` are ignored.
 *
 * @returns {Object<string, string|boolean>} Parsed arguments keyed by name.
 */
function parseArgs() {
    const args = {};
    for (const arg of process.argv.slice(2)) {
        if (!arg.startsWith('--')) continue;
        // Only the first '=' separates key from value; the rest belong to the
        // value (URLs routinely contain '=' in their query string).
        const [key, ...valueParts] = arg.slice(2).split('=');
        args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
    }
    return args;
}

/**
 * Read an environment variable as a trimmed string.
 *
 * @param {string} name - Environment variable name.
 * @param {string} [defaultValue=''] - Used when the variable is unset or empty.
 * @returns {string} The trimmed value.
 */
function getEnv(name, defaultValue = '') {
    return (process.env[name] || defaultValue).trim();
}

/**
 * Read an environment variable as a base-10 integer.
 *
 * @param {string} name - Environment variable name.
 * @param {number} [defaultValue=0] - Used when the variable is unset or not numeric.
 * @returns {number} The parsed integer, or `defaultValue`.
 */
function getEnvInt(name, defaultValue = 0) {
    const val = Number.parseInt(getEnv(name, String(defaultValue)), 10);
    return Number.isNaN(val) ? defaultValue : val;
}

/**
 * Read an environment variable as a floating-point number.
 *
 * @param {string} name - Environment variable name.
 * @param {number} [defaultValue=0] - Used when the variable is unset or not numeric.
 * @returns {number} The parsed number, or `defaultValue`.
 */
function getEnvFloat(name, defaultValue = 0) {
    const val = Number.parseFloat(getEnv(name, String(defaultValue)));
    return Number.isNaN(val) ? defaultValue : val;
}

/**
 * Read and trim a text file from the chrome session directory.
 *
 * Reads directly instead of checking existence first, so there is no window
 * in which the file can disappear between the check and the read.
 *
 * @param {string} fileName - File name relative to CHROME_SESSION_DIR.
 * @returns {string|null} Trimmed contents, or null when the file is missing.
 * @throws {Error} Any read error other than a missing file/directory.
 */
function readSessionFile(fileName) {
    try {
        return fs.readFileSync(path.join(CHROME_SESSION_DIR, fileName), 'utf8').trim();
    } catch (e) {
        if (e.code === 'ENOENT' || e.code === 'ENOTDIR') return null;
        throw e;
    }
}

/**
 * Get the CDP WebSocket URL recorded in the chrome session directory.
 *
 * @returns {string|null} Contents of cdp_url.txt, or null when it is missing.
 */
function getCdpUrl() {
    return readSessionFile('cdp_url.txt');
}

/**
 * Get the page target ID recorded in the chrome session directory.
 *
 * @returns {string|null} Contents of page_id.txt, or null when it is missing.
 */
function getPageId() {
    return readSessionFile('page_id.txt');
}

/**
 * Resolve the `waitUntil` condition from CHROME_WAIT_FOR.
 *
 * @returns {string} One of 'domcontentloaded', 'load', 'networkidle0',
 *   'networkidle2'; unrecognised values fall back to 'networkidle2'.
 */
function getWaitCondition() {
    const waitFor = getEnv('CHROME_WAIT_FOR', 'networkidle2').toLowerCase();
    const valid = ['domcontentloaded', 'load', 'networkidle0', 'networkidle2'];
    return valid.includes(waitFor) ? waitFor : 'networkidle2';
}

/**
 * Resolve after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Connect to the running browser over CDP and navigate a page to `url`.
 *
 * The page whose target ID matches page_id.txt is used when it can be found;
 * otherwise the last page reported by the browser is used. On success,
 * final_url.txt and page_loaded.txt are written into OUTPUT_DIR. The CDP
 * connection is always released before returning, on every path.
 *
 * @param {string} url - URL to navigate to.
 * @param {string} cdpUrl - Browser WebSocket endpoint to connect to.
 * @returns {Promise<{success: boolean, finalUrl?: string, status?: (number|null), error?: string}>}
 *   Never rejects for navigation errors; failures are reported via `success: false`.
 */
async function navigate(url, cdpUrl) {
    // A value of 0 (or an unset variable) falls through to the next candidate.
    const timeout = (getEnvInt('CHROME_PAGELOAD_TIMEOUT') || getEnvInt('CHROME_TIMEOUT') || getEnvInt('TIMEOUT', 60)) * 1000;
    const delayAfterLoad = getEnvFloat('CHROME_DELAY_AFTER_LOAD', 0) * 1000;
    const waitUntil = getWaitCondition();
    const pageId = getPageId();

    let browser = null;
    try {
        browser = await puppeteer.connect({ browserWSEndpoint: cdpUrl });

        const pages = await browser.pages();
        if (pages.length === 0) {
            return { success: false, error: 'No pages found in browser' };
        }

        // Find page by target ID if available
        let page = null;
        if (pageId) {
            page = pages.find(p => {
                const target = p.target();
                // NOTE(review): _targetId is a private puppeteer field - confirm it
                // exists in the pinned puppeteer-core version.
                return target && target._targetId === pageId;
            });
        }
        if (!page) {
            page = pages[pages.length - 1];
        }

        // Navigate
        console.log(`Navigating to ${url} (wait: ${waitUntil}, timeout: ${timeout}ms)`);
        const response = await page.goto(url, { waitUntil, timeout });

        // Optional delay
        if (delayAfterLoad > 0) {
            console.log(`Waiting ${delayAfterLoad}ms after load...`);
            await sleep(delayAfterLoad);
        }

        const finalUrl = page.url();
        const status = response ? response.status() : null;

        // Write final_url.txt BEFORE the marker so that anything treating
        // page_loaded.txt as the "navigation finished" signal never observes
        // the marker without the final URL next to it.
        fs.writeFileSync(path.join(OUTPUT_DIR, 'final_url.txt'), finalUrl);
        fs.writeFileSync(path.join(OUTPUT_DIR, 'page_loaded.txt'), new Date().toISOString());

        return { success: true, finalUrl, status };
    } catch (e) {
        return { success: false, error: `${e.name}: ${e.message}` };
    } finally {
        // Release the CDP connection on every path, including the early
        // "no pages" return. A failed disconnect must not change the
        // navigation result, so it is only reported as a warning.
        if (browser) {
            try {
                await browser.disconnect();
            } catch (e) {
                console.error(`Warning: failed to disconnect from browser: ${e.message}`);
            }
        }
    }
}

/**
 * CLI entry point: validate arguments, navigate, and print the result.
 *
 * Prints START_TS / END_TS / DURATION / OUTPUT / STATUS lines and a
 * RESULT_JSON line to stdout, and ERROR to stderr on failure. Exits with
 * code 0 when navigation succeeded and 1 otherwise (including missing
 * arguments or a missing chrome session).
 *
 * @returns {Promise<void>}
 */
async function main() {
    const args = parseArgs();
    const url = args.url;
    const snapshotId = args.snapshot_id;

    if (!url || !snapshotId) {
        console.error('Usage: on_Snapshot__30_chrome_navigate.js --url= --snapshot-id=');
        process.exit(1);
    }

    const startTs = new Date();
    let status = 'failed';
    let output = null;
    let error = '';

    const cdpUrl = getCdpUrl();
    if (!cdpUrl) {
        console.error('ERROR: chrome_session not found');
        process.exit(1);
    }

    const result = await navigate(url, cdpUrl);

    if (result.success) {
        status = 'succeeded';
        output = OUTPUT_DIR;
        console.log(`Page loaded: ${result.finalUrl} (HTTP ${result.status})`);
    } else {
        error = result.error;
    }

    const endTs = new Date();
    const duration = (endTs - startTs) / 1000;

    console.log(`START_TS=${startTs.toISOString()}`);
    console.log(`END_TS=${endTs.toISOString()}`);
    console.log(`DURATION=${duration.toFixed(2)}`);
    if (output) console.log(`OUTPUT=${output}`);
    console.log(`STATUS=${status}`);
    if (error) console.error(`ERROR=${error}`);

    console.log(`RESULT_JSON=${JSON.stringify({
        extractor: EXTRACTOR_NAME,
        url,
        snapshot_id: snapshotId,
        status,
        start_ts: startTs.toISOString(),
        end_ts: endTs.toISOString(),
        duration: Math.round(duration * 100) / 100,
        output,
        error: error || null,
    })}`);

    process.exit(status === 'succeeded' ? 0 : 1);
}

main().catch(e => {
    console.error(`Fatal error: ${e.message}`);
    process.exit(1);
});