#!/usr/bin/env node /** * Chrome Extension Management Utilities * * Handles downloading, installing, and managing Chrome extensions for browser automation. * Ported from the TypeScript implementation in archivebox.ts */ const fs = require('fs'); const path = require('path'); const crypto = require('crypto'); const { exec } = require('child_process'); const { promisify } = require('util'); const { Readable } = require('stream'); const { finished } = require('stream/promises'); const execAsync = promisify(exec); // Try to import unzipper, fallback to system unzip if not available let unzip = null; try { const unzipper = require('unzipper'); unzip = async (sourcePath, destPath) => { const stream = fs.createReadStream(sourcePath).pipe(unzipper.Extract({ path: destPath })); return stream.promise(); }; } catch (err) { // Will use system unzip command as fallback } /** * Compute the extension ID from the unpacked path. * Chrome uses a SHA256 hash of the unpacked extension directory path to compute a dynamic id. * * @param {string} unpacked_path - Path to the unpacked extension directory * @returns {string} - 32-character extension ID */ function getExtensionId(unpacked_path) { // Chrome uses a SHA256 hash of the unpacked extension directory path const hash = crypto.createHash('sha256'); hash.update(Buffer.from(unpacked_path, 'utf-8')); // Convert first 32 hex chars to characters in the range 'a'-'p' const detected_extension_id = Array.from(hash.digest('hex')) .slice(0, 32) .map(i => String.fromCharCode(parseInt(i, 16) + 'a'.charCodeAt(0))) .join(''); return detected_extension_id; } /** * Download and install a Chrome extension from the Chrome Web Store. * * @param {Object} extension - Extension metadata object * @param {string} extension.webstore_id - Chrome Web Store extension ID * @param {string} extension.name - Human-readable extension name * @param {string} extension.crx_url - URL to download the CRX file * @param {string} extension.crx_path - Local path to save the CRX file * @param {string} extension.unpacked_path - Path to extract the extension * @returns {Promise} - True if installation succeeded */ async function installExtension(extension) { const manifest_path = path.join(extension.unpacked_path, 'manifest.json'); // Download CRX file if not already downloaded if (!fs.existsSync(manifest_path) && !fs.existsSync(extension.crx_path)) { console.log(`[🛠️] Downloading missing extension ${extension.name} ${extension.webstore_id} -> ${extension.crx_path}`); try { // Ensure parent directory exists const crxDir = path.dirname(extension.crx_path); if (!fs.existsSync(crxDir)) { fs.mkdirSync(crxDir, { recursive: true }); } // Download CRX file from Chrome Web Store const response = await fetch(extension.crx_url); if (!response.ok) { console.warn(`[⚠️] Failed to download extension ${extension.name}: HTTP ${response.status}`); return false; } if (response.body) { const crx_file = fs.createWriteStream(extension.crx_path); const crx_stream = Readable.fromWeb(response.body); await finished(crx_stream.pipe(crx_file)); } else { console.warn(`[⚠️] Failed to download extension ${extension.name}: No response body`); return false; } } catch (err) { console.error(`[❌] Failed to download extension ${extension.name}:`, err); return false; } } // Unzip CRX file to unpacked_path await fs.promises.mkdir(extension.unpacked_path, { recursive: true }); try { // Try system unzip command first await execAsync(`/usr/bin/unzip -o ${extension.crx_path} -d ${extension.unpacked_path}`); } catch (err1) { if (unzip) { // Fallback to unzipper library try { await unzip(extension.crx_path, extension.unpacked_path); } catch (err2) { console.error(`[❌] Failed to unzip ${extension.crx_path}:`, err1.message); return false; } } else { console.error(`[❌] Failed to unzip ${extension.crx_path}:`, err1.message); return false; } } if (!fs.existsSync(manifest_path)) { console.error(`[❌] Failed to install ${extension.crx_path}: could not find manifest.json in unpacked_path`); return false; } return true; } /** * Load or install a Chrome extension, computing all metadata. * * @param {Object} ext - Partial extension metadata (at minimum: webstore_id or unpacked_path) * @param {string} [ext.webstore_id] - Chrome Web Store extension ID * @param {string} [ext.name] - Human-readable extension name * @param {string} [ext.unpacked_path] - Path to unpacked extension * @param {string} [extensions_dir] - Directory to store extensions * @returns {Promise} - Complete extension metadata object */ async function loadOrInstallExtension(ext, extensions_dir = null) { if (!(ext.webstore_id || ext.unpacked_path)) { throw new Error('Extension must have either {webstore_id} or {unpacked_path}'); } // Determine extensions directory const EXTENSIONS_DIR = extensions_dir || process.env.CHROME_EXTENSIONS_DIR || './data/chrome_extensions'; // Set statically computable extension metadata ext.webstore_id = ext.webstore_id || ext.id; ext.name = ext.name || ext.webstore_id; ext.webstore_url = ext.webstore_url || `https://chromewebstore.google.com/detail/${ext.webstore_id}`; ext.crx_url = ext.crx_url || `https://clients2.google.com/service/update2/crx?response=redirect&prodversion=1230&acceptformat=crx3&x=id%3D${ext.webstore_id}%26uc`; ext.crx_path = ext.crx_path || path.join(EXTENSIONS_DIR, `${ext.webstore_id}__${ext.name}.crx`); ext.unpacked_path = ext.unpacked_path || path.join(EXTENSIONS_DIR, `${ext.webstore_id}__${ext.name}`); const manifest_path = path.join(ext.unpacked_path, 'manifest.json'); ext.read_manifest = () => JSON.parse(fs.readFileSync(manifest_path, 'utf-8')); ext.read_version = () => fs.existsSync(manifest_path) && ext.read_manifest()?.version || null; // If extension is not installed, download and unpack it if (!ext.read_version()) { await installExtension(ext); } // Autodetect ID from filesystem path (unpacked extensions don't have stable IDs) ext.id = getExtensionId(ext.unpacked_path); ext.version = ext.read_version(); if (!ext.version) { console.warn(`[❌] Unable to detect ID and version of installed extension ${ext.unpacked_path}`); } else { console.log(`[➕] Installed extension ${ext.name} (${ext.version})... ${ext.unpacked_path}`); } return ext; } /** * Check if a Puppeteer target is an extension background page/service worker. * * @param {Object} target - Puppeteer target object * @returns {Promise} - Object with target_is_bg, extension_id, manifest_version, etc. */ async function isTargetExtension(target) { let target_type; let target_ctx; let target_url; try { target_type = target.type(); target_ctx = (await target.worker()) || (await target.page()) || null; target_url = target.url() || target_ctx?.url() || null; } catch (err) { if (String(err).includes('No target with given id found')) { // Target closed during check, ignore harmless race condition target_type = 'closed'; target_ctx = null; target_url = 'about:closed'; } else { throw err; } } // Check if this is an extension background page or service worker const is_chrome_extension = target_url?.startsWith('chrome-extension://'); const is_background_page = target_type === 'background_page'; const is_service_worker = target_type === 'service_worker'; const target_is_bg = is_chrome_extension && (is_background_page || is_service_worker); let extension_id = null; let manifest_version = null; const target_is_extension = is_chrome_extension || target_is_bg; if (target_is_extension) { try { extension_id = target_url?.split('://')[1]?.split('/')[0] || null; if (target_ctx) { const manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest()); manifest_version = manifest?.manifest_version || null; } } catch (err) { // Failed to get extension metadata } } return { target_is_extension, target_is_bg, target_type, target_ctx, target_url, extension_id, manifest_version, }; } /** * Load extension metadata and connection handlers from a browser target. * * @param {Array} extensions - Array of extension metadata objects to update * @param {Object} target - Puppeteer target object * @returns {Promise} - Updated extension object or null if not an extension */ async function loadExtensionFromTarget(extensions, target) { const { target_is_bg, target_is_extension, target_type, target_ctx, target_url, extension_id, manifest_version, } = await isTargetExtension(target); if (!(target_is_bg && extension_id && target_ctx)) { return null; } // Find matching extension in our list const extension = extensions.find(ext => ext.id === extension_id); if (!extension) { console.warn(`[⚠️] Found loaded extension ${extension_id} that's not in CHROME_EXTENSIONS list`); return null; } // Load manifest from the extension context let manifest = null; try { manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest()); } catch (err) { console.error(`[❌] Failed to read manifest for extension ${extension_id}:`, err); return null; } // Create dispatch methods for communicating with the extension const new_extension = { ...extension, target, target_type, target_url, manifest, manifest_version, // Trigger extension toolbar button click dispatchAction: async (tab) => { return await target_ctx.evaluate((tabId) => { return new Promise((resolve) => { chrome.action.onClicked.addListener((tab) => { resolve({ success: true, tab }); }); chrome.action.openPopup(); }); }, tab?.id || null); }, // Send message to extension dispatchMessage: async (message, options = {}) => { return await target_ctx.evaluate((msg, opts) => { return new Promise((resolve) => { chrome.runtime.sendMessage(msg, opts, (response) => { resolve(response); }); }); }, message, options); }, // Trigger extension command (keyboard shortcut) dispatchCommand: async (command) => { return await target_ctx.evaluate((cmd) => { return new Promise((resolve) => { chrome.commands.onCommand.addListener((receivedCommand) => { if (receivedCommand === cmd) { resolve({ success: true, command: receivedCommand }); } }); // Note: Actually triggering commands programmatically is not directly supported // This would need to be done via CDP or keyboard simulation }); }, command); }, }; // Update the extension in the array Object.assign(extension, new_extension); console.log(`[🔌] Connected to extension ${extension.name} (${extension.version})`); return new_extension; } /** * Install all extensions in the list if not already installed. * * @param {Array} extensions - Array of extension metadata objects * @param {string} [extensions_dir] - Directory to store extensions * @returns {Promise} - Array of installed extension objects */ async function installAllExtensions(extensions, extensions_dir = null) { console.log(`[⚙️] Installing ${extensions.length} chrome extensions...`); for (const extension of extensions) { await loadOrInstallExtension(extension, extensions_dir); } return extensions; } /** * Load and connect to all extensions from a running browser. * * @param {Object} browser - Puppeteer browser instance * @param {Array} extensions - Array of extension metadata objects * @returns {Promise} - Array of loaded extension objects with connection handlers */ async function loadAllExtensionsFromBrowser(browser, extensions) { console.log(`[⚙️] Loading ${extensions.length} chrome extensions from browser...`); // Find loaded extensions at runtime by examining browser targets for (const target of browser.targets()) { await loadExtensionFromTarget(extensions, target); } return extensions; } /** * Load extension manifest.json file * * @param {string} unpacked_path - Path to unpacked extension directory * @returns {object|null} - Parsed manifest object or null if not found/invalid */ function loadExtensionManifest(unpacked_path) { const manifest_path = path.join(unpacked_path, 'manifest.json'); if (!fs.existsSync(manifest_path)) { return null; } try { const manifest_content = fs.readFileSync(manifest_path, 'utf-8'); return JSON.parse(manifest_content); } catch (error) { // Invalid JSON or read error return null; } } /** * Generate Chrome launch arguments for loading extensions. * * @param {Array} extensions - Array of extension metadata objects * @returns {Array} - Chrome CLI arguments for loading extensions */ function getExtensionLaunchArgs(extensions) { if (!extensions || extensions.length === 0) { return []; } // Filter out extensions without unpacked_path first const validExtensions = extensions.filter(ext => ext.unpacked_path); const unpacked_paths = validExtensions.map(ext => ext.unpacked_path); const webstore_ids = validExtensions.map(ext => ext.webstore_id || ext.id); return [ `--load-extension=${unpacked_paths.join(',')}`, `--allowlisted-extension-id=${webstore_ids.join(',')}`, '--allow-legacy-extension-manifests', '--disable-extensions-auto-update', ]; } // Export all functions module.exports = { getExtensionId, loadExtensionManifest, installExtension, loadOrInstallExtension, isTargetExtension, loadExtensionFromTarget, installAllExtensions, loadAllExtensionsFromBrowser, getExtensionLaunchArgs, }; // CLI usage if (require.main === module) { const args = process.argv.slice(2); if (args.length === 0) { console.log('Usage: chrome_extension_utils.js [args...]'); console.log(''); console.log('Commands:'); console.log(' getExtensionId '); console.log(' loadExtensionManifest '); console.log(' getExtensionLaunchArgs '); console.log(' loadOrInstallExtension [extensions_dir]'); process.exit(1); } const [command, ...commandArgs] = args; (async () => { try { switch (command) { case 'getExtensionId': { const [unpacked_path] = commandArgs; const id = getExtensionId(unpacked_path); console.log(id); break; } case 'loadExtensionManifest': { const [unpacked_path] = commandArgs; const manifest = loadExtensionManifest(unpacked_path); console.log(JSON.stringify(manifest)); break; } case 'getExtensionLaunchArgs': { const [extensions_json] = commandArgs; const extensions = JSON.parse(extensions_json); const args = getExtensionLaunchArgs(extensions); console.log(JSON.stringify(args)); break; } case 'loadOrInstallExtension': { const [webstore_id, name, extensions_dir] = commandArgs; const ext = await loadOrInstallExtension({ webstore_id, name }, extensions_dir); console.log(JSON.stringify(ext, null, 2)); break; } default: console.error(`Unknown command: ${command}`); process.exit(1); } } catch (error) { console.error(`Error: ${error.message}`); process.exit(1); } })(); }