#!/usr/bin/env node
/**
 * Export cookies and open-tab storage from a Chromium profile or live CDP URL.
 *
 * Environment variables:
 *   ARCHIVEBOX_ABX_PLUGINS_DIR  Absolute path to abx_plugins/plugins
 *                               (ABX_PLUGINS_DIR is accepted as a fallback)
 *   CHROME_USER_DATA_DIR        Local Chromium user-data directory to launch
 *   CHROME_CDP_URL              Existing browser CDP URL to attach to
 *   COOKIES_OUTPUT_FILE         Optional output path for Netscape cookies.txt
 *   AUTH_STORAGE_OUTPUT_FILE    Optional output path for auth.json
 *   CHROME_BINARY               Optional browser binary override
 *   NODE_MODULES_DIR            Optional node_modules path for puppeteer-core
 *
 * At least one of COOKIES_OUTPUT_FILE / AUTH_STORAGE_OUTPUT_FILE must be set,
 * and one of CHROME_CDP_URL / CHROME_USER_DATA_DIR (CHROME_CDP_URL wins when
 * both are present).
 */

const fs = require('fs');
const os = require('os');
const path = require('path');

const pluginsDir = process.env.ARCHIVEBOX_ABX_PLUGINS_DIR || process.env.ABX_PLUGINS_DIR;
if (!pluginsDir) {
  console.error('ARCHIVEBOX_ABX_PLUGINS_DIR is required');
  process.exit(1);
}

// ensureNodeModuleResolution() must run before puppeteer-core is required:
// the require order below is the same as it has always been in this script.
const baseUtils = require(path.join(pluginsDir, 'base', 'utils.js'));
baseUtils.ensureNodeModuleResolution(module);

const chromeUtils = require(path.join(pluginsDir, 'chrome', 'chrome_utils.js'));
const puppeteer = require('puppeteer-core');

/**
 * Render one cookie as a tab-separated Netscape cookies.txt line.
 *
 * Field order: domain, includeSubdomains, path, secure, expiry, name, value.
 *
 * @param {object} cookie - Cookie object with at least `domain`, `name` and
 *   `value`; `path`, `secure`, `expires` and `hostOnly` are optional.
 * @returns {string} A single line without trailing newline.
 */
function cookieToNetscape(cookie) {
  let domain = cookie.domain;

  // Only widen a dot-less domain to a domain cookie when the source says so
  // explicitly. Cookies returned by CDP carry no `hostOnly` field: there the
  // leading dot on `domain` is already the host-only/domain-cookie marker, so
  // treating a missing flag as "not host-only" would turn every host-only
  // cookie into one that is also sent to all subdomains.
  if (cookie.hostOnly === false && !domain.startsWith('.')) {
    domain = `.${domain}`;
  }

  const includeSubdomains = domain.startsWith('.') ? 'TRUE' : 'FALSE';
  const cookiePath = cookie.path || '/';
  const secure = cookie.secure ? 'TRUE' : 'FALSE';
  // Missing or non-positive expiry is written as 0.
  const expiry = cookie.expires && cookie.expires > 0
    ? Math.floor(cookie.expires).toString()
    : '0';

  return `${domain}\t${includeSubdomains}\t${cookiePath}\t${secure}\t${expiry}\t${cookie.name}\t${cookie.value}`;
}

/**
 * Write cookies to `outputPath` in Netscape cookies.txt format, creating the
 * parent directory if needed.
 *
 * @param {object[]} cookies - Cookies accepted by cookieToNetscape().
 * @param {string} outputPath - Destination file path (overwritten).
 */
function writeCookiesFile(cookies, outputPath) {
  const lines = [
    '# Netscape HTTP Cookie File',
    '# https://curl.se/docs/http-cookies.html',
    '# This file was generated by ArchiveBox persona cookie extraction',
    '#',
    '# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue',
    '',
  ];

  for (const cookie of cookies) {
    lines.push(cookieToNetscape(cookie));
  }

  fs.mkdirSync(path.dirname(outputPath), { recursive: true });
  fs.writeFileSync(outputPath, lines.join('\n') + '\n');
}

/**
 * Collect localStorage and sessionStorage from every open page of `browser`.
 *
 * Results are keyed by page origin; origins with no entries are omitted. If
 * several pages share an origin, the page visited last wins.
 *
 * @param {object} browser - Connected puppeteer browser.
 * @returns {Promise<{localStorage: object, sessionStorage: object}>}
 */
async function collectStorage(browser) {
  const localStorage = {};
  const sessionStorage = {};

  const pages = await browser.pages();
  for (const page of pages) {
    try {
      const url = page.url();
      if (!url || url === 'about:blank') continue;
      // Browser-internal pages are skipped.
      if (url.startsWith('chrome:') || url.startsWith('edge:') || url.startsWith('devtools:')) continue;

      const payload = await page.evaluate(() => ({
        origin: window.location.origin,
        localStorage: Object.fromEntries(Object.entries(window.localStorage)),
        sessionStorage: Object.fromEntries(Object.entries(window.sessionStorage)),
      }));

      // Opaque origins serialize as the string "null".
      if (!payload.origin || payload.origin === 'null') continue;

      if (Object.keys(payload.localStorage || {}).length > 0) {
        localStorage[payload.origin] = payload.localStorage;
      }
      if (Object.keys(payload.sessionStorage || {}).length > 0) {
        sessionStorage[payload.origin] = payload.sessionStorage;
      }
    } catch (error) {
      // Ignore pages that cannot be inspected via evaluate().
    }
  }

  return { localStorage, sessionStorage };
}

/**
 * Obtain a puppeteer browser, either by attaching to CHROME_CDP_URL or by
 * launching a headless Chromium on CHROME_USER_DATA_DIR.
 *
 * @returns {Promise<{browser: object, cleanup: function(): Promise<void>, sourceDescription: string}>}
 *   `cleanup` never throws. For an attached browser it only disconnects; for a
 *   launched browser it also kills the process and removes the temp directory.
 * @throws {Error} If neither source is configured, the user-data directory
 *   does not exist, no binary can be found, or launching/connecting fails.
 */
async function openBrowser() {
  const cdpUrl = process.env.CHROME_CDP_URL || '';
  if (cdpUrl) {
    const browser = await chromeUtils.connectToBrowserEndpoint(puppeteer, cdpUrl, { defaultViewport: null });
    return {
      browser,
      async cleanup() {
        // The browser is not ours: detach only, never kill it.
        try {
          await browser.disconnect();
        } catch (error) {
          // Best-effort teardown.
        }
      },
      sourceDescription: cdpUrl,
    };
  }

  const userDataDir = process.env.CHROME_USER_DATA_DIR;
  if (!userDataDir) {
    throw new Error('Either CHROME_USER_DATA_DIR or CHROME_CDP_URL is required');
  }
  if (!fs.existsSync(userDataDir)) {
    throw new Error(`User data directory does not exist: ${userDataDir}`);
  }

  // Resolve the binary before creating any temp state so that a missing
  // binary does not leave an orphaned temp directory behind.
  const binary = process.env.CHROME_BINARY || chromeUtils.findAnyChromiumBinary();
  if (!binary) {
    throw new Error('Could not find a Chromium binary for browser state export');
  }

  const outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'abx-browser-state-'));
  let launched = null;

  // Best-effort teardown of whatever the launch created. Shared by the normal
  // cleanup() and by the failure path below.
  const releaseLaunch = async () => {
    if (launched) {
      try {
        await chromeUtils.killChrome(launched.pid, outputDir);
      } catch (error) {
        // Best-effort teardown.
      }
    }
    try {
      fs.rmSync(outputDir, { recursive: true, force: true });
    } catch (error) {
      // Best-effort teardown.
    }
  };

  try {
    launched = await chromeUtils.launchChromium({
      binary,
      outputDir,
      userDataDir,
      headless: true,
      killZombies: false,
    });
    if (!launched.success) {
      throw new Error(launched.error || 'Chrome launch failed');
    }

    const browser = await chromeUtils.connectToBrowserEndpoint(puppeteer, launched.cdpUrl, { defaultViewport: null });
    return {
      browser,
      async cleanup() {
        try {
          await browser.disconnect();
        } catch (error) {
          // Best-effort teardown.
        }
        await releaseLaunch();
      },
      sourceDescription: userDataDir,
    };
  } catch (error) {
    // The caller never receives cleanup() on this path, so release the
    // launched process and temp directory here before propagating.
    await releaseLaunch();
    throw error;
  }
}

/**
 * Entry point: export cookies (and optionally storage + user agent) to the
 * files named by COOKIES_OUTPUT_FILE / AUTH_STORAGE_OUTPUT_FILE, then print a
 * one-line summary on stderr.
 *
 * @throws {Error} If no output file is configured or the browser cannot be
 *   opened or queried.
 */
async function main() {
  const cookiesOutput = process.env.COOKIES_OUTPUT_FILE || '';
  const authOutput = process.env.AUTH_STORAGE_OUTPUT_FILE || '';
  if (!cookiesOutput && !authOutput) {
    throw new Error('COOKIES_OUTPUT_FILE or AUTH_STORAGE_OUTPUT_FILE is required');
  }

  const { browser, cleanup, sourceDescription } = await openBrowser();
  try {
    const session = await browser.target().createCDPSession();
    const browserVersion = await session.send('Browser.getVersion');
    const cookieResult = await session.send('Storage.getCookies');
    const cookies = cookieResult?.cookies || [];
    const { localStorage, sessionStorage } = await collectStorage(browser);
    const userAgent = browserVersion?.userAgent || '';

    if (cookiesOutput) {
      writeCookiesFile(cookies, cookiesOutput);
    }

    if (authOutput) {
      fs.mkdirSync(path.dirname(authOutput), { recursive: true });
      fs.writeFileSync(
        authOutput,
        JSON.stringify(
          {
            TYPE: 'auth',
            SOURCE: sourceDescription,
            captured_at: new Date().toISOString(),
            user_agent: userAgent,
            cookies,
            localStorage,
            sessionStorage,
          },
          null,
          2,
        ) + '\n',
      );
    }

    // The storage count is only reported when storage was actually written.
    const storageNote = authOutput
      ? ` and ${Object.keys(localStorage).length + Object.keys(sessionStorage).length} storage origins`
      : '';
    const agentNote = userAgent ? ' with browser USER_AGENT' : '';
    console.error(
      `[+] Exported ${cookies.length} cookies${storageNote}${agentNote} from ${sourceDescription}`,
    );
  } finally {
    await cleanup();
  }
}

main().catch((error) => {
  console.error(`ERROR: ${error.message}`);
  process.exit(1);
});