Files
ArchiveBox/archivebox/personas/export_browser_state.js
2026-03-22 20:25:18 -07:00

211 lines
7.2 KiB
JavaScript

#!/usr/bin/env node
/**
* Export cookies and open-tab storage from a Chromium profile or live CDP URL.
*
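* Environment variables:
* ARCHIVEBOX_ABX_PLUGINS_DIR Absolute path to abx_plugins/plugins (required; ABX_PLUGINS_DIR is accepted as a fallback)
* CHROME_USER_DATA_DIR Local Chromium user-data directory to launch (required unless CHROME_CDP_URL is set)
* CHROME_CDP_URL Existing browser CDP URL to attach to (takes precedence over CHROME_USER_DATA_DIR)
* COOKIES_OUTPUT_FILE Optional output path for Netscape cookies.txt (at least one of the two output files must be set)
* AUTH_STORAGE_OUTPUT_FILE Optional output path for auth.json (at least one of the two output files must be set)
* CHROME_BINARY Optional browser binary override
* NODE_MODULES_DIR Optional node_modules path for puppeteer-core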
*/
const fs = require('fs');
const os = require('os');
const path = require('path');
// Locate the abx_plugins/plugins directory. ABX_PLUGINS_DIR is accepted as a
// fallback name when ARCHIVEBOX_ABX_PLUGINS_DIR is unset or empty.
const pluginsDir = process.env.ARCHIVEBOX_ABX_PLUGINS_DIR || process.env.ABX_PLUGINS_DIR;
if (!pluginsDir) {
// The helper modules below are required by path from this directory, so
// without it the script reports the problem on stderr and exits with status 1.
console.error('ARCHIVEBOX_ABX_PLUGINS_DIR is required');
process.exit(1);
}
// Shared helpers are loaded by absolute path from the plugins directory.
const baseUtils = require(path.join(pluginsDir, 'base', 'utils.js'));
// NOTE(review): presumably this adjusts module lookup paths (e.g. from
// NODE_MODULES_DIR) so that the 'puppeteer-core' require below resolves —
// confirm against base/utils.js. It must run before that require.
baseUtils.ensureNodeModuleResolution(module);
const chromeUtils = require(path.join(pluginsDir, 'chrome', 'chrome_utils.js'));
const puppeteer = require('puppeteer-core');
/**
 * Serialize one cookie object as a single Netscape cookies.txt row.
 *
 * The row has seven tab-separated fields: domain, include-subdomains flag,
 * path, secure flag, expiry (whole seconds), name and value.
 *
 * @param {object} cookie - Cookie with `domain`, `name` and `value`, plus
 *   optional `path`, `secure`, `expires` and `hostOnly` properties.
 * @returns {string} The tab-separated row, without a trailing newline.
 */
function cookieToNetscape(cookie) {
  // A leading dot is added unless the domain already has one or the cookie
  // is explicitly flagged as host-only.
  const keepDomainAsIs = cookie.domain.startsWith('.') || cookie.hostOnly;
  const domain = keepDomainAsIs ? cookie.domain : `.${cookie.domain}`;

  const asFlag = (condition) => (condition ? 'TRUE' : 'FALSE');

  // Missing, zero or negative expiry values are all written as '0'.
  const expiry = cookie.expires > 0 ? String(Math.floor(cookie.expires)) : '0';

  const fields = [
    domain,
    asFlag(domain.startsWith('.')),
    cookie.path || '/',
    asFlag(cookie.secure),
    expiry,
    cookie.name,
    cookie.value,
  ];
  // Template-literal conversion keeps missing fields rendered as 'undefined'
  // rather than the empty string Array#join would produce.
  return fields.map((field) => `${field}`).join('\t');
}
/**
 * Write cookies to `outputPath` as a Netscape cookies.txt file.
 *
 * The file starts with a fixed comment header followed by a blank line, then
 * one row per cookie. Missing parent directories are created first.
 *
 * @param {Iterable<object>} cookies - Cookies to serialize, in output order.
 * @param {string} outputPath - Destination file path; overwritten if present.
 * @returns {void}
 */
function writeCookiesFile(cookies, outputPath) {
  const header = [
    '# Netscape HTTP Cookie File',
    '# https://curl.se/docs/http-cookies.html',
    '# This file was generated by ArchiveBox persona cookie extraction',
    '#',
    '# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue',
    '',
  ];
  const rows = [...cookies].map((cookie) => cookieToNetscape(cookie));
  const contents = `${[...header, ...rows].join('\n')}\n`;

  fs.mkdirSync(path.dirname(outputPath), { recursive: true });
  fs.writeFileSync(outputPath, contents);
}
/**
 * Gather localStorage and sessionStorage contents from the browser's pages.
 *
 * Every page returned by `browser.pages()` is visited except blank pages and
 * pages whose URL starts with `chrome:`, `edge:` or `devtools:`. Results are
 * keyed by the page's origin; origins with no entries are left out, and a
 * later page with the same origin replaces an earlier one.
 *
 * @param {object} browser - Object exposing an async `pages()` method whose
 *   items provide `url()` and `evaluate(fn)`.
 * @returns {Promise<{localStorage: object, sessionStorage: object}>} Maps of
 *   origin to key/value storage contents.
 */
async function collectStorage(browser) {
  const skippedSchemes = ['chrome:', 'edge:', 'devtools:'];
  const hasEntries = (bucket) => Object.keys(bucket || {}).length > 0;

  const localByOrigin = {};
  const sessionByOrigin = {};

  for (const tab of await browser.pages()) {
    try {
      const tabUrl = tab.url();
      const isInspectable =
        Boolean(tabUrl) &&
        tabUrl !== 'about:blank' &&
        !skippedSchemes.some((scheme) => tabUrl.startsWith(scheme));

      if (isInspectable) {
        // This callback is executed inside the page, so it must be
        // self-contained and may only reference page globals.
        const snapshot = await tab.evaluate(() => {
          const dump = (store) => Object.fromEntries(Object.entries(store));
          return {
            origin: window.location.origin,
            localStorage: dump(window.localStorage),
            sessionStorage: dump(window.sessionStorage),
          };
        });

        const { origin } = snapshot;
        // Pages without a usable origin report it as empty or the string 'null'.
        if (origin && origin !== 'null') {
          if (hasEntries(snapshot.localStorage)) {
            localByOrigin[origin] = snapshot.localStorage;
          }
          if (hasEntries(snapshot.sessionStorage)) {
            sessionByOrigin[origin] = snapshot.sessionStorage;
          }
        }
      }
    } catch (error) {
      // Best effort: a page that cannot be inspected is skipped so the
      // remaining pages are still exported.
    }
  }

  return { localStorage: localByOrigin, sessionStorage: sessionByOrigin };
}
/**
 * Obtain a browser connection to export state from.
 *
 * When CHROME_CDP_URL is set, the function attaches to that existing browser.
 * Otherwise it launches a headless Chromium on CHROME_USER_DATA_DIR, using a
 * fresh temporary directory as the launch output directory, and connects to it.
 *
 * Resources created here (temporary directory, launched browser process) are
 * released again if any later step fails, so a rejected promise leaves
 * nothing behind.
 *
 * @returns {Promise<{browser: object, cleanup: function(): Promise<void>, sourceDescription: string}>}
 *   The connected browser, a best-effort `cleanup()` that never throws, and
 *   the CDP URL or user-data directory the browser came from.
 * @throws {Error} If neither CHROME_CDP_URL nor CHROME_USER_DATA_DIR is set,
 *   the user-data directory does not exist, no Chromium binary is found, the
 *   launch fails, or the connection attempt fails.
 */
async function openBrowser() {
  const cdpUrl = process.env.CHROME_CDP_URL || '';
  if (cdpUrl) {
    const browser = await chromeUtils.connectToBrowserEndpoint(puppeteer, cdpUrl, { defaultViewport: null });
    return {
      browser,
      async cleanup() {
        // This browser was not started here, so only detach from it.
        try {
          await browser.disconnect();
        } catch {
          // Best effort: a failed disconnect must not mask the export result.
        }
      },
      sourceDescription: cdpUrl,
    };
  }

  const userDataDir = process.env.CHROME_USER_DATA_DIR;
  if (!userDataDir) {
    throw new Error('Either CHROME_USER_DATA_DIR or CHROME_CDP_URL is required');
  }
  if (!fs.existsSync(userDataDir)) {
    throw new Error(`User data directory does not exist: ${userDataDir}`);
  }

  // Resolve the binary before creating the temporary directory so that a
  // missing binary does not leave an empty directory behind.
  const binary = process.env.CHROME_BINARY || chromeUtils.findAnyChromiumBinary();
  if (!binary) {
    throw new Error('Could not find a Chromium binary for browser state export');
  }

  const outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'abx-browser-state-'));
  const removeOutputDir = () => {
    try {
      fs.rmSync(outputDir, { recursive: true, force: true });
    } catch {
      // Best effort: a leftover temporary directory is not worth failing for.
    }
  };

  let launched;
  try {
    launched = await chromeUtils.launchChromium({
      binary,
      outputDir,
      userDataDir,
      headless: true,
      killZombies: false,
    });
  } catch (error) {
    removeOutputDir();
    throw error;
  }
  if (!launched.success) {
    removeOutputDir();
    throw new Error(launched.error || 'Chrome launch failed');
  }

  // Stops the browser process started above and removes its output directory.
  const stopLaunchedChrome = async () => {
    try {
      await chromeUtils.killChrome(launched.pid, outputDir);
    } catch {
      // Best effort: still try to remove the temporary directory below.
    }
    removeOutputDir();
  };

  let browser;
  try {
    browser = await chromeUtils.connectToBrowserEndpoint(puppeteer, launched.cdpUrl, { defaultViewport: null });
  } catch (error) {
    // The caller never receives cleanup() on this path, so release the
    // launched process and temporary directory here.
    await stopLaunchedChrome();
    throw error;
  }

  return {
    browser,
    async cleanup() {
      try {
        await browser.disconnect();
      } catch {
        // Best effort: continue with stopping the browser process.
      }
      await stopLaunchedChrome();
    },
    sourceDescription: userDataDir,
  };
}
/**
 * Export browser state to the files named by the environment.
 *
 * Reads COOKIES_OUTPUT_FILE and AUTH_STORAGE_OUTPUT_FILE (at least one is
 * required), opens a browser via `openBrowser()`, fetches the user agent and
 * cookies over a CDP session, and writes the requested outputs:
 *   - cookies file: Netscape format via `writeCookiesFile()`
 *   - auth file: JSON with cookies, localStorage, sessionStorage, user agent,
 *     source description and capture timestamp
 * A one-line summary is printed to stderr. The browser is always cleaned up,
 * including when an export step throws.
 *
 * @returns {Promise<void>}
 * @throws {Error} If no output file is configured, or any browser or file
 *   operation fails.
 */
async function main() {
  const cookiesOutput = process.env.COOKIES_OUTPUT_FILE || '';
  const authOutput = process.env.AUTH_STORAGE_OUTPUT_FILE || '';
  if (!cookiesOutput && !authOutput) {
    throw new Error('COOKIES_OUTPUT_FILE or AUTH_STORAGE_OUTPUT_FILE is required');
  }

  const { browser, cleanup, sourceDescription } = await openBrowser();
  try {
    const session = await browser.target().createCDPSession();
    const browserVersion = await session.send('Browser.getVersion');
    const cookieResult = await session.send('Storage.getCookies');
    const cookies = cookieResult?.cookies || [];
    const userAgent = browserVersion?.userAgent || '';

    // Storage is only used by the auth file, so pages are not inspected when
    // only the cookies file was requested. It is still collected before any
    // file is written so a failure here leaves no partial output.
    const storage = authOutput ? await collectStorage(browser) : null;

    if (cookiesOutput) {
      writeCookiesFile(cookies, cookiesOutput);
    }

    let storageSummary = '';
    if (storage) {
      const { localStorage, sessionStorage } = storage;
      fs.mkdirSync(path.dirname(authOutput), { recursive: true });
      fs.writeFileSync(
        authOutput,
        JSON.stringify(
          {
            TYPE: 'auth',
            SOURCE: sourceDescription,
            captured_at: new Date().toISOString(),
            user_agent: userAgent,
            cookies,
            localStorage,
            sessionStorage,
          },
          null,
          2,
        ) + '\n',
      );

      // Count each origin once even when it has both kinds of storage.
      const originCount = new Set([...Object.keys(localStorage), ...Object.keys(sessionStorage)]).size;
      storageSummary = ` and ${originCount} storage origins`;
    }

    console.error(
      `[+] Exported ${cookies.length} cookies` +
        storageSummary +
        `${userAgent ? ' with browser USER_AGENT' : ''}` +
        ` from ${sourceDescription}`,
    );
  } finally {
    await cleanup();
  }
}
/**
 * Report a fatal export error on stderr and exit with status 1.
 *
 * @param {Error} error - The rejection reason from `main()`.
 */
function reportFatalError(error) {
  console.error(`ERROR: ${error.message}`);
  process.exit(1);
}

// Script entry point: run the export and turn any rejection into a non-zero exit.
main().catch(reportFatalError);