mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
211 lines
7.2 KiB
JavaScript
211 lines
7.2 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Export cookies and open-tab storage from a Chromium profile or live CDP URL.
 *
 * Environment variables:
 *   ARCHIVEBOX_ABX_PLUGINS_DIR   Absolute path to abx_plugins/plugins
 *                                (ABX_PLUGINS_DIR is accepted as a fallback)
 *   CHROME_USER_DATA_DIR         Local Chromium user-data directory to launch
 *   CHROME_CDP_URL               Existing browser CDP URL to attach to
 *                                (takes precedence over CHROME_USER_DATA_DIR)
 *   COOKIES_OUTPUT_FILE          Optional output path for Netscape cookies.txt
 *   AUTH_STORAGE_OUTPUT_FILE     Optional output path for auth.json
 *   CHROME_BINARY                Optional browser binary override
 *   NODE_MODULES_DIR             Optional node_modules path for puppeteer-core
 *
 * At least one of COOKIES_OUTPUT_FILE or AUTH_STORAGE_OUTPUT_FILE must be set.
 */
|
|
|
|
const fs = require('fs');
|
|
const os = require('os');
|
|
const path = require('path');
|
|
|
|
// Locate the abx plugins directory from the environment. The script cannot
// load its helper modules without it, so bail out immediately when it is unset.
const { ARCHIVEBOX_ABX_PLUGINS_DIR, ABX_PLUGINS_DIR } = process.env;
const pluginsDir = ARCHIVEBOX_ABX_PLUGINS_DIR || ABX_PLUGINS_DIR;
if (!pluginsDir) {
  console.error('ARCHIVEBOX_ABX_PLUGINS_DIR is required');
  process.exit(1);
}

// Shared plugin helpers are loaded by absolute path from the plugins directory.
const baseUtilsPath = path.join(pluginsDir, 'base', 'utils.js');
const baseUtils = require(baseUtilsPath);

// NOTE(review): presumably this is what makes `puppeteer-core` resolvable
// (see NODE_MODULES_DIR in the header), so it must stay ahead of the
// require('puppeteer-core') below — confirm against base/utils.js.
baseUtils.ensureNodeModuleResolution(module);

const chromeUtilsPath = path.join(pluginsDir, 'chrome', 'chrome_utils.js');
const chromeUtils = require(chromeUtilsPath);
const puppeteer = require('puppeteer-core');
|
|
|
|
/**
 * Render one cookie object as a single Netscape cookies.txt record.
 *
 * Fields are tab-separated, in this order:
 * domain, includeSubdomains, path, secure, expiry, name, value.
 *
 * @param {object} cookie - Cookie with `domain`, `name` and `value`, plus
 *   optional `path`, `secure`, `expires` and `hostOnly`.
 * @returns {string} One tab-separated record without a trailing newline.
 */
function cookieToNetscape(cookie) {
  // A domain without a leading dot gets one unless the cookie is flagged
  // host-only.
  // NOTE(review): `hostOnly` may be absent on cookie objects returned by CDP,
  // in which case every cookie is written as a subdomain-wide cookie — confirm
  // whether that widening is intended.
  const shouldPrefixDot = !cookie.domain.startsWith('.') && !cookie.hostOnly;
  const domain = shouldPrefixDot ? `.${cookie.domain}` : cookie.domain;

  const fields = [
    domain,
    domain.startsWith('.') ? 'TRUE' : 'FALSE',
    cookie.path || '/',
    cookie.secure ? 'TRUE' : 'FALSE',
    // Missing, zero or negative expiry is written as 0; fractional
    // timestamps are floored to whole seconds.
    cookie.expires > 0 ? String(Math.floor(cookie.expires)) : '0',
    `${cookie.name}`,
    `${cookie.value}`,
  ];

  return fields.join('\t');
}
|
|
|
|
/**
 * Write cookies to `outputPath` as a Netscape-format cookies.txt file.
 *
 * The file starts with a fixed comment header followed by a blank line, then
 * one record per cookie (rendered by `cookieToNetscape`), and ends with a
 * trailing newline. Parent directories are created when missing.
 *
 * @param {Iterable<object>} cookies - Cookies to serialize.
 * @param {string} outputPath - Destination file path.
 * @returns {void}
 */
function writeCookiesFile(cookies, outputPath) {
  const header = [
    '# Netscape HTTP Cookie File',
    '# https://curl.se/docs/http-cookies.html',
    '# This file was generated by ArchiveBox persona cookie extraction',
    '#',
    // The doubled backslashes are deliberate: the header shows a literal "\t".
    '# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue',
    '',
  ];

  // Render every record before touching the filesystem.
  const records = [...cookies].map((cookie) => cookieToNetscape(cookie));
  const contents = `${[...header, ...records].join('\n')}\n`;

  fs.mkdirSync(path.dirname(outputPath), { recursive: true });
  fs.writeFileSync(outputPath, contents);
}
|
|
|
|
/**
 * Snapshot localStorage and sessionStorage from every open tab, keyed by origin.
 *
 * Blank tabs and browser-internal pages (`chrome:`, `edge:`, `devtools:`) are
 * skipped, as are pages whose origin is empty or the string 'null'. Pages that
 * fail during inspection are ignored, so one bad tab does not abort the export.
 *
 * When several tabs share an origin their entries are merged rather than
 * overwritten: sessionStorage is scoped per tab, so replacing the origin's
 * entry would silently drop keys captured from earlier tabs. On a key
 * conflict the later tab wins.
 *
 * @param {object} browser - Object exposing `pages()`; each page must provide
 *   `url()` and `evaluate(fn)`.
 * @returns {Promise<{localStorage: object, sessionStorage: object}>} Two maps
 *   of origin -> { key: value }. Origins with no entries are omitted.
 */
async function collectStorage(browser) {
  const localStorage = {};
  const sessionStorage = {};
  const internalSchemes = ['chrome:', 'edge:', 'devtools:'];

  // Fold one tab's entries into the per-origin bucket; empty snapshots are skipped.
  const mergeEntries = (bucket, origin, entries) => {
    if (Object.keys(entries || {}).length === 0) return;
    bucket[origin] = { ...bucket[origin], ...entries };
  };

  for (const page of await browser.pages()) {
    try {
      const url = page.url();
      if (!url || url === 'about:blank') continue;
      if (internalSchemes.some((scheme) => url.startsWith(scheme))) continue;

      // This callback is evaluated in the page, so `window` is the page's global.
      const payload = await page.evaluate(() => ({
        origin: window.location.origin,
        localStorage: Object.fromEntries(Object.entries(window.localStorage)),
        sessionStorage: Object.fromEntries(Object.entries(window.sessionStorage)),
      }));

      if (!payload.origin || payload.origin === 'null') continue;
      mergeEntries(localStorage, payload.origin, payload.localStorage);
      mergeEntries(sessionStorage, payload.origin, payload.sessionStorage);
    } catch (error) {
      // Deliberately best-effort: ignore pages that cannot be inspected via evaluate().
    }
  }

  return { localStorage, sessionStorage };
}
|
|
|
|
/**
 * Obtain a browser connection to export state from.
 *
 * If CHROME_CDP_URL is set, attach to that existing browser; cleanup only
 * disconnects and the browser is left running. Otherwise launch a headless
 * Chromium on CHROME_USER_DATA_DIR with a throwaway temp output directory;
 * cleanup disconnects, kills the launched process and removes the temp dir.
 *
 * The Chromium binary is resolved before the temp directory is created, and
 * any failure after that point (launch reports failure, or connecting to the
 * launched browser throws) tears down the launched process and temp directory
 * before the error propagates, so a failed attempt leaves nothing behind.
 *
 * @returns {Promise<{browser: object, cleanup: function(): Promise<void>, sourceDescription: string}>}
 *   The connected browser, an idempotence-agnostic best-effort cleanup
 *   callback, and the CDP URL or user-data directory the browser came from.
 * @throws {Error} When neither CHROME_USER_DATA_DIR nor CHROME_CDP_URL is set,
 *   the user-data directory does not exist, no Chromium binary is found, or
 *   the launch/connection fails.
 */
async function openBrowser() {
  const connectOptions = { defaultViewport: null };

  const cdpUrl = process.env.CHROME_CDP_URL || '';
  if (cdpUrl) {
    const browser = await chromeUtils.connectToBrowserEndpoint(puppeteer, cdpUrl, connectOptions);
    return {
      browser,
      async cleanup() {
        try {
          await browser.disconnect();
        } catch (error) {
          // Best-effort: the connection may already be gone.
        }
      },
      sourceDescription: cdpUrl,
    };
  }

  const userDataDir = process.env.CHROME_USER_DATA_DIR;
  if (!userDataDir) {
    throw new Error('Either CHROME_USER_DATA_DIR or CHROME_CDP_URL is required');
  }
  if (!fs.existsSync(userDataDir)) {
    throw new Error(`User data directory does not exist: ${userDataDir}`);
  }

  // Resolve the binary first so a missing browser cannot leave a temp dir behind.
  const binary = process.env.CHROME_BINARY || chromeUtils.findAnyChromiumBinary();
  if (!binary) {
    throw new Error('Could not find a Chromium binary for browser state export');
  }

  const outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'abx-browser-state-'));
  let launched = null;

  // Kill the launched browser (only if the launch succeeded) and drop the temp
  // dir. Each step is best-effort so one failure does not block the next.
  const releaseLaunch = async () => {
    if (launched?.success) {
      try {
        await chromeUtils.killChrome(launched.pid, outputDir);
      } catch (error) {
        // Best-effort: the process may already have exited.
      }
    }
    try {
      fs.rmSync(outputDir, { recursive: true, force: true });
    } catch (error) {
      // Best-effort: leftover temp files are not worth failing the export.
    }
  };

  try {
    launched = await chromeUtils.launchChromium({
      binary,
      outputDir,
      userDataDir,
      headless: true,
      killZombies: false,
    });
    if (!launched.success) {
      throw new Error(launched.error || 'Chrome launch failed');
    }

    const browser = await chromeUtils.connectToBrowserEndpoint(puppeteer, launched.cdpUrl, connectOptions);

    return {
      browser,
      async cleanup() {
        try {
          await browser.disconnect();
        } catch (error) {
          // Best-effort: the connection may already be gone.
        }
        await releaseLaunch();
      },
      sourceDescription: userDataDir,
    };
  } catch (error) {
    // Do not leak the Chrome process or temp dir when setup fails part-way.
    await releaseLaunch();
    throw error;
  }
}
|
|
|
|
/**
 * Export browser state to the files named by the environment.
 *
 * Reads COOKIES_OUTPUT_FILE and AUTH_STORAGE_OUTPUT_FILE, opens a browser via
 * `openBrowser()`, fetches cookies and the user agent over a CDP session, and
 * writes whichever outputs were requested. A one-line summary goes to stderr.
 * The browser's cleanup callback always runs, even when the export fails.
 *
 * Tab storage is only collected when auth.json output is requested, since
 * nothing else uses it. The summary counts each origin once, even when it
 * has both localStorage and sessionStorage entries.
 *
 * @returns {Promise<void>}
 * @throws {Error} When neither output path is set, or when opening the
 *   browser, querying it, or writing an output file fails.
 */
async function main() {
  const cookiesOutput = process.env.COOKIES_OUTPUT_FILE || '';
  const authOutput = process.env.AUTH_STORAGE_OUTPUT_FILE || '';
  if (!cookiesOutput && !authOutput) {
    throw new Error('COOKIES_OUTPUT_FILE or AUTH_STORAGE_OUTPUT_FILE is required');
  }

  const { browser, cleanup, sourceDescription } = await openBrowser();

  try {
    const session = await browser.target().createCDPSession();
    const browserVersion = await session.send('Browser.getVersion');
    const cookieResult = await session.send('Storage.getCookies');
    const cookies = cookieResult?.cookies || [];
    const userAgent = browserVersion?.userAgent || '';

    // Storage only feeds auth.json, so skip evaluating every tab when it is not requested.
    const { localStorage, sessionStorage } = authOutput
      ? await collectStorage(browser)
      : { localStorage: {}, sessionStorage: {} };

    if (cookiesOutput) {
      writeCookiesFile(cookies, cookiesOutput);
    }

    if (authOutput) {
      const authState = {
        TYPE: 'auth',
        SOURCE: sourceDescription,
        captured_at: new Date().toISOString(),
        user_agent: userAgent,
        cookies,
        localStorage,
        sessionStorage,
      };
      fs.mkdirSync(path.dirname(authOutput), { recursive: true });
      fs.writeFileSync(authOutput, JSON.stringify(authState, null, 2) + '\n');
    }

    // Count distinct origins so one with both storage kinds is not reported twice.
    const originCount = new Set([
      ...Object.keys(localStorage),
      ...Object.keys(sessionStorage),
    ]).size;
    const storageNote = authOutput ? ` and ${originCount} storage origins` : '';
    const agentNote = userAgent ? ' with browser USER_AGENT' : '';

    console.error(
      `[+] Exported ${cookies.length} cookies${storageNote}${agentNote} from ${sourceDescription}`,
    );
  } finally {
    await cleanup();
  }
}
|
|
|
|
// Script entry point: run the export and, on any failure, print the error
// message to stderr and exit with status 1.
function exitWithError(failure) {
  console.error(`ERROR: ${failure.message}`);
  process.exit(1);
}

main().catch(exitWithError);
|