Files
ArchiveBox/archivebox/plugins/chrome/extract_cookies.js
Claude 73425fa984 Add persona CLI command with browser cookie import
- Add `archivebox persona create/list/update/delete` commands
- Support `--import=chrome|firefox|brave` to copy browser profile
- Extract cookies via CDP to generate cookies.txt for non-browser tools
- Fix JSDoc comment parsing issue in chrome_utils.js
2025-12-31 12:13:07 +00:00

255 lines
8.3 KiB
JavaScript

#!/usr/bin/env node
/**
* Extract cookies from Chrome via CDP and write to Netscape cookies.txt format.
*
* This script launches Chrome with a given user data directory, connects via CDP,
* extracts all cookies, and writes them to a cookies.txt file in Netscape format.
*
* Usage:
* CHROME_USER_DATA_DIR=/path/to/profile COOKIES_OUTPUT_FILE=/path/to/cookies.txt node extract_cookies.js
*
* Environment variables:
* CHROME_USER_DATA_DIR: Path to Chrome user data directory (required)
* COOKIES_OUTPUT_FILE: Path to output cookies.txt file (required)
* CHROME_HEADLESS: Not read by this script; Chrome is always launched with headless: true
* NODE_MODULES_DIR: Path to node_modules for module resolution
*/
// When NODE_MODULES_DIR is set, put it at the front of this module's lookup
// paths so the require() calls in this file search it first.
const { NODE_MODULES_DIR: extraModulesDir } = process.env;
if (extraModulesDir) {
    module.paths.unshift(extraModulesDir);
}
const fs = require('fs');
const path = require('path');
const {
findChromium,
launchChromium,
killChrome,
getEnv,
} = require('./chrome_utils.js');
/**
 * Convert a cookie object to a Netscape cookies.txt format line.
 *
 * Format (tab-separated): domain includeSubdomains path secure expiry name value
 *
 * CDP's Network.Cookie type has no `hostOnly` field: the scope is encoded in
 * the domain itself (leading "." = domain cookie, no leading "." = host-only).
 * The domain is therefore written exactly as reported. A "." is only added
 * when the caller explicitly passes `hostOnly: false`, which keeps cookie
 * objects that do carry that flag working as before.
 *
 * @param {Object} cookie - CDP cookie object
 * @returns {string} - Netscape format cookie line
 */
function cookieToNetscape(cookie) {
    let domain = cookie.domain;
    // Never widen a host-only cookie to all subdomains just because the
    // `hostOnly` flag is absent.
    if (cookie.hostOnly === false && !domain.startsWith('.')) {
        domain = `.${domain}`;
    }

    // Include subdomains: TRUE only for domain cookies (leading ".")
    const includeSubdomains = domain.startsWith('.') ? 'TRUE' : 'FALSE';
    const cookiePath = cookie.path || '/';
    const secure = cookie.secure ? 'TRUE' : 'FALSE';

    // Expiry is seconds since the epoch (possibly fractional); anything that
    // is not a positive number is written as 0, i.e. a session cookie.
    const expiry = cookie.expires > 0 ? String(Math.floor(cookie.expires)) : '0';

    return `${domain}\t${includeSubdomains}\t${cookiePath}\t${secure}\t${expiry}\t${cookie.name}\t${cookie.value}`;
}
/**
 * Write cookies to a Netscape cookies.txt format file.
 *
 * The file contains live session credentials, so it is created with
 * owner-only permissions (0600). `mode` only takes effect when the file is
 * created; an already existing file keeps its current permissions.
 *
 * @param {Array} cookies - Array of CDP cookie objects
 * @param {string} outputPath - Path to output file
 */
function writeCookiesFile(cookies, outputPath) {
    const lines = [
        '# Netscape HTTP Cookie File',
        '# https://curl.se/docs/http-cookies.html',
        '# This file was generated by ArchiveBox persona cookie extraction',
        '#',
        '# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue',
        '',
    ];

    // One tab-separated line per cookie, after the header
    for (const cookie of cookies) {
        lines.push(cookieToNetscape(cookie));
    }

    fs.writeFileSync(outputPath, `${lines.join('\n')}\n`, { mode: 0o600 });
}
/** Upper bound (ms) for each CDP round trip so a stuck browser cannot hang the script. */
const CDP_TIMEOUT_MS = 30000;

/** Pause (ms) after launch to give the browser a moment to fully initialize. */
const CHROME_STARTUP_DELAY_MS = 2000;

/**
 * Fetch the list of debuggable targets from Chrome's HTTP endpoint (/json/list).
 *
 * @param {number} port - Remote debugging port on 127.0.0.1
 * @param {number} timeoutMs - Socket idle timeout before the request is aborted
 * @returns {Promise<Array>} - Parsed target descriptors
 */
function listCdpTargets(port, timeoutMs) {
    const http = require('http');
    return new Promise((resolve, reject) => {
        const req = http.request(
            {
                hostname: '127.0.0.1',
                port,
                path: '/json/list',
                method: 'GET',
            },
            (res) => {
                let body = '';
                res.setEncoding('utf8');
                res.on('data', (chunk) => {
                    body += chunk;
                });
                res.on('error', reject);
                res.on('end', () => {
                    if (res.statusCode !== 200) {
                        reject(new Error(`CDP target list request failed with HTTP ${res.statusCode}`));
                        return;
                    }
                    try {
                        const targets = JSON.parse(body);
                        if (!Array.isArray(targets)) {
                            reject(new Error('Unexpected CDP target list response'));
                            return;
                        }
                        resolve(targets);
                    } catch (err) {
                        reject(err);
                    }
                });
            }
        );
        // destroy(err) surfaces the timeout through the 'error' handler below
        req.setTimeout(timeoutMs, () => {
            req.destroy(new Error(`Timed out after ${timeoutMs}ms waiting for CDP target list`));
        });
        req.on('error', reject);
        req.end();
    });
}

/**
 * Send Network.getAllCookies over a target's WebSocket and return the cookies.
 *
 * The promise always settles: on the matching reply, on a socket error, when
 * the socket closes before a reply arrives, or when the timeout fires.
 *
 * @param {string} webSocketDebuggerUrl - Target WebSocket URL from /json/list
 * @param {number} timeoutMs - Maximum time to wait for the reply
 * @returns {Promise<Array>} - CDP cookie objects
 */
function fetchAllCookies(webSocketDebuggerUrl, timeoutMs) {
    const WebSocket = require('ws');
    return new Promise((resolve, reject) => {
        const ws = new WebSocket(webSocketDebuggerUrl);
        let settled = false;
        let timer = null;

        // Single exit point so late events (close after success, error after
        // timeout) cannot settle the promise twice.
        const settle = (error, cookies) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            if (error) {
                ws.terminate();
                reject(error);
            } else {
                ws.close();
                resolve(cookies);
            }
        };

        timer = setTimeout(() => {
            settle(new Error(`Timed out after ${timeoutMs}ms waiting for Network.getAllCookies`));
        }, timeoutMs);

        ws.on('open', () => {
            ws.send(JSON.stringify({
                id: 1,
                method: 'Network.getAllCookies',
            }));
        });
        ws.on('message', (raw) => {
            let response;
            try {
                response = JSON.parse(raw.toString());
            } catch (err) {
                // A throw inside an event handler would crash the process
                // and skip Chrome cleanup, so convert it into a rejection.
                settle(new Error(`Invalid CDP response: ${err.message}`));
                return;
            }
            if (response.id !== 1) {
                return; // not the reply to our request
            }
            if (response.result && response.result.cookies) {
                settle(null, response.result.cookies);
            } else {
                settle(new Error('Failed to get cookies: ' + JSON.stringify(response)));
            }
        });
        ws.on('error', (err) => settle(err));
        ws.on('close', () => settle(new Error('CDP WebSocket closed before cookies were received')));
    });
}

/**
 * Entry point: launch Chrome on CHROME_USER_DATA_DIR, read all cookies via
 * CDP and write them to COOKIES_OUTPUT_FILE.
 *
 * Always terminates the process: exit code 0 on success, 1 on any failure.
 * Chrome is stopped and the temporary directory removed on every path that
 * gets as far as creating them.
 */
async function main() {
    const userDataDir = getEnv('CHROME_USER_DATA_DIR');
    const outputFile = getEnv('COOKIES_OUTPUT_FILE');

    if (!userDataDir) {
        console.error('ERROR: CHROME_USER_DATA_DIR environment variable is required');
        process.exit(1);
    }
    if (!outputFile) {
        console.error('ERROR: COOKIES_OUTPUT_FILE environment variable is required');
        process.exit(1);
    }
    if (!fs.existsSync(userDataDir)) {
        console.error(`ERROR: User data directory does not exist: ${userDataDir}`);
        process.exit(1);
    }

    const binary = findChromium();
    if (!binary) {
        console.error('ERROR: Chromium binary not found');
        process.exit(1);
    }

    console.error(`[*] Extracting cookies from: ${userDataDir}`);
    console.error(`[*] Output file: ${outputFile}`);
    console.error(`[*] Using browser: ${binary}`);

    // Create a temporary output directory for Chrome files
    const outputDir = fs.mkdtempSync(path.join(require('os').tmpdir(), 'chrome-cookies-'));
    let chromePid = null;
    let exitCode = 0;

    try {
        // Launch Chrome with the user data directory
        const result = await launchChromium({
            binary,
            outputDir,
            userDataDir,
            headless: true,
            killZombies: false, // Don't kill other Chrome instances
        });
        if (!result.success) {
            // Throw instead of exiting here so the temp directory is still removed
            throw new Error(`Failed to launch Chrome: ${result.error}`);
        }

        chromePid = result.pid;
        console.error(`[*] Chrome launched (PID: ${chromePid})`);
        console.error(`[*] CDP URL: ${result.cdpUrl}`);

        // Wait a moment for the browser to fully initialize
        await new Promise((resolve) => setTimeout(resolve, CHROME_STARTUP_DELAY_MS));

        console.error('[*] Fetching cookies via CDP...');
        const targets = await listCdpTargets(result.port, CDP_TIMEOUT_MS);
        // Prefer a page target, fall back to whatever target exists
        const pageTarget = targets.find((t) => t.type === 'page') || targets[0];
        if (!pageTarget) {
            throw new Error('No page target found');
        }
        if (!pageTarget.webSocketDebuggerUrl) {
            throw new Error('Page target has no webSocketDebuggerUrl');
        }

        const cookies = await fetchAllCookies(pageTarget.webSocketDebuggerUrl, CDP_TIMEOUT_MS);
        console.error(`[+] Retrieved ${cookies.length} cookies`);

        // Write cookies to file
        writeCookiesFile(cookies, outputFile);
        console.error(`[+] Wrote cookies to: ${outputFile}`);
    } catch (error) {
        console.error(`ERROR: ${error.message}`);
        exitCode = 1;
    } finally {
        if (chromePid) {
            try {
                await killChrome(chromePid, outputDir);
            } catch (killError) {
                // A browser that could not be stopped is reported as a failure
                console.error(`ERROR: Failed to stop Chrome (PID: ${chromePid}): ${killError.message}`);
                exitCode = 1;
            }
        }
        try {
            fs.rmSync(outputDir, { recursive: true, force: true });
        } catch (rmError) {
            // Best effort: a leftover temp directory does not invalidate the result
            console.error(`[!] Could not remove temp directory ${outputDir}: ${rmError.message}`);
        }
    }

    if (exitCode === 0) {
        console.error('[+] Cookie extraction complete');
    }
    process.exit(exitCode);
}
/**
 * Rejection handler for the entry point: log the error message and exit with status 1.
 *
 * @param {Error} fatal - Rejection reason from main()
 */
function exitOnFatalError(fatal) {
    console.error(`Fatal error: ${fatal.message}`);
    process.exit(1);
}

// Run the script; any rejection from main() ends the process via the handler above.
main().catch(exitOnFatalError);