mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-03 01:15:57 +10:00
- Add `archivebox persona create/list/update/delete` commands - Support `--import=chrome|firefox|brave` to copy browser profile - Extract cookies via CDP to generate cookies.txt for non-browser tools - Fix JSDoc comment parsing issue in chrome_utils.js
255 lines
8.3 KiB
JavaScript
255 lines
8.3 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Extract cookies from Chrome via CDP and write them to a Netscape-format cookies.txt file.
 *
 * This script launches Chrome with a given user data directory, connects via CDP,
 * extracts all cookies, and writes them to a cookies.txt file in Netscape format.
 *
 * Usage:
 *   CHROME_USER_DATA_DIR=/path/to/profile COOKIES_OUTPUT_FILE=/path/to/cookies.txt node extract_cookies.js
 *
 * Environment variables:
 *   CHROME_USER_DATA_DIR: Path to Chrome user data directory (required)
 *   COOKIES_OUTPUT_FILE: Path to output cookies.txt file (required)
 *   CHROME_HEADLESS: Run in headless mode (default: true)
 *   NODE_MODULES_DIR: Path to node_modules for module resolution
 */
|
|
|
|
// When NODE_MODULES_DIR is set, give it top priority in this module's
// resolution paths so the require() calls below can find packages there.
const extraNodeModulesDir = process.env.NODE_MODULES_DIR;
if (extraNodeModulesDir) {
    module.paths.unshift(extraNodeModulesDir);
}
|
|
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
const {
|
|
findChromium,
|
|
launchChromium,
|
|
killChrome,
|
|
getEnv,
|
|
} = require('./chrome_utils.js');
|
|
|
|
/**
 * Convert a cookie object into one line of a Netscape cookies.txt file.
 *
 * Line format (tab-separated):
 *   domain  includeSubdomains  path  secure  expiry  name  value
 *
 * CDP's Network.Cookie carries no `hostOnly` field: a domain cookie is reported
 * with a leading "." in `domain`, a host-only cookie without one. The domain is
 * therefore trusted as-is, and a "." is only prepended when the caller passes
 * an explicit `hostOnly: false` (e.g. cookies shaped like the extension API).
 * Prepending unconditionally would turn every host-only cookie into a domain
 * cookie and leak it to all subdomains.
 *
 * @param {Object} cookie - CDP cookie object (domain, path, secure, expires, name, value)
 * @returns {string} - Netscape format cookie line (no trailing newline)
 */
function cookieToNetscape(cookie) {
    let domain = cookie.domain;
    if (cookie.hostOnly === false && !domain.startsWith('.')) {
        domain = `.${domain}`;
    }

    // A leading dot is what marks the cookie as valid for subdomains.
    const includeSubdomains = domain.startsWith('.') ? 'TRUE' : 'FALSE';
    const cookiePath = cookie.path || '/';
    const secure = cookie.secure ? 'TRUE' : 'FALSE';

    // CDP reports expiry as (possibly fractional) seconds since the epoch and
    // uses a non-positive value for session cookies, which are written as 0.
    const expiry = cookie.expires > 0 ? Math.floor(cookie.expires).toString() : '0';

    return `${domain}\t${includeSubdomains}\t${cookiePath}\t${secure}\t${expiry}\t${cookie.name}\t${cookie.value}`;
}
|
|
|
|
/**
 * Write cookies to a Netscape cookies.txt format file.
 *
 * The file starts with a fixed comment header followed by a blank line, then
 * one line per cookie as produced by cookieToNetscape(), and ends with a
 * trailing newline. An existing file at outputPath is overwritten.
 *
 * Because the jar contains live session cookies, the file is created with
 * owner-only permissions (0600) and a pre-existing file is tightened to the
 * same mode on a best-effort basis.
 *
 * @param {Array} cookies - Array of CDP cookie objects
 * @param {string} outputPath - Path to output file
 */
function writeCookiesFile(cookies, outputPath) {
    const lines = [
        '# Netscape HTTP Cookie File',
        '# https://curl.se/docs/http-cookies.html',
        '# This file was generated by ArchiveBox persona cookie extraction',
        '#',
        '# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue',
        '',
    ];

    for (const cookie of cookies) {
        lines.push(cookieToNetscape(cookie));
    }

    // `mode` only takes effect when the file is created by this call.
    fs.writeFileSync(outputPath, `${lines.join('\n')}\n`, { mode: 0o600 });

    // Tighten permissions on a file that already existed. Failure here (e.g.
    // a file owned by another user) must not discard the cookies just written.
    try {
        fs.chmodSync(outputPath, 0o600);
    } catch (err) {
        console.error(`WARNING: Could not restrict permissions on ${outputPath}: ${err.message}`);
    }
}
|
|
|
|
/** Upper bound (ms) for each CDP round-trip so a wedged browser cannot hang the script. */
const CDP_REQUEST_TIMEOUT_MS = 30000;

/**
 * Decide whether Chrome should run headless, based on CHROME_HEADLESS.
 * Unset or empty means headless; only an explicit false-like value disables it.
 *
 * @returns {boolean} - true to launch Chrome headless
 */
function isHeadlessRequested() {
    const raw = getEnv('CHROME_HEADLESS');
    if (!raw) {
        return true;
    }
    return !['false', '0', 'no', 'off'].includes(String(raw).trim().toLowerCase());
}

/**
 * Fetch the list of debuggable targets from Chrome's HTTP endpoint (/json/list).
 *
 * @param {number} port - Remote debugging port on 127.0.0.1
 * @param {number} timeoutMs - Socket idle time after which the request is aborted
 * @returns {Promise<Array>} - Parsed target descriptors
 */
function fetchCdpTargets(port, timeoutMs) {
    const http = require('http');

    return new Promise((resolve, reject) => {
        const req = http.request(
            { hostname: '127.0.0.1', port, path: '/json/list', method: 'GET' },
            (res) => {
                let body = '';
                res.on('data', (chunk) => {
                    body += chunk;
                });
                res.on('error', reject);
                res.on('end', () => {
                    if (res.statusCode !== 200) {
                        reject(new Error(`CDP /json/list returned HTTP ${res.statusCode}`));
                        return;
                    }
                    try {
                        const targets = JSON.parse(body);
                        if (!Array.isArray(targets)) {
                            throw new Error('CDP /json/list did not return a target list');
                        }
                        resolve(targets);
                    } catch (err) {
                        reject(err);
                    }
                });
            },
        );

        req.setTimeout(timeoutMs, () => {
            req.destroy(new Error(`Timed out after ${timeoutMs}ms waiting for CDP /json/list`));
        });
        req.on('error', reject);
        req.end();
    });
}

/**
 * Open a CDP WebSocket, issue Network.getAllCookies, and resolve with the cookies.
 *
 * The promise settles exactly once: on the matching response, on a socket
 * error, on a malformed message, on the socket closing early, or on timeout.
 *
 * @param {string} webSocketUrl - Target's webSocketDebuggerUrl
 * @param {number} timeoutMs - Maximum time to wait for the response
 * @returns {Promise<Array>} - CDP cookie objects
 */
function requestAllCookies(webSocketUrl, timeoutMs) {
    const WebSocket = require('ws');
    const requestId = 1;

    return new Promise((resolve, reject) => {
        const ws = new WebSocket(webSocketUrl);
        let settled = false;
        let timer = null;

        const settle = (finish, value) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            finish(value);
        };

        timer = setTimeout(() => {
            settle(reject, new Error(`Timed out after ${timeoutMs}ms waiting for cookies from CDP`));
            ws.terminate();
        }, timeoutMs);

        ws.on('open', () => {
            ws.send(JSON.stringify({ id: requestId, method: 'Network.getAllCookies' }));
        });

        ws.on('message', (message) => {
            // Parse inside a try: an exception thrown from an event handler
            // would bypass the promise and crash the process without cleanup.
            let response;
            try {
                response = JSON.parse(String(message));
            } catch (err) {
                settle(reject, new Error(`Invalid CDP message: ${err.message}`));
                ws.close();
                return;
            }

            // Ignore anything that is not the reply to our request.
            if (!response || response.id !== requestId) {
                return;
            }

            if (response.result && response.result.cookies) {
                settle(resolve, response.result.cookies);
            } else {
                settle(reject, new Error('Failed to get cookies: ' + JSON.stringify(response)));
            }
            ws.close();
        });

        ws.on('error', (err) => settle(reject, err));
        ws.on('close', () => {
            settle(reject, new Error('CDP WebSocket closed before cookies were received'));
        });
    });
}

/**
 * Script entry point: validate configuration, launch Chrome on the given
 * profile, pull all cookies over CDP, write them to COOKIES_OUTPUT_FILE, and
 * exit with status 0 on success or 1 on any failure.
 *
 * Progress and errors are logged to stderr. Chrome and the temporary working
 * directory are cleaned up on every path once Chrome launch has been attempted.
 *
 * @returns {Promise<void>} - Never resolves normally; the process exits.
 */
async function main() {
    const userDataDir = getEnv('CHROME_USER_DATA_DIR');
    const outputFile = getEnv('COOKIES_OUTPUT_FILE');

    if (!userDataDir) {
        console.error('ERROR: CHROME_USER_DATA_DIR environment variable is required');
        process.exit(1);
    }

    if (!outputFile) {
        console.error('ERROR: COOKIES_OUTPUT_FILE environment variable is required');
        process.exit(1);
    }

    if (!fs.existsSync(userDataDir)) {
        console.error(`ERROR: User data directory does not exist: ${userDataDir}`);
        process.exit(1);
    }

    const binary = findChromium();
    if (!binary) {
        console.error('ERROR: Chromium binary not found');
        process.exit(1);
    }

    console.error(`[*] Extracting cookies from: ${userDataDir}`);
    console.error(`[*] Output file: ${outputFile}`);
    console.error(`[*] Using browser: ${binary}`);

    // Create a temporary output directory for Chrome files
    const outputDir = fs.mkdtempSync(path.join(require('os').tmpdir(), 'chrome-cookies-'));

    let chromePid = null;
    let exitCode = 0;

    try {
        const result = await launchChromium({
            binary,
            outputDir,
            userDataDir,
            headless: isHeadlessRequested(),
            killZombies: false, // Don't kill other Chrome instances
        });

        if (!result.success) {
            // Thrown (not process.exit) so the finally block still removes outputDir.
            throw new Error(`Failed to launch Chrome: ${result.error}`);
        }

        chromePid = result.pid;
        console.error(`[*] Chrome launched (PID: ${chromePid})`);
        console.error(`[*] CDP URL: ${result.cdpUrl}`);

        // Wait a moment for the browser to fully initialize
        await new Promise((resolve) => setTimeout(resolve, 2000));

        console.error('[*] Fetching cookies via CDP...');
        const targets = await fetchCdpTargets(result.port, CDP_REQUEST_TIMEOUT_MS);
        const pageTarget = targets.find((target) => target.type === 'page') || targets[0];
        if (!pageTarget) {
            throw new Error('No page target found');
        }
        const cookies = await requestAllCookies(pageTarget.webSocketDebuggerUrl, CDP_REQUEST_TIMEOUT_MS);

        console.error(`[+] Retrieved ${cookies.length} cookies`);

        writeCookiesFile(cookies, outputFile);
        console.error(`[+] Wrote cookies to: ${outputFile}`);
    } catch (error) {
        console.error(`ERROR: ${error.message}`);
        exitCode = 1;
    } finally {
        if (chromePid) {
            try {
                await killChrome(chromePid, outputDir);
            } catch (err) {
                // A browser left running is a real failure: report it and exit non-zero.
                console.error(`ERROR: Failed to stop Chrome (PID: ${chromePid}): ${err.message}`);
                exitCode = 1;
            }
        }

        // Temp-dir removal is best-effort; it must not mask the real outcome.
        try {
            fs.rmSync(outputDir, { recursive: true, force: true });
        } catch (err) {
            console.error(`WARNING: Could not remove temp directory ${outputDir}: ${err.message}`);
        }
    }

    if (exitCode === 0) {
        console.error('[+] Cookie extraction complete');
    }
    process.exit(exitCode);
}
|
|
|
|
// Run the script. Any rejection that escapes main() is reported on stderr
// and turned into a non-zero exit status.
function reportFatalError(err) {
    console.error(`Fatal error: ${err.message}`);
    process.exit(1);
}

main().catch(reportFatalError);
|