#!/usr/bin/env python3

"""
archivebox persona ACTION [args...] [--filters]

Manage Persona records (browser profiles for archiving).

Actions:
    create  - Create Personas
    list    - List Personas as JSONL (with optional filters)
    update  - Update Personas from stdin JSONL
    delete  - Delete Personas from stdin JSONL

Examples:
    # Create a new persona
    archivebox persona create work
    archivebox persona create --import=chrome personal

    # List all personas
    archivebox persona list

    # Delete a persona
    archivebox persona list --name=old | archivebox persona delete --yes
"""

__package__ = 'archivebox.cli'
__command__ = 'archivebox persona'

import os
import sys
import shutil
import platform
import subprocess
import tempfile
from pathlib import Path
from typing import Optional, Iterable

import rich_click as click
from rich import print as rprint

from archivebox.cli.cli_utils import apply_filters


# =============================================================================
# Browser Profile Locations
# =============================================================================

def _first_chromium_user_data_dir(candidates: Iterable[Path]) -> Optional[Path]:
    """Return the first candidate directory that contains a 'Default' profile."""
    for candidate in candidates:
        # A user data dir is only usable if it holds a 'Default' profile;
        # if 'Default' exists, the candidate directory itself exists too.
        if (candidate / 'Default').exists():
            return candidate
    return None


def get_chrome_user_data_dir() -> Optional[Path]:
    """Get the default Chrome/Chromium user data directory for the current platform.

    Returns:
        The first known location that contains a 'Default' profile, or None
        if none is found or the platform is not recognised.
    """
    system = platform.system()
    home = Path.home()

    if system == 'Darwin':  # macOS
        candidates = [
            home / 'Library' / 'Application Support' / 'Google' / 'Chrome',
            home / 'Library' / 'Application Support' / 'Chromium',
        ]
    elif system == 'Linux':
        candidates = [
            home / '.config' / 'google-chrome',
            home / '.config' / 'chromium',
            home / '.config' / 'chrome',
            home / 'snap' / 'chromium' / 'common' / 'chromium',
        ]
    elif system == 'Windows':
        local_app_data = Path(os.environ.get('LOCALAPPDATA', home / 'AppData' / 'Local'))
        candidates = [
            local_app_data / 'Google' / 'Chrome' / 'User Data',
            local_app_data / 'Chromium' / 'User Data',
        ]
    else:
        candidates = []

    return _first_chromium_user_data_dir(candidates)


def get_firefox_profile_dir() -> Optional[Path]:
    """Get the default Firefox profile directory for the current platform.

    Selection is deterministic: sub-directories are sorted by path, then the
    first one whose name ends with '.default-release' wins, then the first one
    whose name contains 'default', then simply the first sub-directory.

    Returns:
        The chosen profile directory, or None if the profiles directory is
        missing, empty, or the platform is not recognised.
    """
    system = platform.system()
    home = Path.home()

    if system == 'Darwin':
        profiles_dir = home / 'Library' / 'Application Support' / 'Firefox' / 'Profiles'
    elif system == 'Linux':
        profiles_dir = home / '.mozilla' / 'firefox'
    elif system == 'Windows':
        app_data = Path(os.environ.get('APPDATA', home / 'AppData' / 'Roaming'))
        profiles_dir = app_data / 'Mozilla' / 'Firefox' / 'Profiles'
    else:
        return None

    if not profiles_dir.is_dir():
        return None

    # Sort so the result does not depend on filesystem enumeration order.
    profiles = sorted(p for p in profiles_dir.iterdir() if p.is_dir())
    if not profiles:
        return None

    preference_order = (
        lambda dirname: dirname.endswith('.default-release'),
        lambda dirname: 'default' in dirname,
    )
    for is_preferred in preference_order:
        for profile in profiles:
            if is_preferred(profile.name.lower()):
                return profile

    # If no default found, return the first profile
    return profiles[0]


def get_brave_user_data_dir() -> Optional[Path]:
    """Get the default Brave user data directory for the current platform.

    Returns:
        The first known location that contains a 'Default' profile, or None
        if none is found or the platform is not recognised.
    """
    system = platform.system()
    home = Path.home()

    if system == 'Darwin':
        candidates = [
            home / 'Library' / 'Application Support' / 'BraveSoftware' / 'Brave-Browser',
        ]
    elif system == 'Linux':
        candidates = [
            home / '.config' / 'BraveSoftware' / 'Brave-Browser',
        ]
    elif system == 'Windows':
        local_app_data = Path(os.environ.get('LOCALAPPDATA', home / 'AppData' / 'Local'))
        candidates = [
            local_app_data / 'BraveSoftware' / 'Brave-Browser' / 'User Data',
        ]
    else:
        candidates = []

    return _first_chromium_user_data_dir(candidates)


BROWSER_PROFILE_FINDERS = {
    'chrome': get_chrome_user_data_dir,
    'chromium': get_chrome_user_data_dir,  # Same locations
    'firefox': get_firefox_profile_dir,
    'brave': get_brave_user_data_dir,
}

# Browsers whose profile is a Chromium user data dir, i.e. the only ones the
# Chrome/CDP based cookie extraction below can read.
CHROMIUM_BASED_BROWSERS = frozenset({'chrome', 'chromium', 'brave'})


# =============================================================================
# Cookie Extraction via CDP
# =============================================================================

def extract_cookies_via_cdp(user_data_dir: Path, output_file: Path) -> bool:
    """
    Launch Chrome with the given user data dir and extract cookies via CDP.

    Runs plugins/chrome/extract_cookies.js under node, passing the profile
    directory and output path through environment variables. All failures are
    reported on stderr rather than raised.

    Returns True if successful, False otherwise.
    """
    from archivebox.config.constants import CONSTANTS

    # Find the cookie extraction script
    chrome_plugin_dir = Path(__file__).parent.parent / 'plugins' / 'chrome'
    extract_script = chrome_plugin_dir / 'extract_cookies.js'

    if not extract_script.exists():
        rprint(f'[yellow]Cookie extraction script not found at {extract_script}[/yellow]', file=sys.stderr)
        return False

    # Get node modules dir
    node_modules_dir = CONSTANTS.LIB_DIR / 'npm' / 'node_modules'

    # Set up environment
    env = os.environ.copy()
    env['NODE_MODULES_DIR'] = str(node_modules_dir)
    env['CHROME_USER_DATA_DIR'] = str(user_data_dir)
    env['COOKIES_OUTPUT_FILE'] = str(output_file)
    env['CHROME_HEADLESS'] = 'true'

    try:
        result = subprocess.run(
            ['node', str(extract_script)],
            env=env,
            capture_output=True,
            text=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        rprint('[yellow]Cookie extraction timed out[/yellow]', file=sys.stderr)
        return False
    except FileNotFoundError:
        rprint('[yellow]Node.js not found. Cannot extract cookies.[/yellow]', file=sys.stderr)
        return False
    except Exception as e:
        rprint(f'[yellow]Cookie extraction error: {e}[/yellow]', file=sys.stderr)
        return False

    if result.returncode == 0:
        return True

    rprint(f'[yellow]Cookie extraction failed: {result.stderr}[/yellow]', file=sys.stderr)
    return False


# =============================================================================
# Validation Helpers
# =============================================================================

def validate_persona_name(name: str) -> tuple[bool, str]:
    """
    Validate persona name to prevent path traversal attacks.

    Returns:
        (is_valid, error_message): tuple indicating if name is valid
    """
    if not name or not name.strip():
        return False, "Persona name cannot be empty"

    # Check for path separators
    if '/' in name or '\\' in name:
        return False, "Persona name cannot contain path separators (/ or \\)"

    # Check for parent directory references
    if '..' in name:
        return False, "Persona name cannot contain parent directory references (..)"

    # Check for hidden files/directories
    if name.startswith('.'):
        return False, "Persona name cannot start with a dot (.)"

    # Ensure name doesn't contain null bytes or other dangerous chars
    if '\x00' in name or '\n' in name or '\r' in name:
        return False, "Persona name contains invalid characters"

    return True, ""


def ensure_path_within_personas_dir(persona_path: Path) -> bool:
    """
    Verify that a persona path is within PERSONAS_DIR.

    This is a safety check to prevent path traversal attacks where
    a malicious persona name could cause operations on paths outside
    the expected PERSONAS_DIR.

    Returns:
        True if path is safe, False otherwise (including when either path
        cannot be resolved).
    """
    from archivebox.config.constants import CONSTANTS

    try:
        # Resolve both paths to absolute paths
        personas_dir = CONSTANTS.PERSONAS_DIR.resolve()
        resolved_path = persona_path.resolve()

        # Check if resolved_path is a child of personas_dir
        return resolved_path.is_relative_to(personas_dir)
    except (ValueError, RuntimeError, OSError):
        # Fail closed: an unresolvable path is never treated as safe.
        return False


def _persona_record(persona) -> dict:
    """Build the JSONL record emitted for a persona by `create` and `list`."""
    return {
        'id': str(persona.id) if hasattr(persona, 'id') else None,
        'name': persona.name,
        'path': str(persona.path),
        'CHROME_USER_DATA_DIR': persona.CHROME_USER_DATA_DIR,
        'COOKIES_FILE': persona.COOKIES_FILE,
    }


# =============================================================================
# CREATE
# =============================================================================

def _import_browser_profile(persona, source_profile_dir: Path, import_from: str) -> bool:
    """Copy a browser profile into the persona and try to export its cookies.

    Any existing chrome user data dir of the persona is replaced.

    Returns:
        True if the profile was copied (cookie extraction is best-effort),
        False if copying failed.
    """
    persona_chrome_dir = Path(persona.CHROME_USER_DATA_DIR)

    # Copy the browser profile
    rprint(f'[dim]Copying browser profile to {persona_chrome_dir}...[/dim]', file=sys.stderr)

    try:
        # Remove existing chrome_user_data if it exists
        if persona_chrome_dir.exists():
            shutil.rmtree(persona_chrome_dir)

        # Copy the profile directory
        # We copy the entire user data dir, not just Default profile
        # NOTE(review): for --import=firefox this places a Firefox profile in
        # the persona's Chrome user data dir - confirm this is intended.
        shutil.copytree(
            source_profile_dir,
            persona_chrome_dir,
            symlinks=True,
            ignore=shutil.ignore_patterns(
                'Cache', 'Code Cache', 'GPUCache', 'ShaderCache',
                'Service Worker', 'GCM Store', '*.log', 'Crashpad',
                'BrowserMetrics', 'BrowserMetrics-spare.pma',
                'SingletonLock', 'SingletonSocket', 'SingletonCookie',
            ),
        )
        rprint('[green]Copied browser profile to persona[/green]', file=sys.stderr)

        if import_from not in CHROMIUM_BASED_BROWSERS:
            # Chrome cannot read a non-Chromium profile: it would start a
            # fresh profile and "successfully" export zero cookies.
            rprint(
                f'[yellow]Skipping cookie extraction: {import_from} is not a Chromium-based browser.[/yellow]',
                file=sys.stderr,
            )
            rprint('[dim]You can manually export cookies using a browser extension.[/dim]', file=sys.stderr)
            return True

        # Extract cookies via CDP
        cookies_file = Path(persona.path) / 'cookies.txt'
        rprint('[dim]Extracting cookies via CDP...[/dim]', file=sys.stderr)

        if extract_cookies_via_cdp(persona_chrome_dir, cookies_file):
            rprint(f'[green]Extracted cookies to {cookies_file}[/green]', file=sys.stderr)
        else:
            rprint('[yellow]Could not extract cookies automatically.[/yellow]', file=sys.stderr)
            rprint('[dim]You can manually export cookies using a browser extension.[/dim]', file=sys.stderr)

    except Exception as e:
        rprint(f'[red]Failed to copy browser profile: {e}[/red]', file=sys.stderr)
        return False

    return True


def create_personas(
    names: Iterable[str],
    import_from: Optional[str] = None,
) -> int:
    """
    Create Personas from names.

    If --import is specified, copy the browser profile to the persona directory
    and extract cookies. Blank names are ignored; invalid names are reported
    and skipped while the remaining names are still processed.

    Exit codes:
        0: Success
        1: Failure (no names, unknown/missing browser profile, a failed
           profile copy, or at least one invalid name)
    """
    from archivebox.misc.jsonl import write_record
    from archivebox.personas.models import Persona

    is_tty = sys.stdout.isatty()
    name_list = list(names) if names else []

    if not name_list:
        rprint('[yellow]No persona names provided. Pass names as arguments.[/yellow]', file=sys.stderr)
        return 1

    # Validate import source if specified
    source_profile_dir = None
    if import_from:
        import_from = import_from.lower()
        if import_from not in BROWSER_PROFILE_FINDERS:
            rprint(f'[red]Unknown browser: {import_from}[/red]', file=sys.stderr)
            rprint(f'[dim]Supported browsers: {", ".join(BROWSER_PROFILE_FINDERS.keys())}[/dim]', file=sys.stderr)
            return 1

        source_profile_dir = BROWSER_PROFILE_FINDERS[import_from]()
        if not source_profile_dir:
            rprint(f'[red]Could not find {import_from} profile directory[/red]', file=sys.stderr)
            return 1

        rprint(f'[dim]Found {import_from} profile: {source_profile_dir}[/dim]', file=sys.stderr)

    created_count = 0
    invalid_count = 0
    for raw_name in name_list:
        name = raw_name.strip()
        if not name:
            continue

        # Validate persona name to prevent path traversal
        is_valid, error_msg = validate_persona_name(name)
        if not is_valid:
            rprint(f'[red]Invalid persona name "{name}": {error_msg}[/red]', file=sys.stderr)
            invalid_count += 1
            continue

        persona, created = Persona.objects.get_or_create(name=name)

        if created:
            persona.ensure_dirs()
            created_count += 1
            rprint(f'[green]Created persona: {name}[/green]', file=sys.stderr)
        else:
            rprint(f'[dim]Persona already exists: {name}[/dim]', file=sys.stderr)

        # Import browser profile if requested
        if source_profile_dir and not _import_browser_profile(persona, source_profile_dir, import_from):
            return 1

        if not is_tty:
            write_record(_persona_record(persona))

    rprint(f'[green]Created {created_count} new persona(s)[/green]', file=sys.stderr)
    # Rejected names must be visible to scripts through the exit status.
    return 1 if invalid_count else 0


# =============================================================================
# LIST
# =============================================================================

def list_personas(
    name: Optional[str] = None,
    name__icontains: Optional[str] = None,
    limit: Optional[int] = None,
) -> int:
    """
    List Personas as JSONL with optional filters.

    On a TTY a human-readable line per persona is printed instead of JSONL.

    Exit codes:
        0: Success (even if no results)
    """
    from archivebox.misc.jsonl import write_record
    from archivebox.personas.models import Persona

    is_tty = sys.stdout.isatty()

    queryset = Persona.objects.all().order_by('name')

    # Apply filters
    filter_kwargs = {
        'name': name,
        'name__icontains': name__icontains,
    }
    queryset = apply_filters(queryset, filter_kwargs, limit=limit)

    count = 0
    for persona in queryset:
        if is_tty:
            cookies_status = '[green]✓[/green]' if persona.COOKIES_FILE else '[dim]✗[/dim]'
            chrome_status = '[green]✓[/green]' if Path(persona.CHROME_USER_DATA_DIR).exists() else '[dim]✗[/dim]'
            rprint(f'[cyan]{persona.name:20}[/cyan] cookies:{cookies_status} chrome:{chrome_status} [dim]{persona.path}[/dim]')
        else:
            write_record(_persona_record(persona))
        count += 1

    rprint(f'[dim]Listed {count} persona(s)[/dim]', file=sys.stderr)
    return 0


# =============================================================================
# UPDATE
# =============================================================================

def _rename_persona(persona, new_name: str) -> bool:
    """Rename a persona and its directory, then save it.

    The rename is refused (nothing is changed) when another persona already
    uses the name or when the destination directory already exists.

    Returns:
        True if the persona was saved under new_name, False if refused.
    """
    current_name = persona.name

    # Check first so a failing save() cannot leave the directory moved
    # while the database row still carries the old name.
    if type(persona).objects.filter(name=new_name).exclude(pk=persona.pk).exists():
        rprint(
            f'[red]Cannot rename persona "{current_name}" to "{new_name}": name already in use[/red]',
            file=sys.stderr,
        )
        return False

    # Rename the persona directory too
    old_path = persona.path
    persona.name = new_name
    new_path = persona.path

    if old_path.exists() and old_path != new_path:
        if new_path.exists():
            # shutil.move() into an existing directory would nest the old
            # directory inside it instead of renaming it.
            persona.name = current_name
            rprint(
                f'[red]Cannot rename persona "{current_name}" to "{new_name}": {new_path} already exists[/red]',
                file=sys.stderr,
            )
            return False
        shutil.move(str(old_path), str(new_path))

    persona.save()
    return True


def update_personas(name: Optional[str] = None) -> int:
    """
    Update Personas from stdin JSONL.

    Reads Persona records from stdin and applies updates.
    Uses PATCH semantics - only specified fields are updated.
    Records are looked up by 'id' when present, otherwise by 'name'.

    Exit codes:
        0: Success
        1: No input or error (including an invalid --name)
    """
    from archivebox.misc.jsonl import read_stdin, write_record
    from archivebox.personas.models import Persona

    is_tty = sys.stdout.isatty()

    records = list(read_stdin())
    if not records:
        rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
        return 1

    if name:
        # The new name is the same for every record: validate it once.
        is_valid, error_msg = validate_persona_name(name)
        if not is_valid:
            rprint(f'[red]Invalid new persona name "{name}": {error_msg}[/red]', file=sys.stderr)
            return 1

    updated_count = 0
    for record in records:
        persona_id = record.get('id')
        old_name = record.get('name')

        if not persona_id and not old_name:
            continue

        try:
            if persona_id:
                persona = Persona.objects.get(id=persona_id)
            else:
                persona = Persona.objects.get(name=old_name)
        except Persona.DoesNotExist:
            rprint(f'[yellow]Persona not found: {persona_id or old_name}[/yellow]', file=sys.stderr)
            continue

        # Apply updates from CLI flags
        if name and not _rename_persona(persona, name):
            continue

        updated_count += 1

        if not is_tty:
            write_record({
                'id': str(persona.id) if hasattr(persona, 'id') else None,
                'name': persona.name,
                'path': str(persona.path),
            })

    rprint(f'[green]Updated {updated_count} persona(s)[/green]', file=sys.stderr)
    return 0


# =============================================================================
# DELETE
# =============================================================================

def delete_personas(yes: bool = False, dry_run: bool = False) -> int:
    """
    Delete Personas from stdin JSONL.

    Requires --yes flag to confirm deletion. With --dry-run the matching
    personas are only listed. Each persona directory is removed only if it
    resolves to a location inside PERSONAS_DIR.

    Exit codes:
        0: Success
        1: No input or missing --yes flag
    """
    from django.db.models import Q

    from archivebox.misc.jsonl import read_stdin
    from archivebox.personas.models import Persona

    records = list(read_stdin())
    if not records:
        rprint('[yellow]No records provided via stdin[/yellow]', file=sys.stderr)
        return 1

    # Collect persona IDs or names
    persona_ids = []
    persona_names = []
    for r in records:
        if r.get('id'):
            persona_ids.append(r['id'])
        elif r.get('name'):
            persona_names.append(r['name'])

    if not persona_ids and not persona_names:
        rprint('[yellow]No valid persona IDs or names in input[/yellow]', file=sys.stderr)
        return 1

    query = Q()
    if persona_ids:
        query |= Q(id__in=persona_ids)
    if persona_names:
        query |= Q(name__in=persona_names)

    personas = Persona.objects.filter(query)
    count = personas.count()

    if count == 0:
        rprint('[yellow]No matching personas found[/yellow]', file=sys.stderr)
        return 0

    if dry_run:
        rprint(f'[yellow]Would delete {count} persona(s) (dry run)[/yellow]', file=sys.stderr)
        for persona in personas:
            rprint(f'  {persona.name} ({persona.path})', file=sys.stderr)
        return 0

    if not yes:
        rprint('[red]Use --yes to confirm deletion[/red]', file=sys.stderr)
        return 1

    # Delete persona directories and database records
    deleted_count = 0
    for persona in personas:
        persona_path = persona.path

        # Safety check: ensure path is within PERSONAS_DIR before deletion
        if not ensure_path_within_personas_dir(persona_path):
            rprint(f'[red]Security error: persona path "{persona_path}" is outside PERSONAS_DIR. Skipping deletion.[/red]', file=sys.stderr)
            continue

        if persona_path.exists():
            shutil.rmtree(persona_path)
        persona.delete()
        deleted_count += 1

    rprint(f'[green]Deleted {deleted_count} persona(s)[/green]', file=sys.stderr)
    return 0


# =============================================================================
# CLI Commands
# =============================================================================

@click.group()
def main():
    """Manage Persona records (browser profiles)."""
    pass


@main.command('create')
@click.argument('names', nargs=-1)
@click.option('--import', 'import_from', help='Import profile from browser (chrome, firefox, brave)')
def create_cmd(names: tuple, import_from: Optional[str]):
    """Create Personas, optionally importing from a browser profile."""
    sys.exit(create_personas(names, import_from=import_from))


@main.command('list')
@click.option('--name', help='Filter by exact name')
@click.option('--name__icontains', help='Filter by name contains')
@click.option('--limit', '-n', type=int, help='Limit number of results')
def list_cmd(name: Optional[str], name__icontains: Optional[str], limit: Optional[int]):
    """List Personas as JSONL."""
    sys.exit(list_personas(name=name, name__icontains=name__icontains, limit=limit))


@main.command('update')
@click.option('--name', '-n', help='Set new name')
def update_cmd(name: Optional[str]):
    """Update Personas from stdin JSONL."""
    sys.exit(update_personas(name=name))


@main.command('delete')
@click.option('--yes', '-y', is_flag=True, help='Confirm deletion')
@click.option('--dry-run', is_flag=True, help='Show what would be deleted')
def delete_cmd(yes: bool, dry_run: bool):
    """Delete Personas from stdin JSONL."""
    sys.exit(delete_personas(yes=yes, dry_run=dry_run))


if __name__ == '__main__':
    main()
#!/usr/bin/env node
/**
 * Extract cookies from Chrome via CDP and write to Netscape cookies.txt format.
 *
 * This script launches Chrome with a given user data directory, connects via CDP,
 * extracts all cookies, and writes them to a cookies.txt file in Netscape format.
 *
 * Usage:
 *   CHROME_USER_DATA_DIR=/path/to/profile COOKIES_OUTPUT_FILE=/path/to/cookies.txt node extract_cookies.js
 *
 * Environment variables:
 *   CHROME_USER_DATA_DIR: Path to Chrome user data directory (required)
 *   COOKIES_OUTPUT_FILE: Path to output cookies.txt file (required)
 *   CHROME_HEADLESS: Run in headless mode (default: true)
 *   NODE_MODULES_DIR: Path to node_modules for module resolution
 */

// Add NODE_MODULES_DIR to module resolution paths if set
if (process.env.NODE_MODULES_DIR) {
    module.paths.unshift(process.env.NODE_MODULES_DIR);
}

const fs = require('fs');
const http = require('http');
const os = require('os');
const path = require('path');
const {
    findChromium,
    launchChromium,
    killChrome,
    getEnv,
} = require('./chrome_utils.js');

/**
 * Convert a cookie object to Netscape cookies.txt format line.
 *
 * Format: domain  includeSubdomains  path  secure  expiry  name  value
 *
 * The domain is written exactly as CDP reports it. CDP cookie objects carry
 * no `hostOnly` field; a leading '.' on `domain` is what marks a domain
 * cookie, so adding a dot here would widen host-only cookies to subdomains.
 *
 * @param {Object} cookie - CDP cookie object
 * @returns {string} - Netscape format cookie line
 */
function cookieToNetscape(cookie) {
    const domain = cookie.domain;

    // Include subdomains: TRUE if domain cookie (starts with .)
    const includeSubdomains = domain.startsWith('.') ? 'TRUE' : 'FALSE';

    // Path
    const cookiePath = cookie.path || '/';

    // Secure flag
    const secure = cookie.secure ? 'TRUE' : 'FALSE';

    // Expiry timestamp (0 for session cookies)
    let expiry = '0';
    if (cookie.expires && cookie.expires > 0) {
        // CDP returns expiry in seconds since epoch
        expiry = Math.floor(cookie.expires).toString();
    }

    // Name and value
    const name = cookie.name;
    const value = cookie.value;

    return `${domain}\t${includeSubdomains}\t${cookiePath}\t${secure}\t${expiry}\t${name}\t${value}`;
}

/**
 * Write cookies to Netscape cookies.txt format file.
 *
 * The file holds session secrets, so it is created owner-read/write only
 * and an already existing file is tightened to the same mode.
 *
 * @param {Array} cookies - Array of CDP cookie objects
 * @param {string} outputPath - Path to output file
 */
function writeCookiesFile(cookies, outputPath) {
    const lines = [
        '# Netscape HTTP Cookie File',
        '# https://curl.se/docs/http-cookies.html',
        '# This file was generated by ArchiveBox persona cookie extraction',
        '#',
        '# Format: domain\\tincludeSubdomains\\tpath\\tsecure\\texpiry\\tname\\tvalue',
        '',
    ];

    for (const cookie of cookies) {
        lines.push(cookieToNetscape(cookie));
    }

    fs.writeFileSync(outputPath, lines.join('\n') + '\n', { mode: 0o600 });
    // `mode` only applies when the file is created; fix up a pre-existing one.
    fs.chmodSync(outputPath, 0o600);
}

/**
 * Fetch all cookies from a running Chrome through its debugging port.
 *
 * Lists the targets over HTTP, opens a WebSocket to a page target (or the
 * first target) and sends Network.getAllCookies.
 *
 * @param {number} port - Chrome remote debugging port on 127.0.0.1
 * @param {number} [timeoutMs=15000] - Give up after this many milliseconds
 * @returns {Promise<Array>} - Resolves with the CDP cookie objects
 */
function fetchAllCookies(port, timeoutMs = 15000) {
    return new Promise((resolve, reject) => {
        let ws = null;
        let settled = false;

        // Settle exactly once, and always release the timer and the socket.
        const settle = (fn, value) => {
            if (settled) return;
            settled = true;
            clearTimeout(timer);
            if (ws) {
                try { ws.close(); } catch (e) {}
            }
            fn(value);
        };

        const timer = setTimeout(() => {
            settle(reject, new Error(`Timed out after ${timeoutMs}ms waiting for cookies via CDP`));
        }, timeoutMs);

        const req = http.request(
            {
                hostname: '127.0.0.1',
                port: port,
                path: '/json/list',
                method: 'GET',
            },
            (res) => {
                let data = '';
                res.on('data', (chunk) => (data += chunk));
                res.on('end', () => {
                    try {
                        const targets = JSON.parse(data);
                        // Find a page target
                        const pageTarget = targets.find(t => t.type === 'page') || targets[0];
                        if (!pageTarget) {
                            settle(reject, new Error('No page target found'));
                            return;
                        }

                        // Connect via WebSocket and send CDP command
                        const WebSocket = require('ws');
                        ws = new WebSocket(pageTarget.webSocketDebuggerUrl);

                        ws.on('open', () => {
                            ws.send(JSON.stringify({
                                id: 1,
                                method: 'Network.getAllCookies',
                            }));
                        });

                        ws.on('message', (message) => {
                            let response;
                            try {
                                response = JSON.parse(message);
                            } catch (e) {
                                // A throw inside this callback would be an
                                // uncaught exception, not a rejection.
                                settle(reject, e);
                                return;
                            }
                            if (response.id !== 1) return;
                            if (response.result && response.result.cookies) {
                                settle(resolve, response.result.cookies);
                            } else {
                                settle(reject, new Error('Failed to get cookies: ' + JSON.stringify(response)));
                            }
                        });

                        ws.on('error', (err) => settle(reject, err));
                    } catch (e) {
                        settle(reject, e);
                    }
                });
            }
        );

        req.on('error', (err) => settle(reject, err));
        req.end();
    });
}

/**
 * Entry point: validate the environment, launch Chrome on the profile,
 * export its cookies, and always stop Chrome and remove the temp directory
 * before exiting with 0 (success) or 1 (failure).
 */
async function main() {
    const userDataDir = getEnv('CHROME_USER_DATA_DIR');
    const outputFile = getEnv('COOKIES_OUTPUT_FILE');

    if (!userDataDir) {
        console.error('ERROR: CHROME_USER_DATA_DIR environment variable is required');
        process.exit(1);
    }

    if (!outputFile) {
        console.error('ERROR: COOKIES_OUTPUT_FILE environment variable is required');
        process.exit(1);
    }

    if (!fs.existsSync(userDataDir)) {
        console.error(`ERROR: User data directory does not exist: ${userDataDir}`);
        process.exit(1);
    }

    const binary = findChromium();
    if (!binary) {
        console.error('ERROR: Chromium binary not found');
        process.exit(1);
    }

    console.error(`[*] Extracting cookies from: ${userDataDir}`);
    console.error(`[*] Output file: ${outputFile}`);
    console.error(`[*] Using browser: ${binary}`);

    // Create a temporary output directory for Chrome files
    const outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'chrome-cookies-'));

    let chromePid = null;
    let exitCode = 0;

    try {
        // Launch Chrome with the user data directory
        const result = await launchChromium({
            binary,
            outputDir,
            userDataDir,
            headless: true,
            killZombies: false,  // Don't kill other Chrome instances
        });

        if (!result.success) {
            // Throw rather than exit so the cleanup below still runs.
            throw new Error(`Failed to launch Chrome: ${result.error}`);
        }

        chromePid = result.pid;
        const cdpUrl = result.cdpUrl;
        const port = result.port;

        console.error(`[*] Chrome launched (PID: ${chromePid})`);
        console.error(`[*] CDP URL: ${cdpUrl}`);

        // Wait a moment for the browser to fully initialize
        await new Promise(r => setTimeout(r, 2000));

        console.error('[*] Fetching cookies via CDP...');
        const cookies = await fetchAllCookies(port);

        console.error(`[+] Retrieved ${cookies.length} cookies`);

        // Write cookies to file
        writeCookiesFile(cookies, outputFile);
        console.error(`[+] Wrote cookies to: ${outputFile}`);
    } catch (error) {
        console.error(`ERROR: ${error.message}`);
        exitCode = 1;
    } finally {
        // Clean up on success and on error alike
        if (chromePid) {
            try {
                await killChrome(chromePid, outputDir);
            } catch (e) {
                console.error(`ERROR: Failed to stop Chrome (PID: ${chromePid}): ${e.message}`);
                exitCode = 1;
            }
        }

        // Remove temp directory
        try {
            fs.rmSync(outputDir, { recursive: true, force: true });
        } catch (e) {}
    }

    if (exitCode === 0) {
        console.error('[+] Cookie extraction complete');
    }
    process.exit(exitCode);
}

main().catch((e) => {
    console.error(`Fatal error: ${e.message}`);
    process.exit(1);
});