"""
Persona management for ArchiveBox.

A Persona represents a browser profile/identity used for archiving.
Each persona has its own:
- Chrome user data directory (for cookies, localStorage, extensions, etc.)
- Chrome extensions directory
- Cookies file
- Config overrides
"""

__package__ = 'archivebox.personas'

import shutil
import subprocess
import sys
from contextlib import contextmanager
from pathlib import Path
from typing import TYPE_CHECKING

from django.db import models
from django.conf import settings
from django.utils import timezone

from archivebox.base_models.models import ModelWithConfig, get_or_create_system_user_pk
from archivebox.uuid_compat import uuid7

try:
    import fcntl
except ImportError:  # pragma: no cover
    fcntl = None

if TYPE_CHECKING:
    pass


VOLATILE_PROFILE_DIR_NAMES = {
    'Cache',
    'Code Cache',
    'GPUCache',
    'ShaderCache',
    'Service Worker',
    'GCM Store',
    'Crashpad',
    'BrowserMetrics',
}

VOLATILE_PROFILE_FILE_NAMES = {
    'BrowserMetrics-spare.pma',
    'SingletonCookie',
    'SingletonLock',
    'SingletonSocket',
}


class Persona(ModelWithConfig):
    """
    Browser persona/profile for archiving sessions.

    Each persona provides:
    - CHROME_USER_DATA_DIR: Chrome profile directory
    - CHROME_EXTENSIONS_DIR: Installed extensions directory
    - CHROME_DOWNLOADS_DIR: Chrome downloads directory
    - COOKIES_FILE: Cookies file for wget/curl
    - config: JSON field with persona-specific config overrides

    Usage:
        # Get persona and its derived config
        config = get_config(persona=crawl.persona, crawl=crawl, snapshot=snapshot)
        chrome_dir = config['CHROME_USER_DATA_DIR']

        # Or access directly from persona
        persona = Persona.objects.get(name='Default')
        persona.CHROME_USER_DATA_DIR  # -> Path to chrome_user_data
    """

    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
    name = models.CharField(max_length=64, unique=True)
    created_at = models.DateTimeField(default=timezone.now, db_index=True)
    created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)

    class Meta:
        app_label = 'personas'

    def __str__(self) -> str:
        return self.name

    @property
    def path(self) -> Path:
        """Path to persona directory under PERSONAS_DIR."""
        from archivebox.config.constants import CONSTANTS
        return CONSTANTS.PERSONAS_DIR / self.name

    @property
    def CHROME_USER_DATA_DIR(self) -> str:
        """Derived path to Chrome user data directory for this persona."""
        return str(self.path / 'chrome_user_data')

    @property
    def CHROME_EXTENSIONS_DIR(self) -> str:
        """Derived path to Chrome extensions directory for this persona."""
        return str(self.path / 'chrome_extensions')

    @property
    def CHROME_DOWNLOADS_DIR(self) -> str:
        """Derived path to Chrome downloads directory for this persona."""
        return str(self.path / 'chrome_downloads')

    @property
    def COOKIES_FILE(self) -> str:
        """Derived path to cookies.txt file for this persona (if exists)."""
        cookies_path = self.path / 'cookies.txt'
        return str(cookies_path) if cookies_path.exists() else ''

    def get_derived_config(self) -> dict:
        """
        Get config dict with derived paths filled in.

        Returns dict with:
        - All values from self.config JSONField
        - CHROME_USER_DATA_DIR (derived from persona path)
        - CHROME_EXTENSIONS_DIR (derived from persona path)
        - CHROME_DOWNLOADS_DIR (derived from persona path)
        - COOKIES_FILE (derived from persona path, if file exists)
        - ACTIVE_PERSONA (set to this persona's name)
        """
        derived = dict(self.config or {})

        # Add derived paths (don't override if explicitly set in config)
        if 'CHROME_USER_DATA_DIR' not in derived:
            derived['CHROME_USER_DATA_DIR'] = self.CHROME_USER_DATA_DIR
        if 'CHROME_EXTENSIONS_DIR' not in derived:
            derived['CHROME_EXTENSIONS_DIR'] = self.CHROME_EXTENSIONS_DIR
        if 'CHROME_DOWNLOADS_DIR' not in derived:
            derived['CHROME_DOWNLOADS_DIR'] = self.CHROME_DOWNLOADS_DIR
        if 'COOKIES_FILE' not in derived and self.COOKIES_FILE:
            derived['COOKIES_FILE'] = self.COOKIES_FILE

        # Always set ACTIVE_PERSONA to this persona's name
        derived['ACTIVE_PERSONA'] = self.name

        return derived

    def ensure_dirs(self) -> None:
        """Create persona directories if they don't exist."""
        self.path.mkdir(parents=True, exist_ok=True)
        (self.path / 'chrome_user_data').mkdir(parents=True, exist_ok=True)
        (self.path / 'chrome_extensions').mkdir(parents=True, exist_ok=True)
        (self.path / 'chrome_downloads').mkdir(parents=True, exist_ok=True)

    def cleanup_chrome_profile(self, profile_dir: Path) -> bool:
        """Remove volatile Chrome state that should never be reused across launches."""
        cleaned = False

        if not profile_dir.exists():
            return False

        for path in profile_dir.rglob('*'):
            if path.name in VOLATILE_PROFILE_FILE_NAMES:
                try:
                    path.unlink()
                    cleaned = True
                except OSError:
                    pass

        for dirname in VOLATILE_PROFILE_DIR_NAMES:
            for path in profile_dir.rglob(dirname):
                if not path.is_dir():
                    continue
                shutil.rmtree(path, ignore_errors=True)
                cleaned = True

        for path in profile_dir.rglob('*.log'):
            try:
                path.unlink()
                cleaned = True
            except OSError:
                pass

        return cleaned

    def cleanup_chrome(self) -> bool:
        """Clean up volatile Chrome state for this persona's base profile."""
        return self.cleanup_chrome_profile(self.path / 'chrome_user_data')

    @contextmanager
    def lock_runtime_for_crawl(self):
        lock_path = self.path / '.archivebox-crawl-profile.lock'
        lock_path.parent.mkdir(parents=True, exist_ok=True)

        with lock_path.open('w') as lock_file:
            if fcntl is not None:
                fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
            try:
                yield
            finally:
                if fcntl is not None:
                    fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)

    def runtime_root_for_crawl(self, crawl) -> Path:
        return Path(crawl.output_dir) / '.persona' / self.name

    def runtime_profile_dir_for_crawl(self, crawl) -> Path:
        return self.runtime_root_for_crawl(crawl) / 'chrome_user_data'

    def runtime_downloads_dir_for_crawl(self, crawl) -> Path:
        return self.runtime_root_for_crawl(crawl) / 'chrome_downloads'

    def copy_chrome_profile(self, source_dir: Path, destination_dir: Path) -> None:
        destination_dir.parent.mkdir(parents=True, exist_ok=True)
        shutil.rmtree(destination_dir, ignore_errors=True)
        destination_dir.mkdir(parents=True, exist_ok=True)

        copy_cmd: list[str] | None = None
        source_contents = f'{source_dir}/.'

        if sys.platform == 'darwin':
            copy_cmd = ['cp', '-cR', source_contents, str(destination_dir)]
        elif sys.platform.startswith('linux'):
            copy_cmd = ['cp', '-a', source_contents, str(destination_dir)]

        if copy_cmd:
            result = subprocess.run(copy_cmd, capture_output=True, text=True)
            if result.returncode == 0:
                return

            shutil.rmtree(destination_dir, ignore_errors=True)
            destination_dir.mkdir(parents=True, exist_ok=True)

        shutil.copytree(source_dir, destination_dir, symlinks=True, dirs_exist_ok=True)

    def prepare_runtime_for_crawl(self, crawl, chrome_binary: str = '') -> dict[str, str]:
        self.ensure_dirs()

        template_dir = Path(self.CHROME_USER_DATA_DIR)
        runtime_root = self.runtime_root_for_crawl(crawl)
        runtime_profile_dir = self.runtime_profile_dir_for_crawl(crawl)
        runtime_downloads_dir = self.runtime_downloads_dir_for_crawl(crawl)

        with self.lock_runtime_for_crawl():
            if not runtime_profile_dir.exists():
                if template_dir.exists() and any(template_dir.iterdir()):
                    self.copy_chrome_profile(template_dir, runtime_profile_dir)
                else:
                    runtime_profile_dir.mkdir(parents=True, exist_ok=True)

            runtime_downloads_dir.mkdir(parents=True, exist_ok=True)
            self.cleanup_chrome_profile(runtime_profile_dir)

            (runtime_root / 'persona_name.txt').write_text(self.name)
            (runtime_root / 'template_dir.txt').write_text(str(template_dir))
            if chrome_binary:
                (runtime_root / 'chrome_binary.txt').write_text(chrome_binary)

        return {
            'CHROME_USER_DATA_DIR': str(runtime_profile_dir),
            'CHROME_DOWNLOADS_DIR': str(runtime_downloads_dir),
        }

    def cleanup_runtime_for_crawl(self, crawl) -> None:
        shutil.rmtree(Path(crawl.output_dir) / '.persona', ignore_errors=True)

    @classmethod
    def get_or_create_default(cls) -> 'Persona':
        """Get or create the Default persona."""
        persona, _ = cls.objects.get_or_create(name='Default')
        return persona

    @classmethod
    def cleanup_chrome_all(cls) -> int:
        """Clean up Chrome state files for all personas."""
        cleaned = 0
        for persona in cls.objects.all():
            if persona.cleanup_chrome():
                cleaned += 1
        return cleaned