"""
Orchestrator for managing worker processes.

The Orchestrator polls queues for each model type (Crawl, Snapshot, ArchiveResult)
and lazily spawns worker processes when there is work to be done.

Architecture:
    Orchestrator (main loop, polls queues)
    ├── CrawlWorker subprocess(es)
    ├── SnapshotWorker subprocess(es)
    └── ArchiveResultWorker subprocess(es)
        └── Each worker spawns task subprocesses via CLI

Usage:
    # Embedded in other commands (exits when done)
    orchestrator = Orchestrator(exit_on_idle=True)
    orchestrator.runloop()
    
    # Daemon mode (runs forever)
    orchestrator = Orchestrator(exit_on_idle=False)
    orchestrator.start()  # fork and return
    
    # Or run via CLI
    archivebox orchestrator [--daemon]
"""

__package__ = 'archivebox.workers'

import os
import time
from typing import Type
from multiprocessing import Process

from django.utils import timezone

from rich import print

from .worker import Worker, CrawlWorker, SnapshotWorker, ArchiveResultWorker
from .pid_utils import (
    write_pid_file,
    remove_pid_file,
    get_all_worker_pids,
    cleanup_stale_pid_files,
)


class Orchestrator:
    """
    Manages worker processes by polling queues and spawning workers as needed.
    
    The orchestrator:
    1. Polls each model queue (Crawl, Snapshot, ArchiveResult)
    2. If items exist and fewer than MAX_CONCURRENT workers are running, spawns workers
    3. Monitors worker health and cleans up stale PIDs
    4. Exits when all queues are empty (unless daemon mode)
    """
    
    WORKER_TYPES: list[Type[Worker]] = [CrawlWorker, SnapshotWorker, ArchiveResultWorker]
    
    # Configuration
    POLL_INTERVAL: float = 1.0
    IDLE_TIMEOUT: int = 3  # Exit after N idle ticks (0 = never exit)
    MAX_WORKERS_PER_TYPE: int = 4  # Max workers per model type
    MAX_TOTAL_WORKERS: int = 12  # Max workers across all types
    
    def __init__(self, exit_on_idle: bool = True):
        self.exit_on_idle = exit_on_idle
        self.pid: int = os.getpid()
        self.pid_file = None
        self.idle_count: int = 0
    
    def __repr__(self) -> str:
        return f'[underline]Orchestrator[/underline]\\[pid={self.pid}]'
    
    @classmethod
    def is_running(cls) -> bool:
        """Check if an orchestrator is already running."""
        workers = get_all_worker_pids('orchestrator')
        return len(workers) > 0
    
    def on_startup(self) -> None:
        """Called when orchestrator starts."""
        self.pid = os.getpid()
        self.pid_file = write_pid_file('orchestrator', worker_id=0)
        print(f'[green]👨‍✈️ {self} STARTED[/green]')
        
        # Clean up any stale PID files from previous runs
        stale_count = cleanup_stale_pid_files()
        if stale_count:
            print(f'[yellow]👨‍✈️ {self} cleaned up {stale_count} stale PID files[/yellow]')
    
    def on_shutdown(self, error: BaseException | None = None) -> None:
        """Called when orchestrator shuts down."""
        if self.pid_file:
            remove_pid_file(self.pid_file)
        
        if error and not isinstance(error, KeyboardInterrupt):
            print(f'[red]👨‍✈️ {self} SHUTDOWN with error:[/red] {type(error).__name__}: {error}')
        else:
            print(f'[grey53]👨‍✈️ {self} SHUTDOWN[/grey53]')
    
    def get_total_worker_count(self) -> int:
        """Get total count of running workers across all types."""
        cleanup_stale_pid_files()
        return sum(len(W.get_running_workers()) for W in self.WORKER_TYPES)
    
    def should_spawn_worker(self, WorkerClass: Type[Worker], queue_count: int) -> bool:
        """Determine if we should spawn a new worker of the given type."""
        if queue_count == 0:
            return False
        
        # Check per-type limit
        running_workers = WorkerClass.get_running_workers()
        if len(running_workers) >= self.MAX_WORKERS_PER_TYPE:
            return False
        
        # Check total limit
        if self.get_total_worker_count() >= self.MAX_TOTAL_WORKERS:
            return False
        
        # Check if we already have enough workers for the queue size
        # Spawn more gradually - don't flood with workers
        if len(running_workers) > 0 and queue_count <= len(running_workers) * WorkerClass.MAX_CONCURRENT_TASKS:
            return False
        
        return True
    
    def spawn_worker(self, WorkerClass: Type[Worker]) -> int | None:
        """Spawn a new worker process. Returns PID or None if spawn failed."""
        try:
            pid = WorkerClass.start(daemon=False)
            print(f'[blue]👨‍✈️ {self} spawned {WorkerClass.name} worker[/blue] pid={pid}')
            return pid
        except Exception as e:
            print(f'[red]👨‍✈️ {self} failed to spawn {WorkerClass.name} worker:[/red] {e}')
            return None
    
    def check_queues_and_spawn_workers(self) -> dict[str, int]:
        """
        Check all queues and spawn workers as needed.
        Returns dict of queue sizes by worker type.
        """
        queue_sizes = {}
        
        for WorkerClass in self.WORKER_TYPES:
            # Get queue for this worker type
            # Need to instantiate worker to get queue (for model access)
            worker = WorkerClass(worker_id=-1)  # temp instance just for queue access
            queue = worker.get_queue()
            queue_count = queue.count()
            queue_sizes[WorkerClass.name] = queue_count
            
            # Spawn worker if needed
            if self.should_spawn_worker(WorkerClass, queue_count):
                self.spawn_worker(WorkerClass)
        
        return queue_sizes
    
    def has_pending_work(self, queue_sizes: dict[str, int]) -> bool:
        """Check if any queue has pending work."""
        return any(count > 0 for count in queue_sizes.values())
    
    def has_running_workers(self) -> bool:
        """Check if any workers are still running."""
        return self.get_total_worker_count() > 0
    
    def has_future_work(self) -> bool:
        """Check if there's work scheduled for the future (retry_at > now)."""
        for WorkerClass in self.WORKER_TYPES:
            worker = WorkerClass(worker_id=-1)
            Model = worker.get_model()
            # Check for items not in final state with future retry_at
            future_count = Model.objects.filter(
                retry_at__gt=timezone.now()
            ).exclude(
                status__in=Model.FINAL_STATES
            ).count()
            if future_count > 0:
                return True
        return False
    
    def on_tick(self, queue_sizes: dict[str, int]) -> None:
        """Called each orchestrator tick. Override for custom behavior."""
        total_queued = sum(queue_sizes.values())
        total_workers = self.get_total_worker_count()
        
        if total_queued > 0 or total_workers > 0:
            # Build status line
            status_parts = []
            for WorkerClass in self.WORKER_TYPES:
                name = WorkerClass.name
                queued = queue_sizes.get(name, 0)
                workers = len(WorkerClass.get_running_workers())
                if queued > 0 or workers > 0:
                    status_parts.append(f'{name}={queued}q/{workers}w')
            
            if status_parts:
                print(f'[grey53]👨‍✈️ {self} tick:[/grey53] {" ".join(status_parts)}')
    
    def on_idle(self) -> None:
        """Called when orchestrator is idle (no work, no workers)."""
        if self.idle_count == 1:
            print(f'[grey53]👨‍✈️ {self} idle, waiting for work...[/grey53]')
    
    def should_exit(self, queue_sizes: dict[str, int]) -> bool:
        """Determine if orchestrator should exit."""
        if not self.exit_on_idle:
            return False
        
        if self.IDLE_TIMEOUT == 0:
            return False
        
        # Don't exit if there's pending or future work
        if self.has_pending_work(queue_sizes):
            return False
        
        if self.has_running_workers():
            return False
        
        if self.has_future_work():
            return False
        
        # Exit after idle timeout
        return self.idle_count >= self.IDLE_TIMEOUT
    
    def runloop(self) -> None:
        """Main orchestrator loop."""
        self.on_startup()
        
        try:
            while True:
                # Check queues and spawn workers
                queue_sizes = self.check_queues_and_spawn_workers()
                
                # Track idle state
                if self.has_pending_work(queue_sizes) or self.has_running_workers():
                    self.idle_count = 0
                    self.on_tick(queue_sizes)
                else:
                    self.idle_count += 1
                    self.on_idle()
                
                # Check if we should exit
                if self.should_exit(queue_sizes):
                    print(f'[green]👨‍✈️ {self} all work complete, exiting[/green]')
                    break
                
                time.sleep(self.POLL_INTERVAL)
        
        except KeyboardInterrupt:
            print()  # Newline after ^C
        except BaseException as e:
            self.on_shutdown(error=e)
            raise
        else:
            self.on_shutdown()
    
    def start(self) -> int:
        """
        Fork orchestrator as a background process.
        Returns the PID of the new process.
        """
        def run_orchestrator():
            from archivebox.config.django import setup_django
            setup_django()
            self.runloop()
        
        proc = Process(target=run_orchestrator, name='orchestrator')
        proc.start()
        
        assert proc.pid is not None
        print(f'[green]👨‍✈️ Orchestrator started in background[/green] pid={proc.pid}')
        return proc.pid
    
    @classmethod
    def get_or_start(cls, exit_on_idle: bool = True) -> 'Orchestrator':
        """
        Get running orchestrator or start a new one.
        Used by commands like 'add' to ensure orchestrator is running.
        """
        if cls.is_running():
            print('[grey53]👨‍✈️ Orchestrator already running[/grey53]')
            # Return a placeholder - actual orchestrator is in another process
            return cls(exit_on_idle=exit_on_idle)
        
        orchestrator = cls(exit_on_idle=exit_on_idle)
        return orchestrator