__package__ = 'archivebox.crawls'

from datetime import timedelta

from django.utils import timezone
from rich import print
from statemachine import State, StateMachine

# from workers.actor import ActorType
from crawls.models import Crawl


class CrawlMachine(StateMachine, strict_states=True):
    """State machine for managing Crawl lifecycle."""

    model: Crawl

    # States
    queued = State(value=Crawl.StatusChoices.QUEUED, initial=True)
    started = State(value=Crawl.StatusChoices.STARTED)
    sealed = State(value=Crawl.StatusChoices.SEALED, final=True)

    # Tick Event
    tick = (
        queued.to.itself(unless='can_start') |
        queued.to(started, cond='can_start') |
        started.to.itself(unless='is_finished') |
        started.to(sealed, cond='is_finished')
    )

    def __init__(self, crawl, *args, **kwargs):
        self.crawl = crawl
        super().__init__(crawl, *args, **kwargs)

    def __repr__(self) -> str:
        return f'Crawl[{self.crawl.id}]'

    def __str__(self) -> str:
        return self.__repr__()

    def can_start(self) -> bool:
        if not self.crawl.seed:
            print(f'[red]⚠️ Crawl {self.crawl.id} cannot start: no seed[/red]')
            return False
        if not self.crawl.seed.uri:
            print(f'[red]⚠️ Crawl {self.crawl.id} cannot start: seed has no URI[/red]')
            return False
        return True

    def is_finished(self) -> bool:
        from core.models import Snapshot, ArchiveResult

        # check that at least one snapshot exists for this crawl
        snapshots = Snapshot.objects.filter(crawl=self.crawl)
        if not snapshots.exists():
            return False

        # check to make sure no snapshots are in non-final states
        if snapshots.filter(status__in=[Snapshot.StatusChoices.QUEUED, Snapshot.StatusChoices.STARTED]).exists():
            return False

        # check that some archiveresults exist for this crawl
        results = ArchiveResult.objects.filter(snapshot__crawl=self.crawl)
        if not results.exists():
            return False

        # check if all archiveresults are finished
        if results.filter(status__in=[ArchiveResult.StatusChoices.QUEUED, ArchiveResult.StatusChoices.STARTED]).exists():
            return False

        return True

    # def before_transition(self, event, state):
    #     print(f"Before '{event}', on the '{state.id}' state.")
    #     return "before_transition_return"

    @started.enter
    def enter_started(self):
        # Suppressed: state transition logs
        # lock the crawl object while we create snapshots
        self.crawl.update_for_workers(
            retry_at=timezone.now() + timedelta(seconds=5),
            status=Crawl.StatusChoices.QUEUED,
        )
        try:
            # Run on_Crawl hooks to validate/install dependencies
            self._run_crawl_hooks()

            # Run the crawl - creates root snapshot and processes queued URLs
            self.crawl.run()

            # only update status to STARTED once snapshots are created
            self.crawl.update_for_workers(
                retry_at=timezone.now() + timedelta(seconds=5),
                status=Crawl.StatusChoices.STARTED,
            )
        except Exception as e:
            print(f'[red]⚠️ Crawl {self.crawl.id} failed to start: {e}[/red]')
            import traceback
            traceback.print_exc()
            # Re-raise so the worker knows it failed
            raise

    def _run_crawl_hooks(self):
        """Run on_Crawl hooks to validate/install dependencies."""
        from pathlib import Path
        from archivebox.hooks import run_hooks, discover_hooks
        from archivebox.config import CONSTANTS

        # Discover and run all on_Crawl hooks
        hooks = discover_hooks('Crawl')
        if not hooks:
            return

        # Create a temporary output directory for hook results
        output_dir = Path(CONSTANTS.DATA_DIR) / 'tmp' / f'crawl_{self.crawl.id}'
        output_dir.mkdir(parents=True, exist_ok=True)

        # Run all on_Crawl hooks
        results = run_hooks(
            event_name='Crawl',
            output_dir=output_dir,
            timeout=60,
            config_objects=[self.crawl, self.crawl.seed] if self.crawl.seed else [self.crawl],
            crawl_id=str(self.crawl.id),
            seed_uri=self.crawl.seed.uri if self.crawl.seed else '',
        )

        # Process hook results - parse JSONL output and create DB objects
        self._process_hook_results(results)

    def _process_hook_results(self, results: list):
        """Process JSONL output from hooks to create InstalledBinary and update Machine config."""
        import json
        from machine.models import Machine, InstalledBinary

        machine = Machine.current()

        for result in results:
            if result['returncode'] != 0:
                # Hook failed - might indicate a missing dependency
                continue

            # Parse JSONL output
            for line in result['stdout'].strip().split('\n'):
                if not line.strip():
                    continue
                try:
                    obj = json.loads(line)
                    obj_type = obj.get('type')

                    if obj_type == 'InstalledBinary':
                        # Create or update InstalledBinary record
                        # Skip if essential fields are missing
                        if not obj.get('name') or not obj.get('abspath') or not obj.get('version'):
                            continue
                        InstalledBinary.objects.update_or_create(
                            machine=machine,
                            name=obj['name'],
                            defaults={
                                'abspath': obj['abspath'],
                                'version': obj['version'],
                                'sha256': obj.get('sha256') or '',
                                'binprovider': obj.get('binprovider') or 'env',
                            },
                        )
                    elif obj_type == 'Machine':
                        # Update Machine config
                        method = obj.get('_method', 'update')
                        if method == 'update':
                            key = obj.get('key', '')
                            value = obj.get('value')
                            if key.startswith('config/'):
                                config_key = key[7:]  # Remove 'config/' prefix
                                machine.config[config_key] = value
                                machine.save(update_fields=['config'])
                    elif obj_type == 'Dependency':
                        # Dependency request - could trigger installation
                        # For now just log it (installation hooks would be separate)
                        print(f'[yellow]Dependency requested: {obj.get("bin_name")}[/yellow]')
                except json.JSONDecodeError:
                    # Not JSON, skip
                    continue

    @sealed.enter
    def enter_sealed(self):
        # Suppressed: state transition logs
        self.crawl.update_for_workers(
            retry_at=None,
            status=Crawl.StatusChoices.SEALED,
        )
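

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module; the worker entrypoint and
# queryset below are assumptions for illustration, not ArchiveBox's actual
# worker loop). A worker that polls for pending crawls could drive this
# machine by firing `tick`, which advances queued -> started -> sealed as
# the `can_start`/`is_finished` guards permit and stays put otherwise:
#
#     from django.utils import timezone
#     from crawls.models import Crawl
#     from crawls.statemachines import CrawlMachine  # hypothetical module path
#
#     def process_pending_crawls():
#         pending = Crawl.objects.exclude(status=Crawl.StatusChoices.SEALED)
#         for crawl in pending.filter(retry_at__lte=timezone.now()):
#             CrawlMachine(crawl).tick()  # no-op self-transition if guards fail
# ---------------------------------------------------------------------------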