#!/usr/bin/env python3

# Set the module's package name explicitly (Python uses __package__ to resolve
# relative imports); this file can also be executed directly as a script.
__package__ = 'archivebox.cli'
# Full command-line name of this subcommand.
# NOTE(review): presumably read by the CLI dispatcher / help output -- confirm.
__command__ = 'archivebox search'

import re
from pathlib import Path
from typing import Optional, List, Any

import rich_click as click
from rich import print

from django.db.models import QuerySet

from archivebox.config import DATA_DIR
from archivebox.misc.logging import stderr
from archivebox.misc.util import enforce_types, docstring

# Filter types for URL matching.
# Each entry maps a filter-type name to a callable that takes the user-supplied
# pattern and returns a dict of Django field-lookup keyword arguments.
LINK_FILTERS = {
    'exact': lambda pattern: {'url': pattern},
    'substring': lambda pattern: {'url__icontains': pattern},
    'regex': lambda pattern: {'url__iregex': pattern},
    # Match the domain under both http:// and https:// (previously only http://
    # was matched, so HTTPS URLs were never found). The pattern is escaped so it
    # is treated literally, and anchored so it stays a prefix match on the URL.
    'domain': lambda pattern: {'url__iregex': rf'^https?://{re.escape(pattern)}'},
    'tag': lambda pattern: {'tags__name': pattern},
    'timestamp': lambda pattern: {'timestamp': pattern},
}

# Status names accepted by the --status option; each one has a matching entry
# in the STATUS_FUNCTIONS table inside list_folders().
STATUS_CHOICES = [
    'indexed',
    'archived',
    'unarchived',
    'present',
    'valid',
    'invalid',
    'duplicate',
    'orphaned',
    'corrupted',
    'unrecognized',
]


def get_snapshots(snapshots: Optional[QuerySet]=None,
                  filter_patterns: Optional[List[str]]=None,
                  filter_type: str='substring',
                  after: Optional[float]=None,
                  before: Optional[float]=None,
                  out_dir: Path=DATA_DIR) -> QuerySet:
    """Filter and return Snapshots matching the given criteria.

    All supplied criteria are combined (AND-ed) on top of the base queryset.

    Args:
        snapshots: Base queryset to narrow down. ``None`` means "all Snapshots".
        filter_patterns: Patterns passed to ``Snapshot.objects.filter_by_patterns``.
        filter_type: Name of the pattern-matching strategy, passed through to
            ``filter_by_patterns`` unchanged.
        after: Keep only snapshots whose ``timestamp`` is >= this value.
        before: Keep only snapshots whose ``timestamp`` is < this value.
        out_dir: Not used by this function; kept for call compatibility.

    Returns:
        The filtered queryset (possibly empty). A warning is written via
        ``stderr`` when nothing matched.
    """
    from core.models import Snapshot

    # Compare against None explicitly: truth-testing a QuerySet runs the query,
    # and an explicitly passed *empty* queryset must not widen to "everything".
    result = Snapshot.objects.all() if snapshots is None else snapshots

    if after is not None:
        result = result.filter(timestamp__gte=after)
    if before is not None:
        result = result.filter(timestamp__lt=before)
    if filter_patterns:
        # Intersect with the pattern matches instead of restarting from
        # Snapshot.objects, which would discard the base queryset and the
        # after/before filters applied above.
        matching = Snapshot.objects.filter_by_patterns(filter_patterns, filter_type)
        result = result.filter(pk__in=matching.values('pk'))

    # exists() asks the database for a single row instead of loading every match.
    if not result.exists():
        stderr('[!] No Snapshots matched your filters:', filter_patterns, f'({filter_type})', color='lightyellow')

    return result


def list_folders(snapshots: QuerySet, status: str, out_dir: Path=DATA_DIR) -> dict[str, Any]:
    """Return the folders for ``snapshots`` that have the given ``status``.

    Runs ``check_data_folder()`` first, then dispatches to the matching
    ``get_<status>_folders`` helper from ``archivebox.misc.folders``.

    Args:
        snapshots: Queryset handed to the status helper as its first argument.
        status: One of the keys of the dispatch table below (same names as
            ``STATUS_CHOICES``).
        out_dir: Passed to the status helper as ``out_dir``.

    Returns:
        Whatever the selected helper returns (annotated as ``dict[str, Any]``).

    Raises:
        ValueError: If ``status`` is not a recognized status name.
    """

    from archivebox.misc.checks import check_data_folder
    from archivebox.misc.folders import (
        get_indexed_folders,
        get_archived_folders,
        get_unarchived_folders,
        get_present_folders,
        get_valid_folders,
        get_invalid_folders,
        get_duplicate_folders,
        get_orphaned_folders,
        get_corrupted_folders,
        get_unrecognized_folders,
    )

    check_data_folder()

    STATUS_FUNCTIONS = {
        "indexed": get_indexed_folders,
        "archived": get_archived_folders,
        "unarchived": get_unarchived_folders,
        "present": get_present_folders,
        "valid": get_valid_folders,
        "invalid": get_invalid_folders,
        "duplicate": get_duplicate_folders,
        "orphaned": get_orphaned_folders,
        "corrupted": get_corrupted_folders,
        "unrecognized": get_unrecognized_folders,
    }

    # Guard only the table lookup: a KeyError raised *inside* a helper is a
    # genuine bug there and must not be misreported as an unknown status.
    try:
        get_folders = STATUS_FUNCTIONS[status]
    except KeyError:
        raise ValueError('Status not recognized.') from None

    return get_folders(snapshots, out_dir=out_dir)


@enforce_types
def search(filter_patterns: list[str] | None=None,
           filter_type: str='substring',
           status: str='indexed',
           before: float | None=None,
           after: float | None=None,
           sort: str | None=None,
           json: bool=False,
           html: bool=False,
           csv: str | None=None,
           with_headers: bool=False):
    """List, filter, and export information about archive entries"""
    # NOTE: the docstring above is intentionally left as a single line; main()
    # passes search.__doc__ to @docstring, so editing it changes what that
    # decorator receives.
    #
    # Parameters:
    #   filter_patterns / filter_type: forwarded to get_snapshots().
    #   status:       status name forwarded to list_folders().
    #   before/after: timestamp bounds forwarded to get_snapshots().
    #   sort:         field name passed to QuerySet.order_by().
    #   json / html:  select JSON or HTML output (json wins over html, html over csv).
    #   csv:          comma-separated column names for CSV output.
    #   with_headers: forwarded to the selected exporter; only valid together
    #                 with json, html or csv.
    # Returns the rendered output after printing it.
    # Exits with status 2 when with_headers is given without an export format.

    if with_headers and not (json or html or csv):
        stderr('[X] --with-headers requires --json, --html or --csv\n', color='red')
        raise SystemExit(2)

    snapshots = get_snapshots(
        filter_patterns=list(filter_patterns) if filter_patterns else None,
        filter_type=filter_type,
        before=before,
        after=after,
    )

    if sort:
        snapshots = snapshots.order_by(sort)

    folders = list_folders(
        snapshots=snapshots,
        status=status,
        out_dir=DATA_DIR,
    )

    if json or html or csv:
        from core.models import Snapshot

        # Folder entries may be None; only real snapshots can be exported.
        snapshot_pks = [snap.pk for snap in folders.values() if snap is not None]
        exportable = Snapshot.objects.filter(pk__in=snapshot_pks)
        # Re-querying by primary key discards the ordering applied above, so
        # the requested sort has to be applied again before exporting.
        if sort:
            exportable = exportable.order_by(sort)

        if json:
            output = exportable.to_json(with_headers=with_headers)
        elif html:
            output = exportable.to_html(with_headers=with_headers)
        else:
            output = exportable.to_csv(cols=csv.split(','), header=with_headers)
    else:
        from archivebox.misc.logging_util import printable_folders
        output = printable_folders(folders, with_headers)

    print(output)
    return output


# Command-line entry point: click parses the options/arguments declared below
# and passes them to search() as keyword arguments.
@click.command()
@click.option(
    '--filter-type', '-f',
    type=click.Choice(['search', *LINK_FILTERS]),
    default='substring',
    help='Pattern matching type for filtering URLs',
)
@click.option(
    '--status', '-s',
    type=click.Choice(STATUS_CHOICES),
    default='indexed',
    help='List snapshots with the given status',
)
@click.option(
    '--before', '-b',
    type=float,
    help='List snapshots bookmarked before the given UNIX timestamp',
)
@click.option(
    '--after', '-a',
    type=float,
    help='List snapshots bookmarked after the given UNIX timestamp',
)
@click.option(
    '--sort', '-o',
    type=str,
    help='Field to sort by, e.g. url, created_at, bookmarked_at, downloaded_at',
)
@click.option(
    '--json', '-J',
    is_flag=True,
    help='Print output in JSON format',
)
@click.option(
    '--html', '-M',
    is_flag=True,
    help='Print output in HTML format (suitable for viewing statically without a server)',
)
@click.option(
    '--csv', '-C',
    type=str,
    help='Print output as CSV with the provided fields, e.g.: created_at,url,title',
)
@click.option(
    '--with-headers', '-H',
    is_flag=True,
    help='Include extra CSV/HTML headers in the output',
)
@click.help_option('--help', '-h')
@click.argument('filter_patterns', nargs=-1)
@docstring(search.__doc__)
def main(**cli_options):
    # No docstring here on purpose: @docstring is handed search.__doc__ above.
    return search(**cli_options)


# Run the click command when this file is executed directly as a script.
if __name__ == '__main__':
    main()