# ArchiveBox/archivebox/config/views.py

__package__ = 'archivebox.config'

import os
import shutil
import inspect
from pathlib import Path
from typing import Any, List, Dict, cast

from benedict import benedict

from django.core.exceptions import PermissionDenied
from django.http import HttpRequest
from django.utils import timezone
from django.utils.html import format_html, mark_safe

from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink

from archivebox.config import CONSTANTS
from archivebox.misc.util import parse_date

from machine.models import InstalledBinary


# Executable names that get_detected_binaries() looks up on PATH.
# Several entries are alternate names for the same tool (e.g. 'yt-dlp' / 'ytdlp').
KNOWN_BINARIES = [
    'wget',
    'curl',
    'chromium',
    'chrome',
    'google-chrome',
    'google-chrome-stable',
    'node',
    'npm',
    'npx',
    'yt-dlp',
    'ytdlp',
    'youtube-dl',
    'git',
    'singlefile',
    'readability-extractor',
    'mercury-parser',
    'python3',
    'python',
    'bash',
    'zsh',
    'ffmpeg',
    'ripgrep',
    'rg',
    'sonic',
    'archivebox',
]


def obj_to_yaml(obj: Any, indent: int=0) -> str:
    """Render ``obj`` as a YAML-like text fragment for display purposes.

    The output is not guaranteed to be parseable YAML. Scalars come back
    with a single leading space so they can be appended directly after a
    ``key:``; non-empty dicts and lists come back starting with a newline.

    Args:
        obj: Value to render. Dicts and lists are rendered recursively,
            multi-line strings as a ``|`` block, callables as their
            (lightly trimmed) source code, everything else via ``str()``.
        indent: Current nesting depth; each level adds two spaces.

    Returns:
        The rendered text fragment.
    """
    # At the top level the line prefix is a newline instead of spaces, which
    # puts a blank line between top-level entries.
    prefix = '\n' if indent == 0 else "  " * indent

    if isinstance(obj, dict):
        if not obj:
            return "{}"
        entries = [
            f"{prefix}{key}:{obj_to_yaml(value, indent + 1)}\n"
            for key, value in obj.items()
        ]
        return "\n" + "".join(entries)

    if isinstance(obj, list):
        if not obj:
            return "[]"
        bullets = [
            f"{prefix}- {obj_to_yaml(item, indent + 1).lstrip()}\n"
            for item in obj
        ]
        return ("\n" + "".join(bullets)).rstrip()

    if isinstance(obj, str):
        if "\n" not in obj:
            return f" {obj}"
        # Multi-line strings become a block scalar with every line indented
        # one step deeper than the current prefix.
        continuation = f"\n{prefix}  "
        return f" |{continuation}" + obj.replace("\n", continuation)

    if isinstance(obj, (int, float, bool)):
        return " " + str(obj)

    if callable(obj):
        # Show the callable's source: drop blank lines, blank out any line
        # containing 'def ', keep only what follows the last 'lambda: ',
        # and strip trailing commas.
        kept_lines = [
            '' if 'def ' in line else line
            for line in inspect.getsource(obj).split('\n')
            if line.strip()
        ]
        source = '\n'.join(kept_lines).split('lambda: ')[-1].rstrip(',')
        return f" {prefix}  " + source.replace("\n", f"\n{prefix}  ")

    return " " + str(obj)


def get_detected_binaries() -> Dict[str, Dict[str, Any]]:
    """Return info for every KNOWN_BINARIES entry that shutil.which() can find.

    Returns:
        A dict keyed by binary name. Each value holds ``name``, ``abspath``
        (the path returned by ``shutil.which``), ``version`` (always ``None``
        here) and ``is_available`` (always ``True``). Names that are not
        found are omitted.
    """
    lookups = ((name, shutil.which(name)) for name in KNOWN_BINARIES)
    return {
        name: {
            'name': name,
            'abspath': abspath,
            'version': None,  # Could add version detection later
            'is_available': True,
        }
        for name, abspath in lookups
        if abspath
    }


def get_filesystem_plugins() -> Dict[str, Dict[str, Any]]:
    """Scan the builtin and user plugin directories for plugins.

    Every sub-directory whose name does not start with ``_`` counts as a
    plugin. Its hooks are the files directly inside it that match
    ``on_*__*.sh``, ``on_*__*.py`` or ``on_*__*.js``.

    Returns:
        A dict keyed by ``'<source>.<dirname>'`` (source is ``'builtin'`` or
        ``'user'``). Each value holds ``id``, ``name``, ``path``, ``source``
        and ``hooks`` (a list of hook file names).
    """
    from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR

    search_roots = (
        (BUILTIN_PLUGINS_DIR, 'builtin'),
        (USER_PLUGINS_DIR, 'user'),
    )
    hook_extensions = ('sh', 'py', 'js')

    discovered = {}
    for root, origin in search_roots:
        if not root.exists():
            continue

        for entry in root.iterdir():
            # Only directories count, and underscore-prefixed ones are skipped.
            if not entry.is_dir() or entry.name.startswith('_'):
                continue

            plugin_id = f'{origin}.{entry.name}'
            hook_names = [
                str(script.name)
                for ext in hook_extensions
                for script in entry.glob(f'on_*__*.{ext}')
            ]
            discovered[plugin_id] = {
                'id': plugin_id,
                'name': entry.name,
                'path': str(entry),
                'source': origin,
                'hooks': hook_names,
            }

    return discovered


@render_with_table_view
def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
    """Build the "Binaries" table listing binaries from the DB and from PATH.

    Rows are the union of ``InstalledBinary`` records and the binaries that
    ``get_detected_binaries()`` currently finds, sorted by name. When a name
    appears in both, the database record is the one displayed.

    Raises:
        PermissionDenied: if the requesting user is not a superuser.
    """
    # Raise explicitly rather than `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this check.
    if not request.user.is_superuser:
        raise PermissionDenied('Must be a superuser to view configuration settings.')

    rows = {
        "Binary Name": [],
        "Found Version": [],
        "Provided By": [],
        "Found Abspath": [],
    }

    # Binaries recorded in the database (previously detected/installed)
    db_binaries = {b.name: b for b in InstalledBinary.objects.all()}

    # Binaries that can be found on PATH right now
    detected = get_detected_binaries()

    for name in sorted(db_binaries.keys() | detected.keys()):
        db_binary = db_binaries.get(name)
        detected_binary = detected.get(name)

        rows['Binary Name'].append(ItemLink(name, key=name))

        if db_binary:
            rows['Found Version'].append(f'✅ {db_binary.version}' if db_binary.version else '✅ found')
            rows['Provided By'].append(db_binary.binprovider or 'PATH')
            rows['Found Abspath'].append(str(db_binary.abspath or ''))
        elif detected_binary:
            rows['Found Version'].append('✅ found')
            rows['Provided By'].append('PATH')
            rows['Found Abspath'].append(detected_binary['abspath'])
        else:
            # Defensive fallback: every name comes from one of the two dicts
            # above, so this is only reached if both lookups are falsy.
            rows['Found Version'].append('❌ missing')
            rows['Provided By'].append('-')
            rows['Found Abspath'].append('-')

    return TableContext(
        title="Binaries",
        table=rows,
    )

@render_with_item_view
def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    """Build the detail page for the binary named ``key``.

    The ``InstalledBinary`` record with that name is used when one exists.
    Otherwise the name is looked up on PATH with ``shutil.which``, and if
    that fails too a "Binary not found" placeholder section is shown.

    Raises:
        PermissionDenied: if the requesting user is not a superuser.
    """
    # Raise explicitly rather than `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this check.
    if not (request.user and request.user.is_superuser):
        raise PermissionDenied('Must be a superuser to view configuration settings.')

    # Try database first
    # NOTE(review): .get() also raises MultipleObjectsReturned if several rows
    # share a name — confirm whether InstalledBinary.name is unique.
    try:
        binary = InstalledBinary.objects.get(name=key)
    except InstalledBinary.DoesNotExist:
        binary = None

    if binary is not None:
        # Render a missing abspath as '' everywhere instead of the literal
        # string 'None' in the fields table.
        abspath = str(binary.abspath or '')
        section_name = binary.name
        description = abspath
        fields = {
            'name': binary.name,
            'binprovider': binary.binprovider,
            'abspath': abspath,
            'version': binary.version,
            'sha256': binary.sha256,
        }
    else:
        # Try to detect from PATH
        path = shutil.which(key)
        section_name = key
        if path:
            description = path
            fields = {
                'name': key,
                'binprovider': 'PATH',
                'abspath': path,
                'version': 'unknown',
            }
        else:
            description = "Binary not found"
            fields = {
                'name': key,
                'binprovider': 'not installed',
                'abspath': 'not found',
                'version': 'N/A',
            }

    return ItemContext(
        slug=key,
        title=key,
        data=[
            {
                "name": section_name,
                "description": description,
                "fields": fields,
                "help_texts": {},
            },
        ],
    )


@render_with_table_view
def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
    """Build the "Installed plugins" table from ``get_filesystem_plugins()``.

    One row is emitted per discovered plugin. When none are found, a single
    placeholder row naming the plugin directories is shown instead.

    Raises:
        PermissionDenied: if the requesting user is not a superuser.
    """
    # Raise explicitly rather than `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this check.
    if not request.user.is_superuser:
        raise PermissionDenied('Must be a superuser to view configuration settings.')

    rows = {
        "Name": [],
        "Source": [],
        "Path": [],
        "Hooks": [],
    }

    plugins = get_filesystem_plugins()

    for plugin_id, plugin in plugins.items():
        rows['Name'].append(ItemLink(plugin['name'], key=plugin_id))
        rows['Source'].append(plugin['source'])
        # format_html escapes the path before wrapping it in <code>
        rows['Path'].append(format_html('<code>{}</code>', plugin['path']))
        rows['Hooks'].append(', '.join(plugin['hooks']) or '(none)')

    if not plugins:
        # Show a helpful message when no plugins found
        rows['Name'].append('(no plugins found)')
        rows['Source'].append('-')
        rows['Path'].append(mark_safe('<code>archivebox/plugins/</code> or <code>data/plugins/</code>'))
        rows['Hooks'].append('-')

    return TableContext(
        title="Installed plugins",
        table=rows,
    )

@render_with_item_view
def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    """Build the detail page for the plugin whose id is ``key``.

    ``key`` is looked up in the result of ``get_filesystem_plugins()``. An
    unknown key yields a "Plugin not found" page with no data sections.

    Raises:
        PermissionDenied: if the requesting user is not a superuser.
    """
    # Raise explicitly rather than `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this check.
    if not request.user.is_superuser:
        raise PermissionDenied('Must be a superuser to view configuration settings.')

    plugin = get_filesystem_plugins().get(key)
    if not plugin:
        return ItemContext(
            slug=key,
            title=f'Plugin not found: {key}',
            data=[],
        )

    return ItemContext(
        slug=key,
        title=plugin['name'],
        data=[
            {
                "name": plugin['name'],
                "description": plugin['path'],
                "fields": {
                    "id": plugin['id'],
                    "name": plugin['name'],
                    "source": plugin['source'],
                    "path": plugin['path'],
                    "hooks": plugin['hooks'],
                },
                "help_texts": {},
            },
        ],
    )


@render_with_table_view
def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
    """Build the table of supervisord and the worker processes it manages.

    The first row describes supervisord itself, followed by one row per
    entry returned by ``supervisor.getAllProcessInfo()``. When no supervisord
    process is found, an empty table titled "No running worker processes"
    is returned.

    Raises:
        PermissionDenied: if the requesting user is not a superuser.
    """
    # Raise explicitly rather than `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this check.
    if not request.user.is_superuser:
        raise PermissionDenied("Must be a superuser to view configuration settings.")

    from workers.supervisord_util import get_existing_supervisord_process

    rows = {
        "Name": [],
        "State": [],
        "PID": [],
        "Started": [],
        "Command": [],
        "Logfile": [],
        "Exit Status": [],
    }

    supervisor = get_existing_supervisord_process()
    if supervisor is None:
        return TableContext(
            title="No running worker processes",
            table=rows,
        )

    all_config_entries = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or [])
    all_config = {config["name"]: benedict(config) for config in all_config_entries}

    # Add top row for supervisord process manager
    rows["Name"].append(ItemLink('supervisord', key='supervisord'))
    rows["State"].append(supervisor.getState()['statename'])
    rows['PID'].append(str(supervisor.getPID()))
    rows["Started"].append('-')
    rows["Command"].append('supervisord --configuration=tmp/supervisord.conf')
    rows["Logfile"].append(
        format_html(
            '<a href="/admin/environment/logs/{}/">{}</a>',
            'supervisord',
            'logs/supervisord.log',
        )
    )
    rows['Exit Status'].append('0')

    # Add a row for each worker process managed by supervisord
    for proc_info in cast(List[Dict[str, Any]], supervisor.getAllProcessInfo()):
        proc = benedict(proc_info)
        # A process without a matching config entry gets an empty command
        # instead of raising KeyError and breaking the whole page.
        proc_config = all_config.get(proc.name)

        rows["Name"].append(ItemLink(proc.name, key=proc.name))
        rows["State"].append(proc.statename)
        rows['PID'].append(proc.description.replace('pid ', ''))
        rows["Started"].append(parse_date(proc.start).strftime("%Y-%m-%d %H:%M:%S") if proc.start else '')
        rows["Command"].append(proc_config.command if proc_config is not None else '')
        rows["Logfile"].append(
            format_html(
                '<a href="/admin/environment/logs/{}/">{}</a>',
                # link key is the logfile's base name up to its first '.'
                proc.stdout_logfile.split("/")[-1].split('.')[0],
                proc.stdout_logfile,
            )
        )
        rows["Exit Status"].append(str(proc.exitstatus))

    return TableContext(
        title="Running worker processes",
        table=rows,
    )


@render_with_item_view
def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    """Build the detail page for supervisord itself or one of its workers.

    ``key == 'supervisord'`` shows the supervisord config file and main log;
    any other key is treated as a worker name and shows that worker's config
    entry and stdout log. If supervisord is not running, or no config entry
    matches ``key``, a page with no data sections is returned.

    Raises:
        PermissionDenied: if the requesting user is not a superuser.
    """
    # Raise explicitly rather than `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this check.
    if not request.user.is_superuser:
        raise PermissionDenied("Must be a superuser to view configuration settings.")

    from workers.supervisord_util import get_existing_supervisord_process, get_worker, get_sock_file, CONFIG_FILE_NAME

    SOCK_FILE = get_sock_file()
    CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME

    supervisor = get_existing_supervisord_process()
    if supervisor is None:
        return ItemContext(
            slug='none',
            title='error: No running supervisord process.',
            data=[],
        )

    all_config = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or [])

    if key == 'supervisord':
        relevant_config = CONFIG_FILE.read_text()
        relevant_logs = cast(str, supervisor.readLog(0, 10_000_000))

        # The start time is taken from the last "initialized" line in the log
        # (the text before the first comma). That line can be absent, so fall
        # back to an unknown start instead of raising IndexError.
        init_lines = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line]
        start_ts = init_lines[-1].split(",", 1)[0] if init_lines else None
        uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0] if start_ts else 'unknown'

        proc = benedict(
            {
                "name": "supervisord",
                "pid": supervisor.getPID(),
                "statename": supervisor.getState()["statename"],
                "start": start_ts,
                "stop": None,
                "exitstatus": "",
                "stdout_logfile": "logs/supervisord.log",
                # only the part after "uptime " is displayed (see "Uptime" below)
                "description": f'pid 000, uptime {uptime}',
            }
        )
    else:
        matching_configs = [config for config in all_config if config['name'] == key]
        if not matching_configs:
            # Unknown worker name: show a not-found page rather than crashing.
            return ItemContext(
                slug=key,
                title=f'Worker not found: {key}',
                data=[],
            )

        # NOTE(review): if get_worker() returns nothing, proc is an empty
        # benedict and the attribute reads below may fail — confirm.
        proc = benedict(get_worker(supervisor, key) or {})
        relevant_config = matching_configs[0]
        relevant_logs = supervisor.tailProcessStdoutLog(key, 0, 10_000_000)[0]

    return ItemContext(
        slug=key,
        title=key,
        data=[
            {
                "name": key,
                "description": key,
                "fields": {
                    "Command": proc.name,
                    "PID": proc.pid,
                    "State": proc.statename,
                    "Started": parse_date(proc.start).strftime("%Y-%m-%d %H:%M:%S") if proc.start else "",
                    "Stopped": parse_date(proc.stop).strftime("%Y-%m-%d %H:%M:%S") if proc.stop else "",
                    "Exit Status": str(proc.exitstatus),
                    "Logfile": proc.stdout_logfile,
                    "Uptime": (proc.description or "").split("uptime ", 1)[-1],
                    "Config": relevant_config,
                    "Logs": relevant_logs,
                },
                "help_texts": {"Uptime": "How long the process has been running ([days:]hours:minutes:seconds)"},
            },
        ],
    )


@render_with_table_view
def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
    """Build the table of ``*.log`` files in ``CONSTANTS.LOGS_DIR``.

    Files are listed most recently modified first. Each row shows the file's
    name, modification time, size, and the last non-empty line found in its
    final 1024 bytes (blank when there is none).

    Raises:
        PermissionDenied: if the requesting user is not a superuser.
    """
    # Raise explicitly rather than `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this check.
    if not request.user.is_superuser:
        raise PermissionDenied("Must be a superuser to view configuration settings.")

    log_files = sorted(CONSTANTS.LOGS_DIR.glob("*.log"), key=os.path.getmtime, reverse=True)

    rows = {
        "Name": [],
        "Last Updated": [],
        "Size": [],
        "Most Recent Lines": [],
    }

    # Add a row for each log file
    for logfile in log_files:
        st = logfile.stat()
        rows["Name"].append(ItemLink("logs" + str(logfile).rsplit("/logs", 1)[-1], key=logfile.name))
        rows["Last Updated"].append(parse_date(st.st_mtime).strftime("%Y-%m-%d %H:%M:%S"))
        rows["Size"].append(f'{st.st_size//1000} kb')

        with open(logfile, 'rb') as f:
            try:
                # read only the last 1024 bytes of the file
                f.seek(-1024, os.SEEK_END)
            except OSError:
                # seeking before the start fails for files under 1024 bytes
                f.seek(0)
            tail = f.read().decode('utf-8', errors='replace')

        non_empty_lines = [line for line in tail.split("\n") if line.strip()]
        # An empty or whitespace-only log has no line to show; use a blank
        # cell instead of raising IndexError and breaking the whole page.
        rows["Most Recent Lines"].append(non_empty_lines[-1] if non_empty_lines else '')

    return TableContext(
        title="Debug Log files",
        table=rows,
    )


@render_with_item_view
def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    """Build the detail page for the log file identified by ``key``.

    Only ``*.log`` files directly inside ``CONSTANTS.LOGS_DIR`` are
    considered. A file whose name or stem equals ``key`` is preferred;
    otherwise the first file (in sorted order) whose name contains ``key``
    is used. If nothing matches, a page with no data sections is returned.

    Raises:
        PermissionDenied: if the requesting user is not a superuser.
    """
    # Raise explicitly rather than `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this check.
    if not request.user.is_superuser:
        raise PermissionDenied("Must be a superuser to view configuration settings.")

    # Sort so the chosen file does not depend on directory listing order.
    candidates = sorted(CONSTANTS.LOGS_DIR.glob('*.log'))
    exact_matches = [logfile for logfile in candidates if key in (logfile.name, logfile.stem)]
    partial_matches = [logfile for logfile in candidates if key in logfile.name]
    matches = exact_matches or partial_matches

    if not matches:
        return ItemContext(
            slug=key,
            title=f'Log file not found: {key}',
            data=[],
        )

    log_file = matches[0]

    # errors='replace' keeps a log with undecodable bytes viewable instead of
    # raising UnicodeDecodeError.
    log_text = log_file.read_text(errors='replace')
    log_stat = log_file.stat()

    return ItemContext(
        slug=key,
        title=key,
        data=[
            {
                "name": key,
                "description": key,
                "fields": {
                    "Path": str(log_file),
                    "Size": f"{log_stat.st_size//1000} kb",
                    "Last Updated": parse_date(log_stat.st_mtime).strftime("%Y-%m-%d %H:%M:%S"),
                    # last 20 lines taken from the final 10,000 characters
                    "Tail": "\n".join(log_text[-10_000:].split("\n")[-20:]),
                    "Full Log": log_text,
                },
                "help_texts": {},
            },
        ],
    )