Add pluginmap management command

Adds a new CLI command `archivebox pluginmap` that displays:
- ASCII art diagrams of all core state machines (Crawl, Snapshot,
  ArchiveResult, Binary)
- All auto-detected on_Modelname_xyz hooks, grouped by model/event
- Each hook's execution order (step 0-9), plugin name, and background status

Usage:
  archivebox pluginmap              # Show all diagrams and hooks
  archivebox pluginmap -m Snapshot  # Filter to specific model
  archivebox pluginmap -a           # Include disabled plugins
  archivebox pluginmap -q           # Output JSON only
This commit is contained in:
Claude
2025-12-31 10:19:58 +00:00
parent 28a4f99f55
commit 672ccf918d
2 changed files with 358 additions and 0 deletions

View File

@@ -48,6 +48,8 @@ class ArchiveBoxGroup(click.Group):
'server': 'archivebox.cli.archivebox_server.main',
'shell': 'archivebox.cli.archivebox_shell.main',
'manage': 'archivebox.cli.archivebox_manage.main',
# Introspection commands
'pluginmap': 'archivebox.cli.archivebox_pluginmap.main',
# Worker command
'worker': 'archivebox.cli.archivebox_worker.main',
}

View File

@@ -0,0 +1,356 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
from typing import Optional
from pathlib import Path
import rich_click as click
from archivebox.misc.util import docstring, enforce_types
# State Machine ASCII Art Diagrams
#
# Each *_MACHINE_DIAGRAM constant below is a static, hand-drawn picture of one
# state machine. pluginmap() prints these strings verbatim inside a rich Panel;
# nothing here is generated from the real state machine classes, so the text
# has to be kept in sync with them by hand.

# CrawlMachine: QUEUED -> STARTED -> SEALED, plus the hook families it triggers.
CRAWL_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ CrawlMachine │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ │
│ │ QUEUED │◄────────────────┐ │
│ │ (initial) │ │ │
│ └──────┬──────┘ │ │
│ │ │ tick() unless can_start() │
│ │ tick() when │ │
│ │ can_start() │ │
│ ▼ │ │
│ ┌─────────────┐ │ │
│ │ STARTED │─────────────────┘ │
│ │ │◄────────────────┐ │
│ │ enter: │ │ │
│ │ crawl.run()│ │ tick() unless is_finished() │
│ │ (discover │ │ │
│ │ Crawl │─────────────────┘ │
│ │ hooks) │ │
│ └──────┬──────┘ │
│ │ │
│ │ tick() when is_finished() │
│ ▼ │
│ ┌─────────────┐ │
│ │ SEALED │ │
│ │ (final) │ │
│ │ │ │
│ │ enter: │ │
│ │ cleanup() │ │
│ └─────────────┘ │
│ │
│ Hooks triggered: on_Crawl__* (during STARTED.enter via crawl.run()) │
│ on_CrawlEnd__* (during SEALED.enter via cleanup()) │
└─────────────────────────────────────────────────────────────────────────────┘
"""
# SnapshotMachine: QUEUED -> STARTED -> SEALED. Static text that pluginmap()
# prints verbatim inside a rich Panel for the 'Snapshot' event.
SNAPSHOT_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ SnapshotMachine │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ │
│ │ QUEUED │◄────────────────┐ │
│ │ (initial) │ │ │
│ └──────┬──────┘ │ │
│ │ │ tick() unless can_start() │
│ │ tick() when │ │
│ │ can_start() │ │
│ ▼ │ │
│ ┌─────────────┐ │ │
│ │ STARTED │─────────────────┘ │
│ │ │◄────────────────┐ │
│ │ enter: │ │ │
│ │ snapshot │ │ tick() unless is_finished() │
│ │ .run() │ │ │
│ │ (discover │─────────────────┘ │
│ │ Snapshot │ │
│ │ hooks, │ │
│ │ create │ │
│ │ pending │ │
│ │ results) │ │
│ └──────┬──────┘ │
│ │ │
│ │ tick() when is_finished() │
│ ▼ │
│ ┌─────────────┐ │
│ │ SEALED │ │
│ │ (final) │ │
│ │ │ │
│ │ enter: │ │
│ │ cleanup() │ │
│ └─────────────┘ │
│ │
│ Hooks triggered: on_Snapshot__* (creates ArchiveResults in STARTED.enter) │
└─────────────────────────────────────────────────────────────────────────────┘
"""
# ArchiveResultMachine: QUEUED -> STARTED -> one of SUCCEEDED / FAILED /
# SKIPPED / BACKOFF. Unlike the other diagrams this one is not attached to an
# entry in pluginmap()'s model_events table; pluginmap() prints it on its own
# before the per-event sections.
ARCHIVERESULT_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ ArchiveResultMachine │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ │
│ │ QUEUED │◄────────────────┐ │
│ │ (initial) │ │ │
│ └──────┬──────┘ │ │
│ │ │ tick() unless can_start() │
│ │ tick() when │ │
│ │ can_start() │ │
│ ▼ │ │
│ ┌─────────────┐ │ │
│ │ STARTED │─────────────────┘ │
│ │ │◄────────────────┐ │
│ │ enter: │ │ tick() unless is_finished() │
│ │ result.run()│─────────────────┘ │
│ │ (execute │ │
│ │ hook via │ │
│ │ run_hook())│ │
│ └──────┬──────┘ │
│ │ │
│ │ tick() checks status set by hook output │
│ ├────────────────┬────────────────┬────────────────┐ │
│ ▼ ▼ ▼ ▼ │
│ ┌───────────┐ ┌───────────┐ ┌───────────┐ ┌───────────┐ │
│ │ SUCCEEDED │ │ FAILED │ │ SKIPPED │ │ BACKOFF │ │
│ │ (final) │ │ (final) │ │ (final) │ │ │ │
│ └───────────┘ └───────────┘ └───────────┘ └─────┬─────┘ │
│ │ │
│ can_start()───┘ │
│ loops back to STARTED │
│ │
│ Each ArchiveResult runs ONE specific hook (stored in .hook_name field) │
└─────────────────────────────────────────────────────────────────────────────┘
"""
# BinaryMachine: QUEUED -> STARTED -> SUCCEEDED / FAILED. Static text that
# pluginmap() prints verbatim inside a rich Panel for the 'Binary' event.
BINARY_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ BinaryMachine │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ │
│ │ QUEUED │◄────────────────┐ │
│ │ (initial) │ │ │
│ └──────┬──────┘ │ │
│ │ │ tick() unless can_start() │
│ │ tick() when │ │
│ │ can_start() │ │
│ ▼ │ │
│ ┌─────────────┐ │ │
│ │ STARTED │─────────────────┘ │
│ │ │◄────────────────┐ │
│ │ enter: │ │ │
│ │ binary.run()│ │ tick() unless is_finished() │
│ │ (discover │─────────────────┘ │
│ │ Binary │ │
│ │ hooks, │ │
│ │ try each │ │
│ │ provider) │ │
│ └──────┬──────┘ │
│ │ │
│ │ tick() checks status set by hook output │
│ ├────────────────────────────────┐ │
│ ▼ ▼ │
│ ┌─────────────┐ ┌─────────────┐ │
│ │ SUCCEEDED │ │ FAILED │ │
│ │ (final) │ │ (final) │ │
│ │ │ │ │ │
│ │ abspath, │ │ no provider │ │
│ │ version set │ │ succeeded │ │
│ └─────────────┘ └─────────────┘ │
│ │
│ Hooks triggered: on_Binary__* (provider hooks during STARTED.enter) │
│ Providers tried in sequence until one succeeds: apt, brew, pip, npm, etc. │
└─────────────────────────────────────────────────────────────────────────────┘
"""
@enforce_types
def pluginmap(
    show_disabled: bool = False,
    model: Optional[str] = None,
    quiet: bool = False,
) -> dict:
    """
    Show a map of all state machines and their associated plugin hooks.

    Displays ASCII art diagrams of the core model state machines (Crawl, Snapshot,
    ArchiveResult, Binary) and lists all auto-detected on_Modelname_xyz hooks
    that will run for each model's transitions.
    """
    # NOTE: the docstring above is reused verbatim as the CLI help text (main()
    # is decorated with @docstring(pluginmap.__doc__)), so developer-facing
    # notes are kept in comments instead of the docstring:
    #
    #   show_disabled  forwarded to discover_hooks() as
    #                  filter_disabled=not show_disabled.
    #   model          optional event name used to restrict the output to one
    #                  entry of the model_events table below; matched
    #                  case-insensitively against the table's keys.
    #   quiet          when True, skip the header, diagrams, tables and summary.
    #                  The unknown-model error is still printed.
    #
    #   Returns a dict with keys 'models', 'plugins_dir' and 'user_plugins_dir',
    #   or an empty dict (after printing an error) if `model` is not recognised.
    from rich.console import Console
    from rich.markup import escape
    from rich.table import Table
    from rich.panel import Panel
    from rich import box

    from archivebox.hooks import (
        discover_hooks,
        extract_step,
        is_background_hook,
        BUILTIN_PLUGINS_DIR,
        USER_PLUGINS_DIR,
    )

    console = Console()
    prnt = console.print

    # Model event types that can have hooks
    model_events = {
        'Crawl': {
            'description': 'Hooks run when a Crawl starts (QUEUED→STARTED)',
            'machine': 'CrawlMachine',
            'diagram': CRAWL_MACHINE_DIAGRAM,
        },
        'CrawlEnd': {
            'description': 'Hooks run when a Crawl finishes (STARTED→SEALED)',
            'machine': 'CrawlMachine',
            'diagram': None,  # Part of CrawlMachine
        },
        'Snapshot': {
            'description': 'Hooks run for each Snapshot (creates ArchiveResults)',
            'machine': 'SnapshotMachine',
            'diagram': SNAPSHOT_MACHINE_DIAGRAM,
        },
        'Binary': {
            'description': 'Hooks for installing binary dependencies (providers)',
            'machine': 'BinaryMachine',
            'diagram': BINARY_MACHINE_DIAGRAM,
        },
    }

    # Filter to specific model if requested
    if model:
        # Match case-insensitively against the canonical keys. str.title() is
        # not usable here: it lowercases interior capitals, turning "CrawlEnd"
        # into "Crawlend", which would never match the 'CrawlEnd' key.
        canonical_by_lower = {name.lower(): name for name in model_events}
        canonical = canonical_by_lower.get(model.lower())
        if canonical is None:
            # escape() keeps user input containing "[" from being parsed as
            # rich markup tags.
            prnt(f'[red]Error: Unknown model "{escape(model)}". Available: {", ".join(model_events)}[/red]')
            return {}
        model_events = {canonical: model_events[canonical]}

    result = {
        'models': {},
        'plugins_dir': str(BUILTIN_PLUGINS_DIR),
        'user_plugins_dir': str(USER_PLUGINS_DIR),
    }

    if not quiet:
        prnt()
        prnt('[bold cyan]ArchiveBox Plugin Map[/bold cyan]')
        prnt(f'[dim]Built-in plugins: {BUILTIN_PLUGINS_DIR}[/dim]')
        prnt(f'[dim]User plugins: {USER_PLUGINS_DIR}[/dim]')
        prnt()

        # Show ArchiveResult diagram separately since it's different: it has no
        # entry in model_events, so it is printed once up front (regardless of
        # any model filter).
        prnt(Panel(
            ARCHIVERESULT_MACHINE_DIAGRAM,
            title='[bold green]ArchiveResultMachine[/bold green]',
            border_style='green',
            expand=False,
        ))
        prnt()

    for event_name, info in model_events.items():
        # Discover hooks for this event
        hooks = discover_hooks(event_name, filter_disabled=not show_disabled)

        # Build one JSON-friendly record per hook file. The plugin name is the
        # hook's parent directory (e.g. 'wget' from
        # 'plugins/wget/on_Snapshot__61_wget.py').
        hook_infos = [
            {
                'path': str(hook_path),
                'name': hook_path.name,
                'plugin': hook_path.parent.name,
                'step': extract_step(hook_path.name),
                'is_background': is_background_hook(hook_path.name),
                'extension': hook_path.suffix,
            }
            for hook_path in hooks
        ]

        result['models'][event_name] = {
            'description': info['description'],
            'machine': info['machine'],
            'hooks': hook_infos,
            'hook_count': len(hook_infos),
        }

        if quiet:
            continue

        # Show diagram if this model has one
        if info.get('diagram'):
            prnt(Panel(
                info['diagram'],
                title=f'[bold green]{info["machine"]}[/bold green]',
                border_style='green',
                expand=False,
            ))
            prnt()

        # Create hooks table
        table = Table(
            title=f'[bold yellow]on_{event_name}__* Hooks[/bold yellow] ({len(hook_infos)} found)',
            box=box.ROUNDED,
            show_header=True,
            header_style='bold magenta',
        )
        table.add_column('Step', justify='center', width=6)
        table.add_column('Plugin', style='cyan', width=20)
        table.add_column('Hook Name', style='green')
        table.add_column('BG', justify='center', width=4)
        table.add_column('Type', justify='center', width=5)

        # Rows are sorted by step then by name for display only; the records
        # stored in `result` keep the order discover_hooks() returned.
        for hook in sorted(hook_infos, key=lambda h: (h['step'], h['name'])):
            table.add_row(
                str(hook['step']),
                hook['plugin'],
                hook['name'],
                '[yellow]bg[/yellow]' if hook['is_background'] else '',
                hook['extension'].lstrip('.'),
            )

        prnt(table)
        prnt()
        prnt(f'[dim]{info["description"]}[/dim]')
        prnt()

    # Summary
    if not quiet:
        total_hooks = sum(m['hook_count'] for m in result['models'].values())
        prnt(f'[bold]Total hooks discovered: {total_hooks}[/bold]')
        prnt()
        prnt('[dim]Hook naming convention: on_{Model}__{XX}_{description}[.bg].{ext}[/dim]')
        prnt('[dim] - XX: Two-digit order (first digit = step 0-9)[/dim]')
        prnt('[dim] - .bg: Background hook (non-blocking)[/dim]')
        prnt('[dim] - ext: py, sh, or js[/dim]')
        prnt()

    return result
@click.command()
@click.option('--show-disabled', '-a', is_flag=True, help='Show hooks from disabled plugins too')
@click.option('--model', '-m', type=str, default=None, help='Filter to specific model (Crawl, Snapshot, Binary, CrawlEnd)')
@click.option('--quiet', '-q', is_flag=True, help='Output JSON only, no ASCII diagrams')
@docstring(pluginmap.__doc__)
def main(**kwargs):
    # CLI entry point: forwards the parsed options (show_disabled, model, quiet)
    # straight to pluginmap(). The help text is pluginmap's docstring, attached
    # by the @docstring decorator above.
    import json

    result = pluginmap(**kwargs)

    # pluginmap() returns an empty dict only after it has printed an
    # "Unknown model" error, so report that to the shell as a failure instead
    # of exiting 0 (and instead of emitting a misleading "{}" in quiet mode).
    if not result:
        raise SystemExit(1)

    # In quiet mode pluginmap() prints nothing itself; the JSON dump is the
    # command's only output.
    if kwargs.get('quiet'):
        print(json.dumps(result, indent=2))
# Run the click command when this file is executed directly as a script.
if __name__ == '__main__':
    main()