mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-03 09:25:42 +10:00
Add pluginmap management command
Adds a new CLI command `archivebox pluginmap` that displays: - ASCII art diagrams of all core state machines (Crawl, Snapshot, ArchiveResult, Binary) - Lists all auto-detected on_Modelname_xyz hooks grouped by model/event - Shows hook execution order (step 0-9), plugin name, and background status Usage: archivebox pluginmap # Show all diagrams and hooks archivebox pluginmap -m Snapshot # Filter to specific model archivebox pluginmap -a # Include disabled plugins archivebox pluginmap -q # Output JSON only
This commit is contained in:
@@ -48,6 +48,8 @@ class ArchiveBoxGroup(click.Group):
|
||||
'server': 'archivebox.cli.archivebox_server.main',
|
||||
'shell': 'archivebox.cli.archivebox_shell.main',
|
||||
'manage': 'archivebox.cli.archivebox_manage.main',
|
||||
# Introspection commands
|
||||
'pluginmap': 'archivebox.cli.archivebox_pluginmap.main',
|
||||
# Worker command
|
||||
'worker': 'archivebox.cli.archivebox_worker.main',
|
||||
}
|
||||
|
||||
356
archivebox/cli/archivebox_pluginmap.py
Normal file
356
archivebox/cli/archivebox_pluginmap.py
Normal file
@@ -0,0 +1,356 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
__package__ = 'archivebox.cli'
|
||||
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
|
||||
import rich_click as click
|
||||
|
||||
from archivebox.misc.util import docstring, enforce_types
|
||||
|
||||
|
||||
# State Machine ASCII Art Diagrams
|
||||
CRAWL_MACHINE_DIAGRAM = """
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ CrawlMachine │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ QUEUED │◄────────────────┐ │
|
||||
│ │ (initial) │ │ │
|
||||
│ └──────┬──────┘ │ │
|
||||
│ │ │ tick() unless can_start() │
|
||||
│ │ tick() when │ │
|
||||
│ │ can_start() │ │
|
||||
│ ▼ │ │
|
||||
│ ┌─────────────┐ │ │
|
||||
│ │ STARTED │─────────────────┘ │
|
||||
│ │ │◄────────────────┐ │
|
||||
│ │ enter: │ │ │
|
||||
│ │ crawl.run()│ │ tick() unless is_finished() │
|
||||
│ │ (discover │ │ │
|
||||
│ │ Crawl │─────────────────┘ │
|
||||
│ │ hooks) │ │
|
||||
│ └──────┬──────┘ │
|
||||
│ │ │
|
||||
│ │ tick() when is_finished() │
|
||||
│ ▼ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ SEALED │ │
|
||||
│ │ (final) │ │
|
||||
│ │ │ │
|
||||
│ │ enter: │ │
|
||||
│ │ cleanup() │ │
|
||||
│ └─────────────┘ │
|
||||
│ │
|
||||
│ Hooks triggered: on_Crawl__* (during STARTED.enter via crawl.run()) │
|
||||
│ on_CrawlEnd__* (during SEALED.enter via cleanup()) │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
"""
|
||||
|
||||
SNAPSHOT_MACHINE_DIAGRAM = """
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ SnapshotMachine │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ QUEUED │◄────────────────┐ │
|
||||
│ │ (initial) │ │ │
|
||||
│ └──────┬──────┘ │ │
|
||||
│ │ │ tick() unless can_start() │
|
||||
│ │ tick() when │ │
|
||||
│ │ can_start() │ │
|
||||
│ ▼ │ │
|
||||
│ ┌─────────────┐ │ │
|
||||
│ │ STARTED │─────────────────┘ │
|
||||
│ │ │◄────────────────┐ │
|
||||
│ │ enter: │ │ │
|
||||
│ │ snapshot │ │ tick() unless is_finished() │
|
||||
│ │ .run() │ │ │
|
||||
│ │ (discover │─────────────────┘ │
|
||||
│ │ Snapshot │ │
|
||||
│ │ hooks, │ │
|
||||
│ │ create │ │
|
||||
│ │ pending │ │
|
||||
│ │ results) │ │
|
||||
│ └──────┬──────┘ │
|
||||
│ │ │
|
||||
│ │ tick() when is_finished() │
|
||||
│ ▼ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ SEALED │ │
|
||||
│ │ (final) │ │
|
||||
│ │ │ │
|
||||
│ │ enter: │ │
|
||||
│ │ cleanup() │ │
|
||||
│ └─────────────┘ │
|
||||
│ │
|
||||
│ Hooks triggered: on_Snapshot__* (creates ArchiveResults in STARTED.enter) │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
"""
|
||||
|
||||
ARCHIVERESULT_MACHINE_DIAGRAM = """
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ ArchiveResultMachine │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ QUEUED │◄────────────────┐ │
|
||||
│ │ (initial) │ │ │
|
||||
│ └──────┬──────┘ │ │
|
||||
│ │ │ tick() unless can_start() │
|
||||
│ │ tick() when │ │
|
||||
│ │ can_start() │ │
|
||||
│ ▼ │ │
|
||||
│ ┌─────────────┐ │ │
|
||||
│ │ STARTED │─────────────────┘ │
|
||||
│ │ │◄────────────────┐ │
|
||||
│ │ enter: │ │ tick() unless is_finished() │
|
||||
│ │ result.run()│─────────────────┘ │
|
||||
│ │ (execute │ │
|
||||
│ │ hook via │ │
|
||||
│ │ run_hook())│ │
|
||||
│ └──────┬──────┘ │
|
||||
│ │ │
|
||||
│ │ tick() checks status set by hook output │
|
||||
│ ├────────────────┬────────────────┬────────────────┐ │
|
||||
│ ▼ ▼ ▼ ▼ │
|
||||
│ ┌───────────┐ ┌───────────┐ ┌───────────┐ ┌───────────┐ │
|
||||
│ │ SUCCEEDED │ │ FAILED │ │ SKIPPED │ │ BACKOFF │ │
|
||||
│ │ (final) │ │ (final) │ │ (final) │ │ │ │
|
||||
│ └───────────┘ └───────────┘ └───────────┘ └─────┬─────┘ │
|
||||
│ │ │
|
||||
│ can_start()───┘ │
|
||||
│ loops back to STARTED │
|
||||
│ │
|
||||
│ Each ArchiveResult runs ONE specific hook (stored in .hook_name field) │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
"""
|
||||
|
||||
BINARY_MACHINE_DIAGRAM = """
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ BinaryMachine │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ QUEUED │◄────────────────┐ │
|
||||
│ │ (initial) │ │ │
|
||||
│ └──────┬──────┘ │ │
|
||||
│ │ │ tick() unless can_start() │
|
||||
│ │ tick() when │ │
|
||||
│ │ can_start() │ │
|
||||
│ ▼ │ │
|
||||
│ ┌─────────────┐ │ │
|
||||
│ │ STARTED │─────────────────┘ │
|
||||
│ │ │◄────────────────┐ │
|
||||
│ │ enter: │ │ │
|
||||
│ │ binary.run()│ │ tick() unless is_finished() │
|
||||
│ │ (discover │─────────────────┘ │
|
||||
│ │ Binary │ │
|
||||
│ │ hooks, │ │
|
||||
│ │ try each │ │
|
||||
│ │ provider) │ │
|
||||
│ └──────┬──────┘ │
|
||||
│ │ │
|
||||
│ │ tick() checks status set by hook output │
|
||||
│ ├────────────────────────────────┐ │
|
||||
│ ▼ ▼ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ SUCCEEDED │ │ FAILED │ │
|
||||
│ │ (final) │ │ (final) │ │
|
||||
│ │ │ │ │ │
|
||||
│ │ abspath, │ │ no provider │ │
|
||||
│ │ version set │ │ succeeded │ │
|
||||
│ └─────────────┘ └─────────────┘ │
|
||||
│ │
|
||||
│ Hooks triggered: on_Binary__* (provider hooks during STARTED.enter) │
|
||||
│ Providers tried in sequence until one succeeds: apt, brew, pip, npm, etc. │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
"""
|
||||
|
||||
|
||||
@enforce_types
|
||||
def pluginmap(
|
||||
show_disabled: bool = False,
|
||||
model: Optional[str] = None,
|
||||
quiet: bool = False,
|
||||
) -> dict:
|
||||
"""
|
||||
Show a map of all state machines and their associated plugin hooks.
|
||||
|
||||
Displays ASCII art diagrams of the core model state machines (Crawl, Snapshot,
|
||||
ArchiveResult, Binary) and lists all auto-detected on_Modelname_xyz hooks
|
||||
that will run for each model's transitions.
|
||||
"""
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
from rich.panel import Panel
|
||||
from rich import box
|
||||
|
||||
from archivebox.hooks import (
|
||||
discover_hooks,
|
||||
extract_step,
|
||||
is_background_hook,
|
||||
BUILTIN_PLUGINS_DIR,
|
||||
USER_PLUGINS_DIR,
|
||||
)
|
||||
|
||||
console = Console()
|
||||
prnt = console.print
|
||||
|
||||
# Model event types that can have hooks
|
||||
model_events = {
|
||||
'Crawl': {
|
||||
'description': 'Hooks run when a Crawl starts (QUEUED→STARTED)',
|
||||
'machine': 'CrawlMachine',
|
||||
'diagram': CRAWL_MACHINE_DIAGRAM,
|
||||
},
|
||||
'CrawlEnd': {
|
||||
'description': 'Hooks run when a Crawl finishes (STARTED→SEALED)',
|
||||
'machine': 'CrawlMachine',
|
||||
'diagram': None, # Part of CrawlMachine
|
||||
},
|
||||
'Snapshot': {
|
||||
'description': 'Hooks run for each Snapshot (creates ArchiveResults)',
|
||||
'machine': 'SnapshotMachine',
|
||||
'diagram': SNAPSHOT_MACHINE_DIAGRAM,
|
||||
},
|
||||
'Binary': {
|
||||
'description': 'Hooks for installing binary dependencies (providers)',
|
||||
'machine': 'BinaryMachine',
|
||||
'diagram': BINARY_MACHINE_DIAGRAM,
|
||||
},
|
||||
}
|
||||
|
||||
# Filter to specific model if requested
|
||||
if model:
|
||||
model = model.title()
|
||||
if model not in model_events:
|
||||
prnt(f'[red]Error: Unknown model "{model}". Available: {", ".join(model_events.keys())}[/red]')
|
||||
return {}
|
||||
model_events = {model: model_events[model]}
|
||||
|
||||
result = {
|
||||
'models': {},
|
||||
'plugins_dir': str(BUILTIN_PLUGINS_DIR),
|
||||
'user_plugins_dir': str(USER_PLUGINS_DIR),
|
||||
}
|
||||
|
||||
if not quiet:
|
||||
prnt()
|
||||
prnt('[bold cyan]ArchiveBox Plugin Map[/bold cyan]')
|
||||
prnt(f'[dim]Built-in plugins: {BUILTIN_PLUGINS_DIR}[/dim]')
|
||||
prnt(f'[dim]User plugins: {USER_PLUGINS_DIR}[/dim]')
|
||||
prnt()
|
||||
|
||||
# Show diagrams first (unless quiet mode)
|
||||
if not quiet:
|
||||
# Show ArchiveResult diagram separately since it's different
|
||||
prnt(Panel(
|
||||
ARCHIVERESULT_MACHINE_DIAGRAM,
|
||||
title='[bold green]ArchiveResultMachine[/bold green]',
|
||||
border_style='green',
|
||||
expand=False,
|
||||
))
|
||||
prnt()
|
||||
|
||||
for event_name, info in model_events.items():
|
||||
# Discover hooks for this event
|
||||
hooks = discover_hooks(event_name, filter_disabled=not show_disabled)
|
||||
|
||||
# Build hook info list
|
||||
hook_infos = []
|
||||
for hook_path in hooks:
|
||||
# Get plugin name from parent directory (e.g., 'wget' from 'plugins/wget/on_Snapshot__61_wget.py')
|
||||
plugin_name = hook_path.parent.name
|
||||
step = extract_step(hook_path.name)
|
||||
is_bg = is_background_hook(hook_path.name)
|
||||
|
||||
hook_infos.append({
|
||||
'path': str(hook_path),
|
||||
'name': hook_path.name,
|
||||
'plugin': plugin_name,
|
||||
'step': step,
|
||||
'is_background': is_bg,
|
||||
'extension': hook_path.suffix,
|
||||
})
|
||||
|
||||
result['models'][event_name] = {
|
||||
'description': info['description'],
|
||||
'machine': info['machine'],
|
||||
'hooks': hook_infos,
|
||||
'hook_count': len(hook_infos),
|
||||
}
|
||||
|
||||
if not quiet:
|
||||
# Show diagram if this model has one
|
||||
if info.get('diagram'):
|
||||
prnt(Panel(
|
||||
info['diagram'],
|
||||
title=f'[bold green]{info["machine"]}[/bold green]',
|
||||
border_style='green',
|
||||
expand=False,
|
||||
))
|
||||
prnt()
|
||||
|
||||
# Create hooks table
|
||||
table = Table(
|
||||
title=f'[bold yellow]on_{event_name}__* Hooks[/bold yellow] ({len(hooks)} found)',
|
||||
box=box.ROUNDED,
|
||||
show_header=True,
|
||||
header_style='bold magenta',
|
||||
)
|
||||
table.add_column('Step', justify='center', width=6)
|
||||
table.add_column('Plugin', style='cyan', width=20)
|
||||
table.add_column('Hook Name', style='green')
|
||||
table.add_column('BG', justify='center', width=4)
|
||||
table.add_column('Type', justify='center', width=5)
|
||||
|
||||
# Sort by step then by name
|
||||
sorted_hooks = sorted(hook_infos, key=lambda h: (h['step'], h['name']))
|
||||
|
||||
for hook in sorted_hooks:
|
||||
bg_marker = '[yellow]bg[/yellow]' if hook['is_background'] else ''
|
||||
ext = hook['extension'].lstrip('.')
|
||||
table.add_row(
|
||||
str(hook['step']),
|
||||
hook['plugin'],
|
||||
hook['name'],
|
||||
bg_marker,
|
||||
ext,
|
||||
)
|
||||
|
||||
prnt(table)
|
||||
prnt()
|
||||
prnt(f'[dim]{info["description"]}[/dim]')
|
||||
prnt()
|
||||
|
||||
# Summary
|
||||
if not quiet:
|
||||
total_hooks = sum(m['hook_count'] for m in result['models'].values())
|
||||
prnt(f'[bold]Total hooks discovered: {total_hooks}[/bold]')
|
||||
prnt()
|
||||
prnt('[dim]Hook naming convention: on_{Model}__{XX}_{description}[.bg].{ext}[/dim]')
|
||||
prnt('[dim] - XX: Two-digit order (first digit = step 0-9)[/dim]')
|
||||
prnt('[dim] - .bg: Background hook (non-blocking)[/dim]')
|
||||
prnt('[dim] - ext: py, sh, or js[/dim]')
|
||||
prnt()
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option('--show-disabled', '-a', is_flag=True, help='Show hooks from disabled plugins too')
|
||||
@click.option('--model', '-m', type=str, default=None, help='Filter to specific model (Crawl, Snapshot, Binary, CrawlEnd)')
|
||||
@click.option('--quiet', '-q', is_flag=True, help='Output JSON only, no ASCII diagrams')
|
||||
@docstring(pluginmap.__doc__)
|
||||
def main(**kwargs):
|
||||
import json
|
||||
result = pluginmap(**kwargs)
|
||||
if kwargs.get('quiet'):
|
||||
print(json.dumps(result, indent=2))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user