split CrawlSetup into Install phase with new Binary + BinaryRequest events

This commit is contained in:
Nick Sweeting
2026-03-23 13:15:41 -07:00
parent f2c81142e1
commit 25f935b9d1
22 changed files with 473 additions and 413 deletions

View File

@@ -8,127 +8,32 @@ import rich_click as click
from archivebox.misc.util import docstring, enforce_types
# State Machine ASCII Art Diagrams
CRAWL_MACHINE_DIAGRAM = """
EVENT_FLOW_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
CrawlMachine
ArchiveBox / abx-dl Flow
├─────────────────────────────────────────────────────────────────────────────┤
│ │
┌─────────────┐
│ QUEUED │◄────────────────┐
(initial)
└──────┬──────┘ │
│ tick() unless can_start()
│ tick() when │
│ │ can_start() │ │
│ ▼ │ │
│ ┌─────────────┐ │ │
│ │ STARTED │─────────────────┘ │
│ │ │◄────────────────┐ │
│ │ enter: │ │ │
│ │ crawl.run()│ │ tick() unless is_finished() │
│ │ (discover │ │ │
│ │ Crawl │─────────────────┘ │
│ │ hooks) │ │
│ └──────┬──────┘ │
│ │ │
│ │ tick() when is_finished() │
│ ▼ │
│ ┌─────────────┐ │
│ │ SEALED │ │
│ │ (final) │ │
│ │ │ │
│ │ enter: │ │
│ │ cleanup() │ │
│ └─────────────┘ │
InstallEvent
└─ on_Install__*
└─ BinaryRequest records
└─ BinaryRequestEvent
└─ on_BinaryRequest__*
└─ BinaryEvent / MachineEvent
│ │
Hooks triggered: on_Crawl__* (during STARTED.enter via crawl.run())
on_CrawlEnd__* (during SEALED.enter via cleanup())
└─────────────────────────────────────────────────────────────────────────────┘
"""
SNAPSHOT_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ SnapshotMachine │
├─────────────────────────────────────────────────────────────────────────────┤
CrawlEvent
└─ CrawlSetupEvent
│ └─ on_CrawlSetup__* │
│ │
┌─────────────┐
│ QUEUED │◄────────────────┐
(initial) │
└──────┬──────┘ │
│ │ │ tick() unless can_start() │
│ │ tick() when │ │
│ │ can_start() │ │
│ ▼ │ │
│ ┌─────────────┐ │ │
│ │ STARTED │─────────────────┘ │
│ │ │◄────────────────┐ │
│ │ enter: │ │ │
│ │ snapshot │ │ tick() unless is_finished() │
│ │ .run() │ │ │
│ │ (discover │─────────────────┘ │
│ │ Snapshot │ │
│ │ hooks, │ │
│ │ create │ │
│ │ pending │ │
│ │ results) │ │
│ └──────┬──────┘ │
│ │ │
│ │ tick() when is_finished() │
│ ▼ │
│ ┌─────────────┐ │
│ │ SEALED │ │
│ │ (final) │ │
│ │ │ │
│ │ enter: │ │
│ │ cleanup() │ │
│ └─────────────┘ │
CrawlStartEvent
└─ SnapshotEvent
└─ on_Snapshot__*
└─ Snapshot / ArchiveResult / Tag / Machine / BinaryRequest
│ │
Hooks triggered: on_Snapshot__* (creates ArchiveResults in STARTED.enter)
└─────────────────────────────────────────────────────────────────────────────┘
"""
BINARY_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ BinaryMachine │
├─────────────────────────────────────────────────────────────────────────────┤
SnapshotCleanupEvent -> internal cleanup, no direct hook family
│ CrawlCleanupEvent -> internal cleanup, no direct hook family │
│ │
┌─────────────┐
│ QUEUED │◄────────────────┐
│ │ (initial) │ │ │
│ └──────┬──────┘ │ │
│ │ │ tick() unless can_install() │
│ │ │ (stays queued if failed) │
│ │ tick() when │ │
│ │ can_install() │ │
│ │ │ │
│ │ on_install() runs │ │
│ │ during transition: │ │
│ │ • binary.run() │ │
│ │ (discover Binary │ │
│ │ hooks, try each │ │
│ │ provider until │ │
│ │ one succeeds) │ │
│ │ • Sets abspath, │ │
│ │ version, sha256 │ │
│ │ │ │
│ │ If install fails: │ │
│ │ raises exception──────┘ │
│ │ (retry_at bumped) │
│ │ │
│ ▼ │
│ ┌─────────────┐ │
│ │ INSTALLED │ │
│ │ (final) │ │
│ │ │ │
│ │ Binary is │ │
│ │ ready to │ │
│ │ use │ │
│ └─────────────┘ │
│ │
│ Hooks triggered: on_Binary__* (provider hooks during transition) │
│ Providers tried in sequence until one succeeds: apt, brew, pip, npm, etc. │
│ Installation is synchronous - no intermediate STARTED state │
ArchiveBox projects bus events into the DB; it no longer drives plugin
execution through the old queued model executor.
└─────────────────────────────────────────────────────────────────────────────┘
"""
@@ -136,15 +41,16 @@ BINARY_MACHINE_DIAGRAM = """
@enforce_types
def pluginmap(
show_disabled: bool = False,
model: str | None = None,
event: str | None = None,
quiet: bool = False,
) -> dict:
"""
Show a map of all state machines and their associated plugin hooks.
Show the current abx-dl event phases and their associated plugin hooks.
Displays ASCII art diagrams of the core queued model state machines (Crawl,
Snapshot, Binary) and lists all auto-detected on_Modelname_xyz hooks
that will run for each model's transitions.
This command reflects the new bus-driven runtime, not the legacy ArchiveBox
state-machine executor. Event names are normalized to hook prefixes by
stripping a trailing `Event`, then ArchiveBox checks whether any matching
`on_{EventFamily}__*` scripts actually exist.
"""
from rich.console import Console
from rich.table import Table
@@ -152,49 +58,65 @@ def pluginmap(
from rich import box
from archivebox.hooks import (
discover_hooks,
is_background_hook,
BUILTIN_PLUGINS_DIR,
USER_PLUGINS_DIR,
discover_hooks,
is_background_hook,
normalize_hook_event_name,
)
console = Console()
prnt = console.print
# Model event types that can have hooks
model_events = {
"Crawl": {
"description": "Hooks run when a Crawl starts (QUEUED→STARTED)",
"machine": "CrawlMachine",
"diagram": CRAWL_MACHINE_DIAGRAM,
event_phases = {
"InstallEvent": {
"description": "Pre-run dependency phase. on_Install hooks request binaries and update machine config.",
"emits": ["BinaryRequestEvent", "BinaryEvent", "MachineEvent", "ProcessEvent"],
},
"CrawlEnd": {
"description": "Hooks run when a Crawl finishes (STARTED→SEALED)",
"machine": "CrawlMachine",
"diagram": None, # Part of CrawlMachine
"BinaryRequestEvent": {
"description": "Provider phase. on_BinaryRequest hooks resolve or install requested binaries.",
"emits": ["BinaryEvent", "MachineEvent", "ProcessEvent"],
},
"Snapshot": {
"description": "Hooks run for each Snapshot (creates ArchiveResults)",
"machine": "SnapshotMachine",
"diagram": SNAPSHOT_MACHINE_DIAGRAM,
"BinaryEvent": {
"description": "Resolved binary metadata event. Projected into the DB/runtime config.",
"emits": [],
},
"Binary": {
"description": "Hooks for installing binary dependencies (providers)",
"machine": "BinaryMachine",
"diagram": BINARY_MACHINE_DIAGRAM,
"CrawlEvent": {
"description": "Root crawl lifecycle event emitted by the runner.",
"emits": ["CrawlSetupEvent", "CrawlStartEvent", "CrawlCleanupEvent", "CrawlCompletedEvent"],
},
"CrawlSetupEvent": {
"description": "Crawl-scoped setup phase. on_CrawlSetup hooks launch/configure shared daemons and runtime state.",
"emits": ["MachineEvent", "ProcessEvent"],
},
"SnapshotEvent": {
"description": "Per-snapshot extraction phase. on_Snapshot hooks emit ArchiveResult, Snapshot, Tag, Machine, and BinaryRequest records.",
"emits": ["ArchiveResultEvent", "SnapshotEvent", "TagEvent", "MachineEvent", "BinaryRequestEvent", "ProcessEvent"],
},
"SnapshotCleanupEvent": {
"description": "Internal snapshot cleanup phase.",
"emits": ["ProcessKillEvent"],
},
"CrawlCleanupEvent": {
"description": "Internal crawl cleanup phase.",
"emits": ["ProcessKillEvent"],
},
}
# Filter to specific model if requested
if model:
model = model.title()
if model not in model_events:
prnt(f'[red]Error: Unknown model "{model}". Available: {", ".join(model_events.keys())}[/red]')
return {}
model_events = {model: model_events[model]}
if event:
requested = str(event).strip()
if requested in event_phases:
event_phases = {requested: event_phases[requested]}
else:
normalized_requested = normalize_hook_event_name(requested)
matched_name = next((name for name in event_phases if normalize_hook_event_name(name) == normalized_requested), None)
if matched_name is None:
prnt(f'[red]Error: Unknown event "{requested}". Available: {", ".join(event_phases.keys())}[/red]')
return {}
event_phases = {matched_name: event_phases[matched_name]}
result = {
"models": {},
"events": {},
"plugins_dir": str(BUILTIN_PLUGINS_DIR),
"user_plugins_dir": str(USER_PLUGINS_DIR),
}
@@ -205,88 +127,83 @@ def pluginmap(
prnt(f"[dim]Built-in plugins: {BUILTIN_PLUGINS_DIR}[/dim]")
prnt(f"[dim]User plugins: {USER_PLUGINS_DIR}[/dim]")
prnt()
prnt(
Panel(
EVENT_FLOW_DIAGRAM,
title="[bold green]Event Flow[/bold green]",
border_style="green",
expand=False,
),
)
prnt()
for event_name, info in model_events.items():
# Discover hooks for this event
for event_name, info in event_phases.items():
hook_event = normalize_hook_event_name(event_name)
hooks = discover_hooks(event_name, filter_disabled=not show_disabled)
# Build hook info list
hook_infos = []
for hook_path in hooks:
# Get plugin name from parent directory (e.g., 'wget' from 'plugins/wget/on_Snapshot__06_wget.bg.py')
plugin_name = hook_path.parent.name
is_bg = is_background_hook(hook_path.name)
hook_infos.append(
{
"path": str(hook_path),
"name": hook_path.name,
"plugin": plugin_name,
"is_background": is_bg,
"is_background": is_background_hook(hook_path.name),
"extension": hook_path.suffix,
},
)
result["models"][event_name] = {
result["events"][event_name] = {
"description": info["description"],
"machine": info["machine"],
"hook_event": hook_event,
"emits": info["emits"],
"hooks": hook_infos,
"hook_count": len(hook_infos),
}
if not quiet:
# Show diagram if this model has one
if info.get("diagram"):
assert info["diagram"] is not None
prnt(
Panel(
info["diagram"],
title=f"[bold green]{info['machine']}[/bold green]",
border_style="green",
expand=False,
),
)
prnt()
if quiet:
continue
# Create hooks table
table = Table(
title=f"[bold yellow]on_{event_name}__* Hooks[/bold yellow] ({len(hooks)} found)",
box=box.ROUNDED,
show_header=True,
header_style="bold magenta",
)
table.add_column("Plugin", style="cyan", width=20)
table.add_column("Hook Name", style="green")
table.add_column("BG", justify="center", width=4)
table.add_column("Type", justify="center", width=5)
title_suffix = f" -> on_{hook_event}__*" if hook_infos else ""
table = Table(
title=f"[bold yellow]{event_name}[/bold yellow]{title_suffix} ({len(hooks)} hooks)",
box=box.ROUNDED,
show_header=True,
header_style="bold magenta",
)
table.add_column("Plugin", style="cyan", width=20)
table.add_column("Hook Name", style="green")
table.add_column("BG", justify="center", width=4)
table.add_column("Type", justify="center", width=5)
# Sort lexicographically by hook name
sorted_hooks = sorted(hook_infos, key=lambda h: h["name"])
for hook in sorted_hooks:
if hook_infos:
for hook in sorted(hook_infos, key=lambda h: h["name"]):
bg_marker = "[yellow]bg[/yellow]" if hook["is_background"] else ""
ext = hook["extension"].lstrip(".")
table.add_row(
hook["plugin"],
hook["name"],
bg_marker,
ext,
hook["extension"].lstrip("."),
)
else:
table.add_row("[dim]-[/dim]", "[dim]No direct hooks[/dim]", "", "")
prnt(table)
prnt()
prnt(f"[dim]{info['description']}[/dim]")
prnt()
prnt(table)
prnt(f"[dim]{info['description']}[/dim]")
if info["emits"]:
prnt(f"[dim]Emits: {', '.join(info['emits'])}[/dim]")
if not hook_infos:
prnt(f"[dim]No direct on_{hook_event}__* scripts are currently defined for this event family.[/dim]")
prnt()
# Summary
if not quiet:
total_hooks = sum(m["hook_count"] for m in result["models"].values())
total_hooks = sum(event_info["hook_count"] for event_info in result["events"].values())
prnt(f"[bold]Total hooks discovered: {total_hooks}[/bold]")
prnt()
prnt("[dim]Hook naming convention: on_{Model}__{XX}_{description}[.bg].{ext}[/dim]")
prnt("[dim] - XX: Two-digit lexicographic order (00-99)[/dim]")
prnt("[dim] - .bg: Background hook (non-blocking)[/dim]")
prnt("[dim] - ext: py, sh, or js[/dim]")
prnt("[dim]Hook naming convention: on_{EventFamily}__{XX}_{description}[.bg].{ext}[/dim]")
prnt("[dim]Event names are normalized with a simple `Event` suffix strip before hook discovery.[/dim]")
prnt("[dim]If no `on_{EventFamily}__*` scripts exist, the event is shown as having no direct hooks.[/dim]")
prnt()
return result
@@ -294,8 +211,8 @@ def pluginmap(
@click.command()
@click.option("--show-disabled", "-a", is_flag=True, help="Show hooks from disabled plugins too")
@click.option("--model", "-m", type=str, default=None, help="Filter to specific model (Crawl, Snapshot, Binary, CrawlEnd)")
@click.option("--quiet", "-q", is_flag=True, help="Output JSON only, no ASCII diagrams")
@click.option("--event", "-e", type=str, default=None, help="Filter to specific event (e.g. InstallEvent, SnapshotEvent)")
@click.option("--quiet", "-q", is_flag=True, help="Output JSON only, no tables")
@docstring(pluginmap.__doc__)
def main(**kwargs):
import json

View File

@@ -10,7 +10,7 @@ Modes:
- Without stdin (TTY): Run the background runner in foreground until killed
- --crawl-id: Run the crawl runner for a specific crawl only
- --snapshot-id: Run a specific snapshot through its parent crawl
- --binary-id: Emit a BinaryEvent for a specific Binary row
- --binary-id: Emit a BinaryRequestEvent for a specific Binary row
Examples:
# Run the background runner in foreground
@@ -64,7 +64,15 @@ def process_stdin_records() -> int:
"""
from django.utils import timezone
from archivebox.misc.jsonl import read_stdin, write_record, TYPE_CRAWL, TYPE_SNAPSHOT, TYPE_ARCHIVERESULT, TYPE_BINARY
from archivebox.misc.jsonl import (
read_stdin,
write_record,
TYPE_CRAWL,
TYPE_SNAPSHOT,
TYPE_ARCHIVERESULT,
TYPE_BINARYREQUEST,
TYPE_BINARY,
)
from archivebox.base_models.models import get_or_create_system_user_pk
from archivebox.core.models import Snapshot, ArchiveResult
from archivebox.crawls.models import Crawl
@@ -185,7 +193,7 @@ def process_stdin_records() -> int:
output_records.append(record if not archiveresult else archiveresult.to_json())
queued_count += 1
elif record_type == TYPE_BINARY:
elif record_type in {TYPE_BINARYREQUEST, TYPE_BINARY}:
if record_id:
try:
binary = Binary.objects.get(id=record_id)

View File

@@ -1104,6 +1104,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
from archivebox.misc.jsonl import (
TYPE_SNAPSHOT,
TYPE_ARCHIVERESULT,
TYPE_BINARYREQUEST,
TYPE_BINARY,
TYPE_PROCESS,
)
@@ -1126,7 +1127,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
result["snapshot"] = record
elif record_type == TYPE_ARCHIVERESULT:
result["archive_results"].append(record)
elif record_type == TYPE_BINARY:
elif record_type in {TYPE_BINARYREQUEST, TYPE_BINARY}:
result["binaries"].append(record)
elif record_type == TYPE_PROCESS:
result["processes"].append(record)

View File

@@ -1226,11 +1226,13 @@ def live_progress_view(request):
return (plugin, plugin, "unknown", "")
phase = "unknown"
if normalized_hook_name.startswith("on_Crawl__"):
if normalized_hook_name.startswith("on_Install__"):
phase = "install"
elif normalized_hook_name.startswith("on_CrawlSetup__"):
phase = "crawl"
elif normalized_hook_name.startswith("on_Snapshot__"):
phase = "snapshot"
elif normalized_hook_name.startswith("on_Binary__"):
elif normalized_hook_name.startswith("on_BinaryRequest__"):
phase = "binary"
label = normalized_hook_name

View File

@@ -827,14 +827,16 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
for record in records[:3]:
print(f" Record: type={record.get('type')}, keys={list(record.keys())[:5]}")
if system_task:
records = [record for record in records if record.get("type") in ("Binary", "Machine")]
records = [record for record in records if record.get("type") in ("BinaryRequest", "Binary", "Machine")]
overrides = {"crawl": self}
stats = process_hook_records(records, overrides=overrides)
if stats:
print(f"[green]✓ Created: {stats}[/green]")
hook_binary_names = {
str(record.get("name")).strip() for record in records if record.get("type") == "Binary" and record.get("name")
str(record.get("name")).strip()
for record in records
if record.get("type") in ("BinaryRequest", "Binary") and record.get("name")
}
hook_binary_names.discard("")
if hook_binary_names:
@@ -933,7 +935,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
# Check if any snapshots exist for this crawl
snapshots = Snapshot.objects.filter(crawl=self)
# If no snapshots exist, allow finishing (e.g., archivebox://install crawls that only run hooks)
# If no snapshots exist, allow finishing (e.g., system crawls that only run setup hooks)
if not snapshots.exists():
return True
@@ -1081,7 +1083,7 @@ class CrawlMachine(BaseStateMachine):
status=Crawl.StatusChoices.STARTED,
)
else:
# No snapshots (system crawl like archivebox://install)
# No snapshots (system crawl that only runs setup hooks)
print("[cyan]🔄 No snapshots created, sealing crawl immediately[/cyan]", file=sys.stderr)
# Seal immediately since there's no work to do
self.seal()

View File

@@ -1,9 +1,22 @@
"""
Hook discovery and execution system for ArchiveBox plugins.
Hook discovery and execution helpers for ArchiveBox plugins.
Hooks are standalone scripts that run as separate processes and communicate
with ArchiveBox via CLI arguments and stdout JSON output. This keeps the plugin
system simple and language-agnostic.
ArchiveBox no longer drives plugin execution itself during normal crawls.
`abx-dl` owns the live runtime and emits typed bus events; ArchiveBox mainly:
- discovers hook files for inspection / docs / legacy direct execution helpers
- executes individual hook scripts when explicitly requested
- parses hook stdout JSONL records into ArchiveBox models when needed
Hook-backed event families are discovered from filenames like:
on_Install__*
on_BinaryRequest__*
on_CrawlSetup__*
on_Snapshot__*
Lifecycle event names like `InstallEvent` or `SnapshotCleanupEvent` are
normalized to the corresponding `on_{EventFamily}__*` prefix by a simple
string transform. If no scripts exist for that prefix, discovery returns `[]`.
Directory structure:
abx_plugins/plugins/<plugin_name>/on_<Event>__<hook_name>.<ext> (built-in package)
@@ -11,7 +24,7 @@ Directory structure:
Hook contract:
Input: --url=<url> (and other --key=value args)
Output: JSON to stdout, files to $PWD
Output: JSONL records to stdout, files to $PWD
Exit: 0 = success, non-zero = failure
Execution order:
@@ -19,36 +32,13 @@ Execution order:
- Foreground hooks run sequentially in that order
- Background hooks (.bg suffix) run concurrently and do not block foreground progress
- After all foreground hooks complete, background hooks receive SIGTERM and must finalize
- Failed extractors don't block subsequent extractors
Hook Naming Convention:
on_{ModelName}__{run_order}_{description}[.finite.bg|.daemon.bg].{ext}
Hook naming convention:
on_{EventFamily}__{run_order}_{description}[.finite.bg|.daemon.bg].{ext}
Examples:
on_Snapshot__00_setup.py # runs first
on_Snapshot__10_chrome_tab.daemon.bg.js # background (doesn't block)
on_Snapshot__50_screenshot.js # foreground (blocks)
on_Snapshot__63_media.finite.bg.py # background (long-running)
Dependency handling:
Extractor plugins that depend on other plugins' output should check at runtime:
```python
# Example: screenshot plugin depends on chrome plugin
chrome_dir = Path(os.environ.get('SNAPSHOT_DIR', '.')) / 'chrome'
if not (chrome_dir / 'cdp_url.txt').exists():
print('{"status": "skipped", "output": "chrome session not available"}')
sys.exit(1) # Exit non-zero so it gets retried later
```
On retry (Snapshot.retry_failed_archiveresults()):
- Only FAILED/SKIPPED plugins reset to queued (SUCCEEDED stays)
- Run in order again
- If dependencies now succeed, dependents can run
API (all hook logic lives here):
discover_hooks(event) -> List[Path] Find hook scripts
run_hook(script, ...) -> HookResult Execute a hook script
API:
discover_hooks(event) -> List[Path] Find hook scripts for a hook-backed event family
run_hook(script, ...) -> Process Execute a hook script directly
is_background_hook(name) -> bool Check if hook is background (.bg suffix)
"""
@@ -122,6 +112,27 @@ def iter_plugin_dirs() -> list[Path]:
return plugin_dirs
def normalize_hook_event_name(event_name: str) -> str | None:
"""
Normalize a hook event family or event class name to its on_* prefix.
Examples:
InstallEvent -> Install
BinaryRequestEvent -> BinaryRequest
CrawlSetupEvent -> CrawlSetup
SnapshotEvent -> Snapshot
BinaryEvent -> Binary
CrawlCleanupEvent -> CrawlCleanup
"""
normalized = str(event_name or "").strip()
if not normalized:
return None
if normalized.endswith("Event"):
return normalized[:-5] or None
return normalized
class HookResult(TypedDict, total=False):
"""Raw result from run_hook()."""
@@ -144,7 +155,7 @@ def discover_hooks(
config: dict[str, Any] | None = None,
) -> list[Path]:
"""
Find all hook scripts matching on_{event_name}__*.{sh,py,js} pattern.
Find all hook scripts for an event family.
Searches both built-in and user plugin directories.
Filters out hooks from disabled plugins by default (respects USE_/SAVE_ flags).
@@ -156,7 +167,10 @@ def discover_hooks(
on_Snapshot__26_readability.py # runs later (depends on singlefile)
Args:
event_name: Event name (e.g., 'Snapshot', 'Binary', 'Crawl')
event_name: Hook event family or event class name.
Examples: 'Install', 'InstallEvent', 'BinaryRequestEvent', 'Snapshot'.
Event names are normalized by stripping a trailing `Event`.
If no matching `on_{EventFamily}__*` scripts exist, returns [].
filter_disabled: If True, skip hooks from disabled plugins (default: True)
config: Optional config dict from get_config() (merges file, env, machine, crawl, snapshot)
If None, will call get_config() with global scope
@@ -179,6 +193,10 @@ def discover_hooks(
discover_hooks('Snapshot', filter_disabled=False)
# Returns: [Path('.../on_Snapshot__10_title.py'), ..., Path('.../on_Snapshot__50_wget.py')]
"""
hook_event_name = normalize_hook_event_name(event_name)
if not hook_event_name:
return []
hooks = []
for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
@@ -187,18 +205,18 @@ def discover_hooks(
# Search for hook scripts in all subdirectories
for ext in ("sh", "py", "js"):
pattern = f"*/on_{event_name}__*.{ext}"
pattern = f"*/on_{hook_event_name}__*.{ext}"
hooks.extend(base_dir.glob(pattern))
# Also check for hooks directly in the plugins directory
pattern_direct = f"on_{event_name}__*.{ext}"
pattern_direct = f"on_{hook_event_name}__*.{ext}"
hooks.extend(base_dir.glob(pattern_direct))
# Binary install hooks are provider hooks, not end-user extractors. They
# self-filter via `binproviders`, so applying the PLUGINS whitelist here
# can hide the very installer needed by a selected plugin (e.g.
# `--plugins=singlefile` still needs the `npm` Binary hook).
if filter_disabled and event_name != "Binary":
# `--plugins=singlefile` still needs the `npm` BinaryRequest hook).
if filter_disabled and hook_event_name != "BinaryRequest":
# Get merged config if not provided (lazy import to avoid circular dependency)
if config is None:
from archivebox.config.configset import get_config
@@ -1051,8 +1069,12 @@ def get_plugin_icon(plugin: str) -> str:
def process_hook_records(records: list[dict[str, Any]], overrides: dict[str, Any] | None = None) -> dict[str, int]:
"""
Process JSONL records from hook output.
Dispatches to Model.from_json() for each record type.
Process JSONL records emitted by hook stdout.
This handles hook-emitted record types such as Snapshot, Tag, BinaryRequest,
Binary, and Machine. It does not process bus lifecycle events like
InstallEvent, CrawlEvent, CrawlCleanupEvent, or SnapshotCleanupEvent, since
those are not emitted as JSONL records by hook subprocesses.
Args:
records: List of JSONL record dicts from result['records']
@@ -1104,12 +1126,12 @@ def process_hook_records(records: list[dict[str, Any]], overrides: dict[str, Any
if obj:
stats["Tag"] = stats.get("Tag", 0) + 1
elif record_type == "Binary":
elif record_type in {"BinaryRequest", "Binary"}:
from archivebox.machine.models import Binary
obj = Binary.from_json(record.copy(), overrides)
if obj:
stats["Binary"] = stats.get("Binary", 0) + 1
stats[record_type] = stats.get(record_type, 0) + 1
elif record_type == "Machine":
from archivebox.machine.models import Machine

View File

@@ -1,6 +1,6 @@
# Generated by hand on 2026-01-01
# Converges machine app for 0.8.6rc0 → 0.9.x migration path
# Drops old InstalledBinary table and ensures Binary table exists
# Drops the legacy machine_installedbinary table and ensures the Binary table exists
from django.db import migrations, connection
@@ -18,7 +18,7 @@ def converge_binary_table(apps, schema_editor):
print(f"DEBUG 0005: Existing tables: {existing_tables}")
# Drop old InstalledBinary table if it exists (0.8.6rc0 path)
# Drop the legacy machine_installedbinary table if it exists (0.8.6rc0 path)
if "machine_installedbinary" in existing_tables:
print("✓ Dropping machine_installedbinary table (0.8.6rc0 divergence)")
cursor.execute("DROP TABLE IF EXISTS machine_installedbinary")

View File

@@ -348,7 +348,7 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
Installation is synchronous during queued→installed transition.
If installation fails, Binary stays in queued with retry_at set for later retry.
State machine calls run() which executes on_Binary__install_* hooks
State machine calls run() which executes on_BinaryRequest__* hooks
to install the binary using the specified providers.
"""
@@ -447,12 +447,15 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
"""
from archivebox.config import VERSION
is_installed = bool(self.abspath and self.version)
return {
"type": "Binary",
"type": "Binary" if is_installed else "BinaryRequest",
"schema_version": VERSION,
"id": str(self.id),
"machine_id": str(self.machine_id),
"name": self.name,
"binproviders": self.binproviders,
"overrides": self.overrides,
"binprovider": self.binprovider,
"abspath": self.abspath,
"version": self.version,
@@ -540,7 +543,7 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
)
return binary
# Case 3: From on_Binary__install hook output - update with installation results
# Case 3: From on_BinaryRequest__ hook output - update with installation results
if abspath and version:
binary, _ = Binary.objects.update_or_create(
machine=machine,
@@ -607,10 +610,10 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
def run(self):
"""
Execute binary installation by running on_Binary__install_* hooks.
Execute binary installation by running on_BinaryRequest__* hooks.
Called by BinaryMachine when entering 'started' state.
Runs ALL on_Binary__install_* hooks - each hook checks binproviders
Runs ALL on_BinaryRequest__* hooks - each hook checks binproviders
and decides if it can handle this binary. First hook to succeed wins.
Updates status to SUCCEEDED or FAILED based on hook output.
"""
@@ -637,8 +640,8 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
output_dir = self.output_dir
output_dir.mkdir(parents=True, exist_ok=True)
# Discover ALL on_Binary__install_* hooks
hooks = discover_hooks("Binary", config=config)
# Discover ALL on_BinaryRequest__* hooks
hooks = discover_hooks("BinaryRequest", config=config)
if not hooks:
# No hooks available - stay queued, will retry later
return

View File

@@ -35,11 +35,21 @@ TYPE_SNAPSHOT = "Snapshot"
TYPE_ARCHIVERESULT = "ArchiveResult"
TYPE_TAG = "Tag"
TYPE_CRAWL = "Crawl"
TYPE_BINARYREQUEST = "BinaryRequest"
TYPE_BINARY = "Binary"
TYPE_PROCESS = "Process"
TYPE_MACHINE = "Machine"
VALID_TYPES = {TYPE_SNAPSHOT, TYPE_ARCHIVERESULT, TYPE_TAG, TYPE_CRAWL, TYPE_BINARY, TYPE_PROCESS, TYPE_MACHINE}
VALID_TYPES = {
TYPE_SNAPSHOT,
TYPE_ARCHIVERESULT,
TYPE_TAG,
TYPE_CRAWL,
TYPE_BINARYREQUEST,
TYPE_BINARY,
TYPE_PROCESS,
TYPE_MACHINE,
}
def parse_line(line: str) -> dict[str, Any] | None:

View File

@@ -2,24 +2,24 @@ from __future__ import annotations
import asyncio
from abx_dl.events import BinaryEvent, BinaryInstalledEvent
from abx_dl.events import BinaryRequestEvent, BinaryEvent
from abx_dl.services.base import BaseService
from .db import run_db_op
class BinaryService(BaseService):
LISTENS_TO = [BinaryEvent, BinaryInstalledEvent]
LISTENS_TO = [BinaryRequestEvent, BinaryEvent]
EMITS = []
async def on_BinaryEvent__Outer(self, event: BinaryEvent) -> None:
async def on_BinaryRequestEvent__Outer(self, event: BinaryRequestEvent) -> None:
await run_db_op(self._project_binary, event)
async def on_BinaryInstalledEvent__Outer(self, event: BinaryInstalledEvent) -> None:
async def on_BinaryEvent__Outer(self, event: BinaryEvent) -> None:
resolved = await asyncio.to_thread(self._resolve_installed_binary_metadata, event)
await run_db_op(self._project_installed_binary, event, resolved)
def _project_binary(self, event: BinaryEvent) -> None:
def _project_binary(self, event: BinaryRequestEvent) -> None:
from archivebox.machine.models import Binary, Machine
machine = Machine.current()
@@ -39,16 +39,12 @@ class BinaryService(BaseService):
Binary.from_json(
{
"name": event.name,
"abspath": event.abspath,
"version": event.version,
"sha256": event.sha256,
"binproviders": event.binproviders,
"binprovider": event.binprovider,
"overrides": event.overrides or {},
},
)
def _resolve_installed_binary_metadata(self, event: BinaryInstalledEvent) -> dict[str, str]:
def _resolve_installed_binary_metadata(self, event: BinaryEvent) -> dict[str, str]:
resolved = {
"abspath": event.abspath or "",
"version": event.version or "",
@@ -59,6 +55,18 @@ class BinaryService(BaseService):
if resolved["abspath"] and resolved["version"] and resolved["binprovider"]:
return resolved
if resolved["abspath"] and not resolved["version"]:
try:
from abx_pkg.semver import bin_version
detected_version = bin_version(resolved["abspath"])
except Exception:
detected_version = None
if detected_version:
resolved["version"] = str(detected_version)
if resolved["version"] and resolved["binprovider"]:
return resolved
try:
from abx_dl.dependencies import load_binary
@@ -80,7 +88,7 @@ class BinaryService(BaseService):
return resolved
def _project_installed_binary(self, event: BinaryInstalledEvent, resolved: dict[str, str]) -> None:
def _project_installed_binary(self, event: BinaryEvent, resolved: dict[str, str]) -> None:
from archivebox.machine.models import Binary, Machine
machine = Machine.current()

View File

@@ -58,7 +58,7 @@ class ProcessService(BaseService):
return process
process_type = getattr(event, "process_type", "") or (
Process.TypeChoices.BINARY if event.hook_name.startswith("on_Binary") else Process.TypeChoices.HOOK
Process.TypeChoices.BINARY if event.hook_name.startswith("on_BinaryRequest") else Process.TypeChoices.HOOK
)
worker_type = getattr(event, "worker_type", "") or ""
if process_type == Process.TypeChoices.WORKER and worker_type:

View File

@@ -15,14 +15,13 @@ from typing import Any
from django.utils import timezone
from rich.console import Console
from abx_dl.events import BinaryEvent
from abx_dl.events import BinaryRequestEvent
from abx_dl.limits import CrawlLimitState
from abx_dl.models import INSTALL_URL, Plugin, Snapshot as AbxSnapshot, discover_plugins, filter_plugins
from abx_dl.models import Plugin, Snapshot as AbxSnapshot, discover_plugins, filter_plugins
from abx_dl.orchestrator import (
create_bus,
download,
install_plugins as abx_install_plugins,
prepare_install_plugins,
setup_services as setup_abx_services,
)
@@ -51,11 +50,12 @@ def _selected_plugins_from_config(config: dict[str, Any]) -> list[str] | None:
def _count_selected_hooks(plugins: dict[str, Plugin], selected_plugins: list[str] | None) -> int:
selected = filter_plugins(plugins, selected_plugins) if selected_plugins else plugins
total = 0
for plugin in selected.values():
total += len(list(plugin.get_crawl_hooks()))
total += len(list(plugin.get_snapshot_hooks()))
return total
return sum(
1
for plugin in selected.values()
for hook in plugin.hooks
if "Install" in hook.name or "CrawlSetup" in hook.name or "Snapshot" in hook.name
)
def _runner_debug(message: str) -> None:
@@ -68,10 +68,9 @@ def _binary_env_key(name: str) -> str:
def _binary_config_keys_for_plugins(plugins: dict[str, Plugin], binary_name: str) -> list[str]:
keys = [_binary_env_key(binary_name)]
if binary_name == "postlight-parser":
keys.insert(0, "MERCURY_BINARY")
keys: list[str] = []
if binary_name != "postlight-parser":
keys.append(_binary_env_key(binary_name))
for plugin in plugins.values():
for key, prop in plugin.config_schema.items():
@@ -86,6 +85,12 @@ def _installed_binary_config_overrides(plugins: dict[str, Plugin]) -> dict[str,
machine = Machine.current()
overrides: dict[str, str] = {}
shared_lib_dir: Path | None = None
pip_home: Path | None = None
pip_bin_dir: Path | None = None
npm_home: Path | None = None
node_modules_dir: Path | None = None
npm_bin_dir: Path | None = None
binaries = (
Binary.objects.filter(machine=machine, status=Binary.StatusChoices.INSTALLED).exclude(abspath="").exclude(abspath__isnull=True)
)
@@ -100,6 +105,32 @@ def _installed_binary_config_overrides(plugins: dict[str, Plugin]) -> dict[str,
for key in _binary_config_keys_for_plugins(plugins, binary.name):
overrides[key] = binary.abspath
if resolved_path.parent.name == ".bin" and resolved_path.parent.parent.name == "node_modules":
npm_bin_dir = npm_bin_dir or resolved_path.parent
node_modules_dir = node_modules_dir or resolved_path.parent.parent
npm_home = npm_home or resolved_path.parent.parent.parent
shared_lib_dir = shared_lib_dir or resolved_path.parent.parent.parent.parent
elif resolved_path.parent.name == "bin" and resolved_path.parent.parent.name == "venv" and resolved_path.parent.parent.parent.name == "pip":
pip_bin_dir = pip_bin_dir or resolved_path.parent
pip_home = pip_home or resolved_path.parent.parent.parent
shared_lib_dir = shared_lib_dir or resolved_path.parent.parent.parent.parent
if shared_lib_dir is not None:
overrides["LIB_DIR"] = str(shared_lib_dir)
overrides["LIB_BIN_DIR"] = str(shared_lib_dir / "bin")
if pip_home is not None:
overrides["PIP_HOME"] = str(pip_home)
if pip_bin_dir is not None:
overrides["PIP_BIN_DIR"] = str(pip_bin_dir)
if npm_home is not None:
overrides["NPM_HOME"] = str(npm_home)
if node_modules_dir is not None:
overrides["NODE_MODULES_DIR"] = str(node_modules_dir)
overrides["NODE_MODULE_DIR"] = str(node_modules_dir)
overrides["NODE_PATH"] = str(node_modules_dir)
if npm_bin_dir is not None:
overrides["NPM_BIN_DIR"] = str(npm_bin_dir)
return overrides
@@ -264,26 +295,23 @@ class CrawlRunner:
auto_install=True,
emit_jsonl=False,
)
if self.crawl.get_system_task() == INSTALL_URL:
await self._run_install_crawl()
else:
snapshot_ids = await sync_to_async(self._initial_snapshot_ids, thread_sensitive=True)()
if snapshot_ids:
root_snapshot_id = snapshot_ids[0]
_runner_debug(f"crawl {self.crawl.id} starting crawl setup root_snapshot={root_snapshot_id}")
await self._run_crawl_setup(root_snapshot_id)
_runner_debug(f"crawl {self.crawl.id} finished crawl setup root_snapshot={root_snapshot_id}")
for snapshot_id in snapshot_ids:
await self.enqueue_snapshot(snapshot_id)
_runner_debug(f"crawl {self.crawl.id} waiting for snapshot tasks count={len(self.snapshot_tasks)}")
await self._wait_for_snapshot_tasks()
_runner_debug(f"crawl {self.crawl.id} finished waiting for snapshot tasks")
_runner_debug(f"crawl {self.crawl.id} starting django crawl.cleanup()")
await sync_to_async(self.crawl.cleanup, thread_sensitive=True)()
_runner_debug(f"crawl {self.crawl.id} finished django crawl.cleanup()")
_runner_debug(f"crawl {self.crawl.id} starting abx crawl cleanup root_snapshot={root_snapshot_id}")
await self._run_crawl_cleanup(root_snapshot_id)
_runner_debug(f"crawl {self.crawl.id} finished abx crawl cleanup root_snapshot={root_snapshot_id}")
snapshot_ids = await sync_to_async(self._initial_snapshot_ids, thread_sensitive=True)()
if snapshot_ids:
root_snapshot_id = snapshot_ids[0]
_runner_debug(f"crawl {self.crawl.id} starting crawl setup root_snapshot={root_snapshot_id}")
await self._run_crawl_setup(root_snapshot_id)
_runner_debug(f"crawl {self.crawl.id} finished crawl setup root_snapshot={root_snapshot_id}")
for snapshot_id in snapshot_ids:
await self.enqueue_snapshot(snapshot_id)
_runner_debug(f"crawl {self.crawl.id} waiting for snapshot tasks count={len(self.snapshot_tasks)}")
await self._wait_for_snapshot_tasks()
_runner_debug(f"crawl {self.crawl.id} finished waiting for snapshot tasks")
_runner_debug(f"crawl {self.crawl.id} starting django crawl.cleanup()")
await sync_to_async(self.crawl.cleanup, thread_sensitive=True)()
_runner_debug(f"crawl {self.crawl.id} finished django crawl.cleanup()")
_runner_debug(f"crawl {self.crawl.id} starting abx crawl cleanup root_snapshot={root_snapshot_id}")
await self._run_crawl_cleanup(root_snapshot_id)
_runner_debug(f"crawl {self.crawl.id} finished abx crawl cleanup root_snapshot={root_snapshot_id}")
if self.abx_services is not None:
_runner_debug(f"crawl {self.crawl.id} waiting for main bus background monitors")
await self.abx_services.process.wait_for_background_monitors()
@@ -404,7 +432,7 @@ class CrawlRunner:
interactive_tty=True,
)
live_ui.print_intro(
url=self.primary_url or INSTALL_URL,
url=self.primary_url or "crawl",
output_dir=Path(self.crawl.output_dir),
plugins_label=plugins_label,
)
@@ -435,30 +463,6 @@ class CrawlRunner:
config["PARENT_SNAPSHOT_ID"] = str(snapshot.parent_snapshot_id)
return config
async def _run_install_crawl(self) -> None:
    """Run a crawl-only abx-dl ``download()`` pass for this crawl.

    The crawl itself stands in for the snapshot: its id is used as both the
    snapshot id and the crawl id, and its output directory is passed as both
    ``CRAWL_DIR`` and ``SNAP_DIR``. Falls back to ``INSTALL_URL`` when the
    runner has no primary URL.
    """
    target_url = self.primary_url or INSTALL_URL
    crawl_id = str(self.crawl.id)
    install_snapshot = AbxSnapshot(
        url=target_url,
        id=crawl_id,
        crawl_id=crawl_id,
    )
    # Layer the crawl-specific keys on top of the runner's base config.
    crawl_dir = str(self.crawl.output_dir)
    install_config = {
        **self.base_config,
        "CRAWL_DIR": crawl_dir,
        "SNAP_DIR": crawl_dir,
        "CRAWL_ID": crawl_id,
        "SOURCE_URL": self.crawl.urls,
    }
    await download(
        url=target_url,
        plugins=self.plugins,
        output_dir=Path(self.crawl.output_dir),
        selected_plugins=self.selected_plugins,
        config_overrides=install_config,
        bus=self.bus,
        emit_jsonl=False,
        snapshot=install_snapshot,
        crawl_only=True,
    )
async def _run_crawl_setup(self, snapshot_id: str) -> None:
from asgiref.sync import sync_to_async
@@ -625,7 +629,7 @@ async def _run_binary(binary_id: str) -> None:
binary = await sync_to_async(Binary.objects.get, thread_sensitive=True)(id=binary_id)
plugins = discover_plugins()
config = get_config()
config.update(_installed_binary_config_overrides(plugins))
config.update(await sync_to_async(_installed_binary_config_overrides, thread_sensitive=True)(plugins))
config["ABX_RUNTIME"] = "archivebox"
bus = create_bus(name=_bus_name("ArchiveBox_binary", str(binary.id)), total_timeout=1800.0)
process_service = ProcessService(bus)
@@ -645,18 +649,14 @@ async def _run_binary(binary_id: str) -> None:
try:
_attach_bus_trace(bus)
await bus.emit(
BinaryEvent(
BinaryRequestEvent(
name=binary.name,
plugin_name="archivebox",
hook_name="archivebox_run",
hook_name="on_BinaryRequest__archivebox_run",
output_dir=str(binary.output_dir),
binary_id=str(binary.id),
machine_id=str(binary.machine_id),
abspath=binary.abspath,
version=binary.version,
sha256=binary.sha256,
binproviders=binary.binproviders,
binprovider=binary.binprovider,
overrides=binary.overrides or None,
),
)
@@ -670,11 +670,13 @@ def run_binary(binary_id: str) -> None:
async def _run_install(plugin_names: list[str] | None = None) -> None:
from asgiref.sync import sync_to_async
from archivebox.config.configset import get_config
plugins = discover_plugins()
config = get_config()
config.update(_installed_binary_config_overrides(plugins))
config.update(await sync_to_async(_installed_binary_config_overrides, thread_sensitive=True)(plugins))
config["ABX_RUNTIME"] = "archivebox"
bus = create_bus(name="ArchiveBox_install", total_timeout=3600.0)
process_service = ProcessService(bus)
@@ -693,7 +695,9 @@ async def _run_install(plugin_names: list[str] | None = None) -> None:
live_stream = None
try:
selected_plugins = prepare_install_plugins(plugins, plugin_names=plugin_names)
selected_plugins = filter_plugins(plugins, list(plugin_names), include_providers=True) if plugin_names else plugins
if not selected_plugins:
return
plugins_label = ", ".join(plugin_names) if plugin_names else f"all ({len(plugins)} available)"
timeout_seconds = int(config.get("TIMEOUT") or 60)
stdout_is_tty = sys.stdout.isatty()
@@ -740,7 +744,7 @@ async def _run_install(plugin_names: list[str] | None = None) -> None:
interactive_tty=interactive_tty,
)
live_ui.print_intro(
url=INSTALL_URL,
url="install",
output_dir=output_dir,
plugins_label=plugins_label,
)

View File

@@ -1252,7 +1252,7 @@ class TestLiveProgressView:
process_type=Process.TypeChoices.HOOK,
status=Process.StatusChoices.RUNNING,
pid=pid,
cmd=["/plugins/chrome/on_Crawl__91_chrome_wait.js", "--url=https://example.com"],
cmd=["/plugins/chrome/on_CrawlSetup__91_chrome_wait.js", "--url=https://example.com"],
env={
"CRAWL_ID": str(snapshot.crawl_id),
"SNAPSHOT_ID": str(snapshot.id),

View File

@@ -5,7 +5,7 @@ import pytest
from django.db import connection
from abx_dl.events import BinaryEvent, ProcessCompletedEvent, ProcessStartedEvent
from abx_dl.events import BinaryRequestEvent, ProcessCompletedEvent, ProcessStartedEvent
from abx_dl.orchestrator import create_bus
from abx_dl.output_files import OutputFile
@@ -515,10 +515,10 @@ def test_binary_event_reuses_existing_installed_binary_row(monkeypatch):
)
service = ArchiveBoxBinaryService(create_bus(name="test_binary_event_reuses_existing_installed_binary_row"))
event = BinaryEvent(
event = BinaryRequestEvent(
name="wget",
plugin_name="wget",
hook_name="on_Crawl__10_wget_install.finite.bg",
hook_name="on_Install__10_wget.finite.bg",
output_dir="/tmp/wget",
binproviders="provider",
)

View File

@@ -337,7 +337,11 @@ def test_binary_create_stdout_pipes_into_run(initialized_archive):
assert create_code == 0, create_stderr
_assert_stdout_is_jsonl_only(create_stdout)
binary = next(record for record in parse_jsonl_output(create_stdout) if record.get("type") == "Binary")
binary = next(
record
for record in parse_jsonl_output(create_stdout)
if record.get("type") in {"BinaryRequest", "Binary"}
)
run_stdout, run_stderr, run_code = run_archivebox_cmd(
["run"],
@@ -349,7 +353,10 @@ def test_binary_create_stdout_pipes_into_run(initialized_archive):
_assert_stdout_is_jsonl_only(run_stdout)
run_records = parse_jsonl_output(run_stdout)
assert any(record.get("type") == "Binary" and record.get("id") == binary["id"] for record in run_records)
assert any(
record.get("type") in {"BinaryRequest", "Binary"} and record.get("id") == binary["id"]
for record in run_records
)
status = _db_value(
initialized_archive,

View File

@@ -378,7 +378,7 @@ class TestRecoverOrphanedCrawls:
machine=machine,
process_type=Process.TypeChoices.HOOK,
status=Process.StatusChoices.RUNNING,
cmd=["/plugins/chrome/on_Crawl__91_chrome_wait.js"],
cmd=["/plugins/chrome/on_CrawlSetup__91_chrome_wait.js"],
env={
"CRAWL_ID": str(crawl.id),
"SNAPSHOT_ID": str(snapshot.id),

View File

@@ -107,7 +107,7 @@ Hook completed successfully"""
stdout = """{"type": "ArchiveResult", "status": "succeeded"}
{invalid json here}
not json at all
{"type": "Binary", "name": "wget"}"""
{"type": "BinaryRequest", "name": "wget"}"""
from archivebox.machine.models import Process
records = Process.parse_records_from_text(stdout)
@@ -187,7 +187,7 @@ class TestHookDiscovery(unittest.TestCase):
wget_dir = self.plugins_dir / "wget"
wget_dir.mkdir()
(wget_dir / "on_Snapshot__50_wget.py").write_text("# test hook")
(wget_dir / "on_Crawl__10_wget_install.finite.bg.py").write_text("# install hook")
(wget_dir / "on_Install__10_wget.finite.bg.py").write_text("# install hook")
chrome_dir = self.plugins_dir / "chrome"
chrome_dir.mkdir(exist_ok=True)
@@ -231,11 +231,29 @@ class TestHookDiscovery(unittest.TestCase):
self.assertEqual(hooks[1].name, "on_Snapshot__21_consolelog.daemon.bg.js")
self.assertEqual(hooks[2].name, "on_Snapshot__50_wget.py")
def test_normalize_hook_event_name_accepts_event_classes(self):
    """Bus event class names should normalize to their hook family names."""
    from archivebox import hooks as hooks_module

    # (event class name passed in, hook family expected back)
    expected_families = (
        ("InstallEvent", "Install"),
        ("BinaryRequestEvent", "BinaryRequest"),
        ("CrawlSetupEvent", "CrawlSetup"),
        ("SnapshotEvent", "Snapshot"),
    )
    for event_class_name, hook_family in expected_families:
        self.assertEqual(hooks_module.normalize_hook_event_name(event_class_name), hook_family)
def test_normalize_hook_event_name_strips_event_suffix_for_lifecycle_events(self):
    """Lifecycle event class names should come back with the ``Event`` suffix removed."""
    from archivebox import hooks as hooks_module

    # (lifecycle event class name, expected normalized name)
    expected_names = (
        ("BinaryEvent", "Binary"),
        ("CrawlEvent", "Crawl"),
        ("SnapshotCleanupEvent", "SnapshotCleanup"),
        ("CrawlCleanupEvent", "CrawlCleanup"),
    )
    for event_class_name, normalized in expected_names:
        self.assertEqual(hooks_module.normalize_hook_event_name(event_class_name), normalized)
def test_get_plugins_includes_non_snapshot_plugin_dirs(self):
"""get_plugins() should include binary-only plugins with standardized metadata."""
env_dir = self.plugins_dir / "env"
env_dir.mkdir()
(env_dir / "on_Binary__15_env_discover.py").write_text("# binary hook")
(env_dir / "on_BinaryRequest__15_env.py").write_text("# binary hook")
(env_dir / "config.json").write_text('{"type": "object", "properties": {}}')
from archivebox import hooks as hooks_module
@@ -265,7 +283,7 @@ class TestHookDiscovery(unittest.TestCase):
npm_dir = self.plugins_dir / "npm"
npm_dir.mkdir()
(npm_dir / "on_Binary__10_npm_install.py").write_text("# npm binary hook")
(npm_dir / "on_BinaryRequest__10_npm.py").write_text("# npm binary hook")
(npm_dir / "config.json").write_text('{"type": "object", "properties": {}}')
from archivebox import hooks as hooks_module
@@ -275,13 +293,40 @@ class TestHookDiscovery(unittest.TestCase):
patch.object(hooks_module, "BUILTIN_PLUGINS_DIR", self.plugins_dir),
patch.object(hooks_module, "USER_PLUGINS_DIR", self.test_dir / "user_plugins"),
):
hooks = hooks_module.discover_hooks("Binary", config={"PLUGINS": "singlefile"})
hooks = hooks_module.discover_hooks("BinaryRequest", config={"PLUGINS": "singlefile"})
hook_names = [hook.name for hook in hooks]
self.assertIn("on_Binary__10_npm_install.py", hook_names)
self.assertIn("on_BinaryRequest__10_npm.py", hook_names)
def test_discover_crawl_hooks_only_include_declared_plugin_dependencies(self):
"""Crawl hook discovery should include required_plugins without broadening to provider plugins."""
def test_discover_hooks_accepts_event_class_names(self):
    """discover_hooks should find hooks when given InstallEvent / SnapshotEvent class names."""
    from archivebox import hooks as hooks_module

    # Presumably clears a cached plugin listing so the patched dirs are re-read.
    hooks_module.get_plugins.cache_clear()
    builtin_dir_patch = patch.object(hooks_module, "BUILTIN_PLUGINS_DIR", self.plugins_dir)
    user_dir_patch = patch.object(hooks_module, "USER_PLUGINS_DIR", self.test_dir / "user_plugins")
    with builtin_dir_patch, user_dir_patch:
        # Map each event class name to the names of the hooks discovered for it.
        discovered_names = {
            event_name: [hook.name for hook in hooks_module.discover_hooks(event_name, filter_disabled=False)]
            for event_name in ("InstallEvent", "SnapshotEvent")
        }

    self.assertIn("on_Install__10_wget.finite.bg.py", discovered_names["InstallEvent"])
    self.assertIn("on_Snapshot__50_wget.py", discovered_names["SnapshotEvent"])
def test_discover_hooks_returns_empty_for_non_hook_lifecycle_events(self):
    """discover_hooks should return an empty list for lifecycle events with no hook family."""
    from archivebox import hooks as hooks_module

    # Presumably clears a cached plugin listing so the patched dirs are re-read.
    hooks_module.get_plugins.cache_clear()
    builtin_dir_patch = patch.object(hooks_module, "BUILTIN_PLUGINS_DIR", self.plugins_dir)
    user_dir_patch = patch.object(hooks_module, "USER_PLUGINS_DIR", self.test_dir / "user_plugins")
    with builtin_dir_patch, user_dir_patch:
        for lifecycle_event in ("BinaryEvent", "CrawlCleanupEvent"):
            self.assertEqual(hooks_module.discover_hooks(lifecycle_event, filter_disabled=False), [])
def test_discover_install_hooks_only_include_declared_plugin_dependencies(self):
"""Install hook discovery should include required_plugins without broadening to provider plugins."""
responses_dir = self.plugins_dir / "responses"
responses_dir.mkdir()
(responses_dir / "config.json").write_text(
@@ -297,12 +342,12 @@ class TestHookDiscovery(unittest.TestCase):
chrome_dir = self.plugins_dir / "chrome"
chrome_dir.mkdir(exist_ok=True)
(chrome_dir / "config.json").write_text('{"type": "object", "properties": {}}')
(chrome_dir / "on_Crawl__70_chrome_install.finite.bg.py").write_text("# chrome crawl hook")
(chrome_dir / "on_Install__70_chrome.finite.bg.py").write_text("# chrome install hook")
npm_dir = self.plugins_dir / "npm"
npm_dir.mkdir()
(npm_dir / "on_Binary__10_npm_install.py").write_text("# npm binary hook")
(npm_dir / "on_Crawl__00_npm_install.py").write_text("# npm crawl hook")
(npm_dir / "on_BinaryRequest__10_npm.py").write_text("# npm binary hook")
(npm_dir / "on_Install__00_npm.py").write_text("# npm install hook")
(npm_dir / "config.json").write_text('{"type": "object", "properties": {}}')
from archivebox import hooks as hooks_module
@@ -312,11 +357,11 @@ class TestHookDiscovery(unittest.TestCase):
patch.object(hooks_module, "BUILTIN_PLUGINS_DIR", self.plugins_dir),
patch.object(hooks_module, "USER_PLUGINS_DIR", self.test_dir / "user_plugins"),
):
hooks = hooks_module.discover_hooks("Crawl", config={"PLUGINS": "responses"})
hooks = hooks_module.discover_hooks("Install", config={"PLUGINS": "responses"})
hook_names = [hook.name for hook in hooks]
self.assertIn("on_Crawl__70_chrome_install.finite.bg.py", hook_names)
self.assertNotIn("on_Crawl__00_npm_install.py", hook_names)
self.assertIn("on_Install__70_chrome.finite.bg.py", hook_names)
self.assertNotIn("on_Install__00_npm.py", hook_names)
class TestGetExtractorName(unittest.TestCase):

View File

@@ -478,7 +478,7 @@ class TestProcessCurrent(TestCase):
"""Process.proc should accept a script recorded in DB when wrapped by an interpreter in psutil."""
proc = Process.objects.create(
machine=Machine.current(),
cmd=["/tmp/on_Crawl__90_chrome_launch.daemon.bg.js", "--url=https://example.com/"],
cmd=["/tmp/on_CrawlSetup__90_chrome_launch.daemon.bg.js", "--url=https://example.com/"],
pid=12345,
status=Process.StatusChoices.RUNNING,
started_at=timezone.now(),
@@ -488,7 +488,7 @@ class TestProcessCurrent(TestCase):
os_proc.create_time.return_value = proc.started_at.timestamp()
os_proc.cmdline.return_value = [
"node",
"/tmp/on_Crawl__90_chrome_launch.daemon.bg.js",
"/tmp/on_CrawlSetup__90_chrome_launch.daemon.bg.js",
"--url=https://example.com/",
]

View File

@@ -295,13 +295,35 @@ def test_installed_binary_config_overrides_include_valid_installed_binaries(monk
binproviders="env",
status=Binary.StatusChoices.INSTALLED,
)
puppeteer_binary = Binary.objects.create(
machine=machine,
name="puppeteer",
abspath="/tmp/shared-lib/npm/node_modules/.bin/puppeteer",
version="24.40.0",
binprovider="npm",
binproviders="npm",
status=Binary.StatusChoices.INSTALLED,
)
ytdlp_binary = Binary.objects.create(
machine=machine,
name="yt-dlp",
abspath="/tmp/shared-lib/pip/venv/bin/yt-dlp",
version="2026.3.17",
binprovider="pip",
binproviders="pip",
status=Binary.StatusChoices.INSTALLED,
)
monkeypatch.setattr(Machine, "current", classmethod(lambda cls: machine))
monkeypatch.setattr(Path, "is_file", lambda self: str(self) in {sys.executable, mercury_binary.abspath, wget_binary.abspath})
monkeypatch.setattr(
Path,
"is_file",
lambda self: str(self) in {sys.executable, mercury_binary.abspath, wget_binary.abspath, puppeteer_binary.abspath, ytdlp_binary.abspath},
)
monkeypatch.setattr(
runner_module.os,
"access",
lambda path, mode: str(path) == sys.executable,
lambda path, mode: str(path) in {sys.executable, puppeteer_binary.abspath, ytdlp_binary.abspath},
)
overrides = runner_module._installed_binary_config_overrides(
@@ -316,8 +338,17 @@ def test_installed_binary_config_overrides_include_valid_installed_binaries(monk
)
assert overrides["MERCURY_BINARY"] == sys.executable
assert overrides["POSTLIGHT_PARSER_BINARY"] == sys.executable
assert "POSTLIGHT_PARSER_BINARY" not in overrides
assert "WGET_BINARY" not in overrides
assert overrides["LIB_DIR"] == "/tmp/shared-lib"
assert overrides["LIB_BIN_DIR"] == "/tmp/shared-lib/bin"
assert overrides["PIP_HOME"] == "/tmp/shared-lib/pip"
assert overrides["PIP_BIN_DIR"] == "/tmp/shared-lib/pip/venv/bin"
assert overrides["NPM_HOME"] == "/tmp/shared-lib/npm"
assert overrides["NPM_BIN_DIR"] == "/tmp/shared-lib/npm/node_modules/.bin"
assert overrides["NODE_MODULES_DIR"] == "/tmp/shared-lib/npm/node_modules"
assert overrides["NODE_MODULE_DIR"] == "/tmp/shared-lib/npm/node_modules"
assert overrides["NODE_PATH"] == "/tmp/shared-lib/npm/node_modules"
def test_run_snapshot_skips_descendant_when_max_size_already_reached(monkeypatch):
@@ -707,10 +738,10 @@ def test_abx_process_service_background_monitor_finishes_after_process_exit(monk
plugin_output_dir = tmp_path / "chrome"
plugin_output_dir.mkdir()
stdout_file = plugin_output_dir / "on_Crawl__90_chrome_launch.daemon.bg.stdout.log"
stderr_file = plugin_output_dir / "on_Crawl__90_chrome_launch.daemon.bg.stderr.log"
stdout_file = plugin_output_dir / "on_CrawlSetup__90_chrome_launch.daemon.bg.stdout.log"
stderr_file = plugin_output_dir / "on_CrawlSetup__90_chrome_launch.daemon.bg.stderr.log"
stderr_file.write_text("")
pid_file = plugin_output_dir / "on_Crawl__90_chrome_launch.daemon.bg.pid"
pid_file = plugin_output_dir / "on_CrawlSetup__90_chrome_launch.daemon.bg.pid"
pid_file.write_text("12345")
proc = AbxProcess(
@@ -719,12 +750,12 @@ def test_abx_process_service_background_monitor_finishes_after_process_exit(monk
timeout=60,
started_at=now_iso(),
plugin="chrome",
hook_name="on_Crawl__90_chrome_launch.daemon.bg",
hook_name="on_CrawlSetup__90_chrome_launch.daemon.bg",
)
process = FakeAsyncProcess()
event = SimpleNamespace(
plugin_name="chrome",
hook_name="on_Crawl__90_chrome_launch.daemon.bg",
hook_name="on_CrawlSetup__90_chrome_launch.daemon.bg",
hook_path="hook",
hook_args=["--url=https://example.org/"],
env={},

View File

@@ -644,7 +644,7 @@ Binary(queued) → BinaryMachine → Binary.run() → succeeded/failed
#### Benefits of Eliminating Dependency
1. **No global singleton conflicts**: Binary is per-machine, no race conditions
2. **Simpler data model**: One table instead of two (Dependency + InstalledBinary)
2. **Simpler data model**: One table instead of two (Dependency + Binary)
3. **Static configuration**: dependencies.jsonl in version control, not database
4. **Consistent state machine**: Binary follows same pattern as other models
5. **Cleaner hooks**: Hooks check bin_providers themselves instead of orchestrator parsing names

View File

@@ -1,6 +1,6 @@
[project]
name = "archivebox"
version = "0.9.10rc2"
version = "0.9.12rc1"
requires-python = ">=3.13"
description = "Self-hosted internet archiving solution."
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
@@ -78,10 +78,10 @@ dependencies = [
"w3lib>=2.2.1", # used for parsing content-type encoding from http response headers & html tags
### Extractor dependencies (optional binaries detected at runtime via shutil.which)
### Binary/Package Management
"abxbus>=2.4.2", # explicit direct dep so local dev env resolves sibling abxbus repo, matching abx-dl EventBus API
"abx-pkg>=1.9.18", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
"abx-plugins>=1.10.14", # shared ArchiveBox plugin package with install_args-only overrides
"abx-dl>=1.10.14", # shared ArchiveBox downloader package with install_args-only overrides
"abxbus>=2.4.9", # explicit direct dep so local dev env resolves sibling abxbus repo, matching abx-dl EventBus API
"abx-pkg>=1.9.19", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
"abx-plugins>=1.10.19", # shared ArchiveBox plugin package with install_args-only overrides
"abx-dl>=1.10.19", # shared ArchiveBox downloader package with install_args-only overrides
### UUID7 backport for Python <3.14
"uuid7>=0.1.0; python_version < '3.14'", # provides the uuid_extensions module on Python 3.13
]

72
uv.lock generated
View File

@@ -14,7 +14,7 @@ supported-markers = [
[[package]]
name = "abx-dl"
version = "1.10.14"
version = "1.10.19"
source = { editable = "../abx-dl" }
dependencies = [
{ name = "abx-pkg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -59,7 +59,7 @@ dev = [
[[package]]
name = "abx-pkg"
version = "1.9.18"
version = "1.9.19"
source = { editable = "../abx-pkg" }
dependencies = [
{ name = "pip", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -101,7 +101,7 @@ dev = [
[[package]]
name = "abx-plugins"
version = "1.10.14"
version = "1.10.19"
source = { editable = "../abx-plugins" }
dependencies = [
{ name = "abx-pkg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -130,7 +130,7 @@ dev = [{ name = "prek", specifier = ">=0.3.6" }]
[[package]]
name = "abxbus"
version = "2.4.8"
version = "2.4.9"
source = { editable = "../abxbus" }
dependencies = [
{ name = "aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -218,7 +218,7 @@ wheels = [
[[package]]
name = "archivebox"
version = "0.9.10rc2"
version = "0.9.12rc1"
source = { editable = "." }
dependencies = [
{ name = "abx-dl", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -524,21 +524,21 @@ wheels = [
[[package]]
name = "cbor2"
version = "5.8.0"
version = "5.9.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d9/8e/8b4fdde28e42ffcd741a37f4ffa9fb59cd4fe01625b544dfcfd9ccb54f01/cbor2-5.8.0.tar.gz", hash = "sha256:b19c35fcae9688ac01ef75bad5db27300c2537eb4ee00ed07e05d8456a0d4931", size = 107825, upload-time = "2025-12-30T18:44:22.455Z" }
sdist = { url = "https://files.pythonhosted.org/packages/bd/cb/09939728be094d155b5d4ac262e39877875f5f7e36eea66beb359f647bd0/cbor2-5.9.0.tar.gz", hash = "sha256:85c7a46279ac8f226e1059275221e6b3d0e370d2bb6bd0500f9780781615bcea", size = 111231, upload-time = "2026-03-22T15:56:50.638Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a6/0d/5a3f20bafaefeb2c1903d961416f051c0950f0d09e7297a3aa6941596b29/cbor2-5.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6d8d104480845e2f28c6165b4c961bbe58d08cb5638f368375cfcae051c28015", size = 70332, upload-time = "2025-12-30T18:43:54.694Z" },
{ url = "https://files.pythonhosted.org/packages/57/66/177a3f089e69db69c987453ab4934086408c3338551e4984734597be9f80/cbor2-5.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:43efee947e5ab67d406d6e0dc61b5dee9d2f5e89ae176f90677a3741a20ca2e7", size = 285985, upload-time = "2025-12-30T18:43:55.733Z" },
{ url = "https://files.pythonhosted.org/packages/b7/8e/9e17b8e4ed80a2ce97e2dfa5915c169dbb31599409ddb830f514b57f96cc/cbor2-5.8.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be7ae582f50be539e09c134966d0fd63723fc4789b8dff1f6c2e3f24ae3eaf32", size = 285173, upload-time = "2025-12-30T18:43:57.321Z" },
{ url = "https://files.pythonhosted.org/packages/cc/33/9f92e107d78f88ac22723ac15d0259d220ba98c1d855e51796317f4c4114/cbor2-5.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:50f5c709561a71ea7970b4cd2bf9eda4eccacc0aac212577080fdfe64183e7f5", size = 278395, upload-time = "2025-12-30T18:43:58.497Z" },
{ url = "https://files.pythonhosted.org/packages/2f/3f/46b80050a4a35ce5cf7903693864a9fdea7213567dc8faa6e25cb375c182/cbor2-5.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a6790ecc73aa93e76d2d9076fc42bf91a9e69f2295e5fa702e776dbe986465bd", size = 278330, upload-time = "2025-12-30T18:43:59.656Z" },
{ url = "https://files.pythonhosted.org/packages/4b/0c/0654233d7543ac8a50f4785f172430ddc97538ba418eb305d6e529d1a120/cbor2-5.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ad72381477133046ce217617d839ea4e9454f8b77d9a6351b229e214102daeb7", size = 70710, upload-time = "2025-12-30T18:44:03.209Z" },
{ url = "https://files.pythonhosted.org/packages/84/62/4671d24e557d7f5a74a01b422c538925140c0495e57decde7e566f91d029/cbor2-5.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6da25190fad3434ce99876b11d4ca6b8828df6ca232cf7344cd14ae1166fb718", size = 285005, upload-time = "2025-12-30T18:44:05.109Z" },
{ url = "https://files.pythonhosted.org/packages/87/85/0c67d763a08e848c9a80d7e4723ba497cce676f41bc7ca1828ae90a0a872/cbor2-5.8.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c13919e3a24c5a6d286551fa288848a4cedc3e507c58a722ccd134e461217d99", size = 282435, upload-time = "2025-12-30T18:44:06.465Z" },
{ url = "https://files.pythonhosted.org/packages/b2/01/0650972b4dbfbebcfbe37cbba7fc3cd9019a8da6397ab3446e07175e342b/cbor2-5.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f8c40d32e5972047a777f9bf730870828f3cf1c43b3eb96fd0429c57a1d3b9e6", size = 277493, upload-time = "2025-12-30T18:44:07.609Z" },
{ url = "https://files.pythonhosted.org/packages/b3/6c/7704a4f32adc7f10f3b41ec067f500a4458f7606397af5e4cf2d368fd288/cbor2-5.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7627894bc0b3d5d0807f31e3107e11b996205470c4429dc2bb4ef8bfe7f64e1e", size = 276085, upload-time = "2025-12-30T18:44:09.021Z" },
{ url = "https://files.pythonhosted.org/packages/d6/4f/101071f880b4da05771128c0b89f41e334cff044dee05fb013c8f4be661c/cbor2-5.8.0-py3-none-any.whl", hash = "sha256:3727d80f539567b03a7aa11890e57798c67092c38df9e6c23abb059e0f65069c", size = 24374, upload-time = "2025-12-30T18:44:21.476Z" },
{ url = "https://files.pythonhosted.org/packages/81/c5/4901e21a8afe9448fd947b11e8f383903207cd6dd0800e5f5a386838de5b/cbor2-5.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fbb06f34aa645b4deca66643bba3d400d20c15312d1fe88d429be60c1ab50f27", size = 71284, upload-time = "2026-03-22T15:56:22.836Z" },
{ url = "https://files.pythonhosted.org/packages/1b/10/df643a381aebc3f05486de4813662bc58accb640fc3275cb276a75e89694/cbor2-5.9.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac684fe195c39821fca70d18afbf748f728aefbfbf88456018d299e559b8cae0", size = 287682, upload-time = "2026-03-22T15:56:24.024Z" },
{ url = "https://files.pythonhosted.org/packages/c6/0c/8aa6b766059ae4a0ca1ec3ff96fe3823a69a7be880dba2e249f7fbe2700b/cbor2-5.9.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a54fbb32cb828c214f7f333a707e4aec61182e7efdc06ea5d9596d3ecee624a", size = 288009, upload-time = "2026-03-22T15:56:25.305Z" },
{ url = "https://files.pythonhosted.org/packages/74/07/6236bc25c183a9cf7e8062e5dddf9eae9b0b14ebf14a58a69fe5a1e872c6/cbor2-5.9.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4753a6d1bc71054d9179557bc65740860f185095ccb401d46637fff028a5b3ec", size = 280437, upload-time = "2026-03-22T15:56:26.479Z" },
{ url = "https://files.pythonhosted.org/packages/4e/0a/84328d23c3c68874ac6497edb9b1900579a1028efa54734df3f1762bbc15/cbor2-5.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:380e534482b843e43442b87d8777a7bf9bed20cb7526f89b780c3400f617304b", size = 282247, upload-time = "2026-03-22T15:56:28.644Z" },
{ url = "https://files.pythonhosted.org/packages/08/7d/9ccc36d10ef96e6038e48046ebe1ce35a1e7814da0e1e204d09e6ef09b8d/cbor2-5.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:23606d31ba1368bd1b6602e3020ee88fe9523ca80e8630faf6b2fc904fd84560", size = 71500, upload-time = "2026-03-22T15:56:31.876Z" },
{ url = "https://files.pythonhosted.org/packages/70/e1/a6cca2cc72e13f00030c6a649f57ae703eb2c620806ab70c40db8eab33fa/cbor2-5.9.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0322296b9d52f55880e300ba8ba09ecf644303b99b51138bbb1c0fb644fa7c3e", size = 286953, upload-time = "2026-03-22T15:56:33.292Z" },
{ url = "https://files.pythonhosted.org/packages/08/3c/24cd5ef488a957d90e016f200a3aad820e4c2f85edd61c9fe4523007a1ee/cbor2-5.9.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:422817286c1d0ce947fb2f7eca9212b39bddd7231e8b452e2d2cc52f15332dba", size = 285454, upload-time = "2026-03-22T15:56:34.703Z" },
{ url = "https://files.pythonhosted.org/packages/a4/35/dca96818494c0ba47cdd73e8d809b27fa91f8fa0ce32a068a09237687454/cbor2-5.9.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9a4907e0c3035bb8836116854ed8e56d8aef23909d601fa59706320897ec2551", size = 279441, upload-time = "2026-03-22T15:56:35.888Z" },
{ url = "https://files.pythonhosted.org/packages/a4/44/d3362378b16e53cf7e535a3f5aed8476e2109068154e24e31981ef5bde9e/cbor2-5.9.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fb7afe77f8d269e42d7c4b515c6fd14f1ccc0625379fb6829b269f493d16eddd", size = 279673, upload-time = "2026-03-22T15:56:37.08Z" },
{ url = "https://files.pythonhosted.org/packages/42/ff/b83492b096fbef26e9cb62c1a4bf2d3cef579ea7b33138c6c37c4ae66f67/cbor2-5.9.0-py3-none-any.whl", hash = "sha256:27695cbd70c90b8de5c4a284642c2836449b14e2c2e07e3ffe0744cb7669a01b", size = 24627, upload-time = "2026-03-22T15:56:48.847Z" },
]
[[package]]
@@ -1279,7 +1279,7 @@ wheels = [
[[package]]
name = "logfire"
version = "4.29.0"
version = "4.30.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "executing", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1290,9 +1290,9 @@ dependencies = [
{ name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/8f/40/3d09fe09cfa63753feada2d41dd909ce0741dd5731014a4b3eb31bdee977/logfire-4.29.0.tar.gz", hash = "sha256:18a306a0b5744aee8ad0a8f5d6b3a47a6d8951c340eaecc42dc5d0224f4bdca0", size = 1057563, upload-time = "2026-03-13T15:30:24.343Z" }
sdist = { url = "https://files.pythonhosted.org/packages/03/77/ed3b6453c0c8027724ceb968ca17e550c47e58cdb5dc27458392db40e327/logfire-4.30.0.tar.gz", hash = "sha256:460ed1a7433d88570659903f31b6f9b70903110addbb18b1cf7b414cdb516bb5", size = 1058676, upload-time = "2026-03-23T17:08:28.944Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/73/aa/fb8102ea48924fbbb9dfced7bada5717875801808ad53f9a60b6b4fec440/logfire-4.29.0-py3-none-any.whl", hash = "sha256:8dd7fdf6bed21459b8893eaa290d61977b9ebcc901844e365ddee868b5d8bca8", size = 302227, upload-time = "2026-03-13T15:30:20.742Z" },
{ url = "https://files.pythonhosted.org/packages/d9/3a/ead5b87ff38292e0ef800b1d184a9a4eedf9f7ce1cf86264b4798a0a8b14/logfire-4.30.0-py3-none-any.whl", hash = "sha256:a520a2b6da7765bc15143fd4098c6f9ec56a836bf3a046f06c823c73af932f3a", size = 302618, upload-time = "2026-03-23T17:08:25.923Z" },
]
[package.optional-dependencies]
@@ -1727,23 +1727,23 @@ wheels = [
[[package]]
name = "prek"
version = "0.3.6"
version = "0.3.8"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ab/e4/983840179c652feb9793c95b88abfe4b1f1d1aed7a791b45db97241be1a0/prek-0.3.6.tar.gz", hash = "sha256:bdf5c1e13ba0c04c2f488c5f90b1fd97a72aa740dc373b17fbbfc51898fa0377", size = 378106, upload-time = "2026-03-16T08:31:54.302Z" }
sdist = { url = "https://files.pythonhosted.org/packages/62/ee/03e8180e3fda9de25b6480bd15cc2bde40d573868d50648b0e527b35562f/prek-0.3.8.tar.gz", hash = "sha256:434a214256516f187a3ab15f869d950243be66b94ad47987ee4281b69643a2d9", size = 400224, upload-time = "2026-03-23T08:23:35.981Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/05/157631f14fef32361a36956368a1e6559d857443d7585bc4c9225f4a4a18/prek-0.3.6-py3-none-linux_armv6l.whl", hash = "sha256:1713119cf0c390486786f4c84450ea584bcdf43979cc28e1350ec62e5d9a41ed", size = 5126301, upload-time = "2026-03-16T08:31:31.194Z" },
{ url = "https://files.pythonhosted.org/packages/54/f0/0918501708994d165c4bfc64c5749a263d04a08ae1196f3ad3b2e0d93b12/prek-0.3.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b68ef211fa60c53ec8866dcf38bacd8cb86b14f0e2b5491dd7a42370bee32e3e", size = 5527520, upload-time = "2026-03-16T08:31:41.948Z" },
{ url = "https://files.pythonhosted.org/packages/e3/9f/0d8ed2eaea58d8a7c5a3b0129914b7a73cd1a1fc7513a1d6b1efa0ec4ce4/prek-0.3.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:327b9030c3424c9fbcdf962992288295e89afe54fa94a7e0928e2691d1d2b53d", size = 5120490, upload-time = "2026-03-16T08:31:29.808Z" },
{ url = "https://files.pythonhosted.org/packages/d8/d5/63e21d19687816082df5bfd234f451b17858b37f500e2a8845cda1a031db/prek-0.3.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:61de3f019f5a082688654139fd9a3e03f74dbd4a09533667714d28833359114d", size = 5355957, upload-time = "2026-03-16T08:31:37.408Z" },
{ url = "https://files.pythonhosted.org/packages/e2/0e/bb52a352e5d7dc92eaebb69aeef4e5b7cddc47c646e24fe9d6a61956b45d/prek-0.3.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5bbba688c5283c8e8c907fb00f7c79fce630129f27f77cbee67e356fcfdedea8", size = 5055675, upload-time = "2026-03-16T08:31:40.311Z" },
{ url = "https://files.pythonhosted.org/packages/34/8b/7c2a49314eb4909d50ee1c2171e00d524f9e080a5be598effbe36158d35c/prek-0.3.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5dfe26bc2675114734fa626e7dc635f76e53a28fed7470ba6f32caf2f29cc21f", size = 5459285, upload-time = "2026-03-16T08:31:32.764Z" },
{ url = "https://files.pythonhosted.org/packages/70/11/86cbf205b111f93d45b5c04a61ea2cdcf12970b11277fa6a8eef1b8aaa0d/prek-0.3.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f8121060b4610411a936570ebb03b0f78c1b637c25d4914885b3bba127cb554", size = 6391127, upload-time = "2026-03-16T08:31:52.587Z" },
{ url = "https://files.pythonhosted.org/packages/0a/d3/bae4a351b9b095e317ad294817d3dff980d73a907a0449b49a9549894a80/prek-0.3.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a38d8061caae4ffd757316b9ef65409d808ae92482386385413365bad033c26", size = 5734755, upload-time = "2026-03-16T08:31:34.387Z" },
{ url = "https://files.pythonhosted.org/packages/ea/48/5b1d6d91407e14f86daf580a93f073d00b70f4dca8ff441d40971652a38e/prek-0.3.6-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:3d9e3b5031608657bec5d572fa45a41b6c7ddbe98f925f8240addbf57af55ea7", size = 5362190, upload-time = "2026-03-16T08:31:49.403Z" },
{ url = "https://files.pythonhosted.org/packages/08/18/38d6ea85770bb522d3dad18e8bbe435365e1e3e88f67716c2d8c2e57a36a/prek-0.3.6-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a581d2903be460a236748fb3cfcb5b7dbe5b4af2409f06c0427b637676d4b78a", size = 5181858, upload-time = "2026-03-16T08:31:43.515Z" },
{ url = "https://files.pythonhosted.org/packages/3b/61/7179e9faffa3722a96fee8d9cebdb3982390410b85fc2aaeacfe49c361b5/prek-0.3.6-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:d663f1c467dccbd414ab0caa323230f33aa27797c575d98af1013866e1f83a12", size = 5023469, upload-time = "2026-03-16T08:31:35.975Z" },
{ url = "https://files.pythonhosted.org/packages/ad/69/8a496892f8c9c898dea8cfe4917bbd58808367975132457b5ab5ac095269/prek-0.3.6-py3-none-musllinux_1_1_i686.whl", hash = "sha256:cbc7f0b344432630e990a6c6dd512773fbb7253c8df3c3f78eedd80b115ed3c9", size = 5322570, upload-time = "2026-03-16T08:31:51.034Z" },
{ url = "https://files.pythonhosted.org/packages/95/ee/f174bcfd73e8337a4290cb7eaf70b37aaec228e4f5d5ec6e61e0546ee896/prek-0.3.6-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:6ef02ce9d2389daae85f099fd4f34aa5537e3670b5e2a3174c9110ce69958c10", size = 5848197, upload-time = "2026-03-16T08:31:44.975Z" },
{ url = "https://files.pythonhosted.org/packages/00/84/40d2ddf362d12c4cd4a25a8c89a862edf87cdfbf1422aa41aac8e315d409/prek-0.3.8-py3-none-linux_armv6l.whl", hash = "sha256:6fb646ada60658fa6dd7771b2e0fb097f005151be222f869dada3eb26d79ed33", size = 5226646, upload-time = "2026-03-23T08:23:18.306Z" },
{ url = "https://files.pythonhosted.org/packages/e1/52/7308a033fa43b7e8e188797bd2b3b017c0f0adda70fa7af575b1f43ea888/prek-0.3.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f3d7fdadb15efc19c09953c7a33cf2061a70f367d1e1957358d3ad5cc49d0616", size = 5620104, upload-time = "2026-03-23T08:23:40.053Z" },
{ url = "https://files.pythonhosted.org/packages/ff/b1/f106ac000a91511a9cd80169868daf2f5b693480ef5232cec5517a38a512/prek-0.3.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:72728c3295e79ca443f8c1ec037d2a5b914ec73a358f69cf1bc1964511876bf8", size = 5199867, upload-time = "2026-03-23T08:23:38.066Z" },
{ url = "https://files.pythonhosted.org/packages/b3/e9/970713f4b019f69de9844e1bab37b8ddb67558e410916f4eb5869a696165/prek-0.3.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:48efc28f2f53b5b8087efca9daaed91572d62df97d5f24a1c7a087fecb5017de", size = 5441801, upload-time = "2026-03-23T08:23:32.617Z" },
{ url = "https://files.pythonhosted.org/packages/12/a4/7ef44032b181753e19452ec3b09abb3a32607cf6b0a0508f0604becaaf2b/prek-0.3.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f6ca9d63bacbc448a5c18e955c78d3ac5176c3a17c3baacdd949b1a623e08a36", size = 5155107, upload-time = "2026-03-23T08:23:31.021Z" },
{ url = "https://files.pythonhosted.org/packages/bd/77/4d9c8985dbba84149760785dfe07093ea1e29d710257dfb7c89615e2234c/prek-0.3.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1000f7029696b4fe712fb1fefd4c55b9c4de72b65509c8e50296370a06f9dc3f", size = 5566541, upload-time = "2026-03-23T08:23:45.694Z" },
{ url = "https://files.pythonhosted.org/packages/1a/1a/81e6769ac1f7f8346d09ce2ab0b47cf06466acd9ff72e87e5d1f0d98cd32/prek-0.3.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6ff0bed0e2c1286522987d982168a86cbbd0d069d840506a46c9fda983515517", size = 6552991, upload-time = "2026-03-23T08:23:21.958Z" },
{ url = "https://files.pythonhosted.org/packages/6f/fa/ce2df0dd2dc75a9437a52463239d0782998943d7b04e191fb89b83016c34/prek-0.3.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fb087ac0ffda3ac65bbbae9a38326a7fd27ee007bb4a94323ce1eb539d8bbec", size = 5832972, upload-time = "2026-03-23T08:23:20.258Z" },
{ url = "https://files.pythonhosted.org/packages/18/6b/9d4269df9073216d296244595a21c253b6475dfc9076c0bd2906be7a436c/prek-0.3.8-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:2e1e5e206ff7b31bd079cce525daddc96cd6bc544d20dc128921ad92f7a4c85d", size = 5448371, upload-time = "2026-03-23T08:23:41.835Z" },
{ url = "https://files.pythonhosted.org/packages/60/1d/1e4d8a78abefa5b9d086e5a9f1638a74b5e540eec8a648d9946707701f29/prek-0.3.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:dcea3fe23832a4481bccb7c45f55650cb233be7c805602e788bb7dba60f2d861", size = 5270546, upload-time = "2026-03-23T08:23:24.231Z" },
{ url = "https://files.pythonhosted.org/packages/77/07/34f36551a6319ae36e272bea63a42f59d41d2d47ab0d5fb00eb7b4e88e87/prek-0.3.8-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:4d25e647e9682f6818ab5c31e7a4b842993c14782a6ffcd128d22b784e0d677f", size = 5124032, upload-time = "2026-03-23T08:23:26.368Z" },
{ url = "https://files.pythonhosted.org/packages/e3/01/6d544009bb655e709993411796af77339f439526db4f3b3509c583ad8eb9/prek-0.3.8-py3-none-musllinux_1_1_i686.whl", hash = "sha256:de528b82935e33074815acff3c7c86026754d1212136295bc88fe9c43b4231d5", size = 5432245, upload-time = "2026-03-23T08:23:47.877Z" },
{ url = "https://files.pythonhosted.org/packages/54/96/1237ee269e9bfa283ffadbcba1f401f48a47aed2b2563eb1002740d6079d/prek-0.3.8-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:6d660f1c25a126e6d9f682fe61449441226514f412a4469f5d71f8f8cad56db2", size = 5950550, upload-time = "2026-03-23T08:23:43.8Z" },
]
[[package]]