mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
split CrawlSetup into Install phase with new Binary + BinaryRequest events
This commit is contained in:
@@ -8,127 +8,32 @@ import rich_click as click
|
||||
from archivebox.misc.util import docstring, enforce_types
|
||||
|
||||
|
||||
# State Machine ASCII Art Diagrams
|
||||
CRAWL_MACHINE_DIAGRAM = """
|
||||
EVENT_FLOW_DIAGRAM = """
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ CrawlMachine │
|
||||
│ ArchiveBox / abx-dl Flow │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ QUEUED │◄────────────────┐ │
|
||||
│ │ (initial) │ │ │
|
||||
│ └──────┬──────┘ │ │
|
||||
│ │ │ tick() unless can_start() │
|
||||
│ │ tick() when │ │
|
||||
│ │ can_start() │ │
|
||||
│ ▼ │ │
|
||||
│ ┌─────────────┐ │ │
|
||||
│ │ STARTED │─────────────────┘ │
|
||||
│ │ │◄────────────────┐ │
|
||||
│ │ enter: │ │ │
|
||||
│ │ crawl.run()│ │ tick() unless is_finished() │
|
||||
│ │ (discover │ │ │
|
||||
│ │ Crawl │─────────────────┘ │
|
||||
│ │ hooks) │ │
|
||||
│ └──────┬──────┘ │
|
||||
│ │ │
|
||||
│ │ tick() when is_finished() │
|
||||
│ ▼ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ SEALED │ │
|
||||
│ │ (final) │ │
|
||||
│ │ │ │
|
||||
│ │ enter: │ │
|
||||
│ │ cleanup() │ │
|
||||
│ └─────────────┘ │
|
||||
│ InstallEvent │
|
||||
│ └─ on_Install__* │
|
||||
│ └─ BinaryRequest records │
|
||||
│ └─ BinaryRequestEvent │
|
||||
│ └─ on_BinaryRequest__* │
|
||||
│ └─ BinaryEvent / MachineEvent │
|
||||
│ │
|
||||
│ Hooks triggered: on_Crawl__* (during STARTED.enter via crawl.run()) │
|
||||
│ on_CrawlEnd__* (during SEALED.enter via cleanup()) │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
"""
|
||||
|
||||
SNAPSHOT_MACHINE_DIAGRAM = """
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ SnapshotMachine │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ CrawlEvent │
|
||||
│ └─ CrawlSetupEvent │
|
||||
│ └─ on_CrawlSetup__* │
|
||||
│ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ QUEUED │◄────────────────┐ │
|
||||
│ │ (initial) │ │ │
|
||||
│ └──────┬──────┘ │ │
|
||||
│ │ │ tick() unless can_start() │
|
||||
│ │ tick() when │ │
|
||||
│ │ can_start() │ │
|
||||
│ ▼ │ │
|
||||
│ ┌─────────────┐ │ │
|
||||
│ │ STARTED │─────────────────┘ │
|
||||
│ │ │◄────────────────┐ │
|
||||
│ │ enter: │ │ │
|
||||
│ │ snapshot │ │ tick() unless is_finished() │
|
||||
│ │ .run() │ │ │
|
||||
│ │ (discover │─────────────────┘ │
|
||||
│ │ Snapshot │ │
|
||||
│ │ hooks, │ │
|
||||
│ │ create │ │
|
||||
│ │ pending │ │
|
||||
│ │ results) │ │
|
||||
│ └──────┬──────┘ │
|
||||
│ │ │
|
||||
│ │ tick() when is_finished() │
|
||||
│ ▼ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ SEALED │ │
|
||||
│ │ (final) │ │
|
||||
│ │ │ │
|
||||
│ │ enter: │ │
|
||||
│ │ cleanup() │ │
|
||||
│ └─────────────┘ │
|
||||
│ CrawlStartEvent │
|
||||
│ └─ SnapshotEvent │
|
||||
│ └─ on_Snapshot__* │
|
||||
│ └─ Snapshot / ArchiveResult / Tag / Machine / BinaryRequest │
|
||||
│ │
|
||||
│ Hooks triggered: on_Snapshot__* (creates ArchiveResults in STARTED.enter) │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
"""
|
||||
|
||||
BINARY_MACHINE_DIAGRAM = """
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ BinaryMachine │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ SnapshotCleanupEvent -> internal cleanup, no direct hook family │
|
||||
│ CrawlCleanupEvent -> internal cleanup, no direct hook family │
|
||||
│ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ QUEUED │◄────────────────┐ │
|
||||
│ │ (initial) │ │ │
|
||||
│ └──────┬──────┘ │ │
|
||||
│ │ │ tick() unless can_install() │
|
||||
│ │ │ (stays queued if failed) │
|
||||
│ │ tick() when │ │
|
||||
│ │ can_install() │ │
|
||||
│ │ │ │
|
||||
│ │ on_install() runs │ │
|
||||
│ │ during transition: │ │
|
||||
│ │ • binary.run() │ │
|
||||
│ │ (discover Binary │ │
|
||||
│ │ hooks, try each │ │
|
||||
│ │ provider until │ │
|
||||
│ │ one succeeds) │ │
|
||||
│ │ • Sets abspath, │ │
|
||||
│ │ version, sha256 │ │
|
||||
│ │ │ │
|
||||
│ │ If install fails: │ │
|
||||
│ │ raises exception──────┘ │
|
||||
│ │ (retry_at bumped) │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ INSTALLED │ │
|
||||
│ │ (final) │ │
|
||||
│ │ │ │
|
||||
│ │ Binary is │ │
|
||||
│ │ ready to │ │
|
||||
│ │ use │ │
|
||||
│ └─────────────┘ │
|
||||
│ │
|
||||
│ Hooks triggered: on_Binary__* (provider hooks during transition) │
|
||||
│ Providers tried in sequence until one succeeds: apt, brew, pip, npm, etc. │
|
||||
│ Installation is synchronous - no intermediate STARTED state │
|
||||
│ ArchiveBox projects bus events into the DB; it no longer drives plugin │
|
||||
│ execution through the old queued model executor. │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
"""
|
||||
|
||||
@@ -136,15 +41,16 @@ BINARY_MACHINE_DIAGRAM = """
|
||||
@enforce_types
|
||||
def pluginmap(
|
||||
show_disabled: bool = False,
|
||||
model: str | None = None,
|
||||
event: str | None = None,
|
||||
quiet: bool = False,
|
||||
) -> dict:
|
||||
"""
|
||||
Show a map of all state machines and their associated plugin hooks.
|
||||
Show the current abx-dl event phases and their associated plugin hooks.
|
||||
|
||||
Displays ASCII art diagrams of the core queued model state machines (Crawl,
|
||||
Snapshot, Binary) and lists all auto-detected on_Modelname_xyz hooks
|
||||
that will run for each model's transitions.
|
||||
This command reflects the new bus-driven runtime, not the legacy ArchiveBox
|
||||
state-machine executor. Event names are normalized to hook prefixes by
|
||||
stripping a trailing `Event`, then ArchiveBox checks whether any matching
|
||||
`on_{EventFamily}__*` scripts actually exist.
|
||||
"""
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
@@ -152,49 +58,65 @@ def pluginmap(
|
||||
from rich import box
|
||||
|
||||
from archivebox.hooks import (
|
||||
discover_hooks,
|
||||
is_background_hook,
|
||||
BUILTIN_PLUGINS_DIR,
|
||||
USER_PLUGINS_DIR,
|
||||
discover_hooks,
|
||||
is_background_hook,
|
||||
normalize_hook_event_name,
|
||||
)
|
||||
|
||||
console = Console()
|
||||
prnt = console.print
|
||||
|
||||
# Model event types that can have hooks
|
||||
model_events = {
|
||||
"Crawl": {
|
||||
"description": "Hooks run when a Crawl starts (QUEUED→STARTED)",
|
||||
"machine": "CrawlMachine",
|
||||
"diagram": CRAWL_MACHINE_DIAGRAM,
|
||||
event_phases = {
|
||||
"InstallEvent": {
|
||||
"description": "Pre-run dependency phase. on_Install hooks request binaries and update machine config.",
|
||||
"emits": ["BinaryRequestEvent", "BinaryEvent", "MachineEvent", "ProcessEvent"],
|
||||
},
|
||||
"CrawlEnd": {
|
||||
"description": "Hooks run when a Crawl finishes (STARTED→SEALED)",
|
||||
"machine": "CrawlMachine",
|
||||
"diagram": None, # Part of CrawlMachine
|
||||
"BinaryRequestEvent": {
|
||||
"description": "Provider phase. on_BinaryRequest hooks resolve or install requested binaries.",
|
||||
"emits": ["BinaryEvent", "MachineEvent", "ProcessEvent"],
|
||||
},
|
||||
"Snapshot": {
|
||||
"description": "Hooks run for each Snapshot (creates ArchiveResults)",
|
||||
"machine": "SnapshotMachine",
|
||||
"diagram": SNAPSHOT_MACHINE_DIAGRAM,
|
||||
"BinaryEvent": {
|
||||
"description": "Resolved binary metadata event. Projected into the DB/runtime config.",
|
||||
"emits": [],
|
||||
},
|
||||
"Binary": {
|
||||
"description": "Hooks for installing binary dependencies (providers)",
|
||||
"machine": "BinaryMachine",
|
||||
"diagram": BINARY_MACHINE_DIAGRAM,
|
||||
"CrawlEvent": {
|
||||
"description": "Root crawl lifecycle event emitted by the runner.",
|
||||
"emits": ["CrawlSetupEvent", "CrawlStartEvent", "CrawlCleanupEvent", "CrawlCompletedEvent"],
|
||||
},
|
||||
"CrawlSetupEvent": {
|
||||
"description": "Crawl-scoped setup phase. on_CrawlSetup hooks launch/configure shared daemons and runtime state.",
|
||||
"emits": ["MachineEvent", "ProcessEvent"],
|
||||
},
|
||||
"SnapshotEvent": {
|
||||
"description": "Per-snapshot extraction phase. on_Snapshot hooks emit ArchiveResult, Snapshot, Tag, Machine, and BinaryRequest records.",
|
||||
"emits": ["ArchiveResultEvent", "SnapshotEvent", "TagEvent", "MachineEvent", "BinaryRequestEvent", "ProcessEvent"],
|
||||
},
|
||||
"SnapshotCleanupEvent": {
|
||||
"description": "Internal snapshot cleanup phase.",
|
||||
"emits": ["ProcessKillEvent"],
|
||||
},
|
||||
"CrawlCleanupEvent": {
|
||||
"description": "Internal crawl cleanup phase.",
|
||||
"emits": ["ProcessKillEvent"],
|
||||
},
|
||||
}
|
||||
|
||||
# Filter to specific model if requested
|
||||
if model:
|
||||
model = model.title()
|
||||
if model not in model_events:
|
||||
prnt(f'[red]Error: Unknown model "{model}". Available: {", ".join(model_events.keys())}[/red]')
|
||||
return {}
|
||||
model_events = {model: model_events[model]}
|
||||
if event:
|
||||
requested = str(event).strip()
|
||||
if requested in event_phases:
|
||||
event_phases = {requested: event_phases[requested]}
|
||||
else:
|
||||
normalized_requested = normalize_hook_event_name(requested)
|
||||
matched_name = next((name for name in event_phases if normalize_hook_event_name(name) == normalized_requested), None)
|
||||
if matched_name is None:
|
||||
prnt(f'[red]Error: Unknown event "{requested}". Available: {", ".join(event_phases.keys())}[/red]')
|
||||
return {}
|
||||
event_phases = {matched_name: event_phases[matched_name]}
|
||||
|
||||
result = {
|
||||
"models": {},
|
||||
"events": {},
|
||||
"plugins_dir": str(BUILTIN_PLUGINS_DIR),
|
||||
"user_plugins_dir": str(USER_PLUGINS_DIR),
|
||||
}
|
||||
@@ -205,88 +127,83 @@ def pluginmap(
|
||||
prnt(f"[dim]Built-in plugins: {BUILTIN_PLUGINS_DIR}[/dim]")
|
||||
prnt(f"[dim]User plugins: {USER_PLUGINS_DIR}[/dim]")
|
||||
prnt()
|
||||
prnt(
|
||||
Panel(
|
||||
EVENT_FLOW_DIAGRAM,
|
||||
title="[bold green]Event Flow[/bold green]",
|
||||
border_style="green",
|
||||
expand=False,
|
||||
),
|
||||
)
|
||||
prnt()
|
||||
|
||||
for event_name, info in model_events.items():
|
||||
# Discover hooks for this event
|
||||
for event_name, info in event_phases.items():
|
||||
hook_event = normalize_hook_event_name(event_name)
|
||||
hooks = discover_hooks(event_name, filter_disabled=not show_disabled)
|
||||
|
||||
# Build hook info list
|
||||
hook_infos = []
|
||||
for hook_path in hooks:
|
||||
# Get plugin name from parent directory (e.g., 'wget' from 'plugins/wget/on_Snapshot__06_wget.bg.py')
|
||||
plugin_name = hook_path.parent.name
|
||||
is_bg = is_background_hook(hook_path.name)
|
||||
|
||||
hook_infos.append(
|
||||
{
|
||||
"path": str(hook_path),
|
||||
"name": hook_path.name,
|
||||
"plugin": plugin_name,
|
||||
"is_background": is_bg,
|
||||
"is_background": is_background_hook(hook_path.name),
|
||||
"extension": hook_path.suffix,
|
||||
},
|
||||
)
|
||||
|
||||
result["models"][event_name] = {
|
||||
result["events"][event_name] = {
|
||||
"description": info["description"],
|
||||
"machine": info["machine"],
|
||||
"hook_event": hook_event,
|
||||
"emits": info["emits"],
|
||||
"hooks": hook_infos,
|
||||
"hook_count": len(hook_infos),
|
||||
}
|
||||
|
||||
if not quiet:
|
||||
# Show diagram if this model has one
|
||||
if info.get("diagram"):
|
||||
assert info["diagram"] is not None
|
||||
prnt(
|
||||
Panel(
|
||||
info["diagram"],
|
||||
title=f"[bold green]{info['machine']}[/bold green]",
|
||||
border_style="green",
|
||||
expand=False,
|
||||
),
|
||||
)
|
||||
prnt()
|
||||
if quiet:
|
||||
continue
|
||||
|
||||
# Create hooks table
|
||||
table = Table(
|
||||
title=f"[bold yellow]on_{event_name}__* Hooks[/bold yellow] ({len(hooks)} found)",
|
||||
box=box.ROUNDED,
|
||||
show_header=True,
|
||||
header_style="bold magenta",
|
||||
)
|
||||
table.add_column("Plugin", style="cyan", width=20)
|
||||
table.add_column("Hook Name", style="green")
|
||||
table.add_column("BG", justify="center", width=4)
|
||||
table.add_column("Type", justify="center", width=5)
|
||||
title_suffix = f" -> on_{hook_event}__*" if hook_infos else ""
|
||||
table = Table(
|
||||
title=f"[bold yellow]{event_name}[/bold yellow]{title_suffix} ({len(hooks)} hooks)",
|
||||
box=box.ROUNDED,
|
||||
show_header=True,
|
||||
header_style="bold magenta",
|
||||
)
|
||||
table.add_column("Plugin", style="cyan", width=20)
|
||||
table.add_column("Hook Name", style="green")
|
||||
table.add_column("BG", justify="center", width=4)
|
||||
table.add_column("Type", justify="center", width=5)
|
||||
|
||||
# Sort lexicographically by hook name
|
||||
sorted_hooks = sorted(hook_infos, key=lambda h: h["name"])
|
||||
|
||||
for hook in sorted_hooks:
|
||||
if hook_infos:
|
||||
for hook in sorted(hook_infos, key=lambda h: h["name"]):
|
||||
bg_marker = "[yellow]bg[/yellow]" if hook["is_background"] else ""
|
||||
ext = hook["extension"].lstrip(".")
|
||||
table.add_row(
|
||||
hook["plugin"],
|
||||
hook["name"],
|
||||
bg_marker,
|
||||
ext,
|
||||
hook["extension"].lstrip("."),
|
||||
)
|
||||
else:
|
||||
table.add_row("[dim]-[/dim]", "[dim]No direct hooks[/dim]", "", "")
|
||||
|
||||
prnt(table)
|
||||
prnt()
|
||||
prnt(f"[dim]{info['description']}[/dim]")
|
||||
prnt()
|
||||
prnt(table)
|
||||
prnt(f"[dim]{info['description']}[/dim]")
|
||||
if info["emits"]:
|
||||
prnt(f"[dim]Emits: {', '.join(info['emits'])}[/dim]")
|
||||
if not hook_infos:
|
||||
prnt(f"[dim]No direct on_{hook_event}__* scripts are currently defined for this event family.[/dim]")
|
||||
prnt()
|
||||
|
||||
# Summary
|
||||
if not quiet:
|
||||
total_hooks = sum(m["hook_count"] for m in result["models"].values())
|
||||
total_hooks = sum(event_info["hook_count"] for event_info in result["events"].values())
|
||||
prnt(f"[bold]Total hooks discovered: {total_hooks}[/bold]")
|
||||
prnt()
|
||||
prnt("[dim]Hook naming convention: on_{Model}__{XX}_{description}[.bg].{ext}[/dim]")
|
||||
prnt("[dim] - XX: Two-digit lexicographic order (00-99)[/dim]")
|
||||
prnt("[dim] - .bg: Background hook (non-blocking)[/dim]")
|
||||
prnt("[dim] - ext: py, sh, or js[/dim]")
|
||||
prnt("[dim]Hook naming convention: on_{EventFamily}__{XX}_{description}[.bg].{ext}[/dim]")
|
||||
prnt("[dim]Event names are normalized with a simple `Event` suffix strip before hook discovery.[/dim]")
|
||||
prnt("[dim]If no `on_{EventFamily}__*` scripts exist, the event is shown as having no direct hooks.[/dim]")
|
||||
prnt()
|
||||
|
||||
return result
|
||||
@@ -294,8 +211,8 @@ def pluginmap(
|
||||
|
||||
@click.command()
|
||||
@click.option("--show-disabled", "-a", is_flag=True, help="Show hooks from disabled plugins too")
|
||||
@click.option("--model", "-m", type=str, default=None, help="Filter to specific model (Crawl, Snapshot, Binary, CrawlEnd)")
|
||||
@click.option("--quiet", "-q", is_flag=True, help="Output JSON only, no ASCII diagrams")
|
||||
@click.option("--event", "-e", type=str, default=None, help="Filter to specific event (e.g. InstallEvent, SnapshotEvent)")
|
||||
@click.option("--quiet", "-q", is_flag=True, help="Output JSON only, no tables")
|
||||
@docstring(pluginmap.__doc__)
|
||||
def main(**kwargs):
|
||||
import json
|
||||
|
||||
@@ -10,7 +10,7 @@ Modes:
|
||||
- Without stdin (TTY): Run the background runner in foreground until killed
|
||||
- --crawl-id: Run the crawl runner for a specific crawl only
|
||||
- --snapshot-id: Run a specific snapshot through its parent crawl
|
||||
- --binary-id: Emit a BinaryEvent for a specific Binary row
|
||||
- --binary-id: Emit a BinaryRequestEvent for a specific Binary row
|
||||
|
||||
Examples:
|
||||
# Run the background runner in foreground
|
||||
@@ -64,7 +64,15 @@ def process_stdin_records() -> int:
|
||||
"""
|
||||
from django.utils import timezone
|
||||
|
||||
from archivebox.misc.jsonl import read_stdin, write_record, TYPE_CRAWL, TYPE_SNAPSHOT, TYPE_ARCHIVERESULT, TYPE_BINARY
|
||||
from archivebox.misc.jsonl import (
|
||||
read_stdin,
|
||||
write_record,
|
||||
TYPE_CRAWL,
|
||||
TYPE_SNAPSHOT,
|
||||
TYPE_ARCHIVERESULT,
|
||||
TYPE_BINARYREQUEST,
|
||||
TYPE_BINARY,
|
||||
)
|
||||
from archivebox.base_models.models import get_or_create_system_user_pk
|
||||
from archivebox.core.models import Snapshot, ArchiveResult
|
||||
from archivebox.crawls.models import Crawl
|
||||
@@ -185,7 +193,7 @@ def process_stdin_records() -> int:
|
||||
output_records.append(record if not archiveresult else archiveresult.to_json())
|
||||
queued_count += 1
|
||||
|
||||
elif record_type == TYPE_BINARY:
|
||||
elif record_type in {TYPE_BINARYREQUEST, TYPE_BINARY}:
|
||||
if record_id:
|
||||
try:
|
||||
binary = Binary.objects.get(id=record_id)
|
||||
|
||||
@@ -1104,6 +1104,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
from archivebox.misc.jsonl import (
|
||||
TYPE_SNAPSHOT,
|
||||
TYPE_ARCHIVERESULT,
|
||||
TYPE_BINARYREQUEST,
|
||||
TYPE_BINARY,
|
||||
TYPE_PROCESS,
|
||||
)
|
||||
@@ -1126,7 +1127,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
result["snapshot"] = record
|
||||
elif record_type == TYPE_ARCHIVERESULT:
|
||||
result["archive_results"].append(record)
|
||||
elif record_type == TYPE_BINARY:
|
||||
elif record_type in {TYPE_BINARYREQUEST, TYPE_BINARY}:
|
||||
result["binaries"].append(record)
|
||||
elif record_type == TYPE_PROCESS:
|
||||
result["processes"].append(record)
|
||||
|
||||
@@ -1226,11 +1226,13 @@ def live_progress_view(request):
|
||||
return (plugin, plugin, "unknown", "")
|
||||
|
||||
phase = "unknown"
|
||||
if normalized_hook_name.startswith("on_Crawl__"):
|
||||
if normalized_hook_name.startswith("on_Install__"):
|
||||
phase = "install"
|
||||
elif normalized_hook_name.startswith("on_CrawlSetup__"):
|
||||
phase = "crawl"
|
||||
elif normalized_hook_name.startswith("on_Snapshot__"):
|
||||
phase = "snapshot"
|
||||
elif normalized_hook_name.startswith("on_Binary__"):
|
||||
elif normalized_hook_name.startswith("on_BinaryRequest__"):
|
||||
phase = "binary"
|
||||
|
||||
label = normalized_hook_name
|
||||
|
||||
@@ -827,14 +827,16 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
||||
for record in records[:3]:
|
||||
print(f" Record: type={record.get('type')}, keys={list(record.keys())[:5]}")
|
||||
if system_task:
|
||||
records = [record for record in records if record.get("type") in ("Binary", "Machine")]
|
||||
records = [record for record in records if record.get("type") in ("BinaryRequest", "Binary", "Machine")]
|
||||
overrides = {"crawl": self}
|
||||
stats = process_hook_records(records, overrides=overrides)
|
||||
if stats:
|
||||
print(f"[green]✓ Created: {stats}[/green]")
|
||||
|
||||
hook_binary_names = {
|
||||
str(record.get("name")).strip() for record in records if record.get("type") == "Binary" and record.get("name")
|
||||
str(record.get("name")).strip()
|
||||
for record in records
|
||||
if record.get("type") in ("BinaryRequest", "Binary") and record.get("name")
|
||||
}
|
||||
hook_binary_names.discard("")
|
||||
if hook_binary_names:
|
||||
@@ -933,7 +935,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
|
||||
# Check if any snapshots exist for this crawl
|
||||
snapshots = Snapshot.objects.filter(crawl=self)
|
||||
|
||||
# If no snapshots exist, allow finishing (e.g., archivebox://install crawls that only run hooks)
|
||||
# If no snapshots exist, allow finishing (e.g., system crawls that only run setup hooks)
|
||||
if not snapshots.exists():
|
||||
return True
|
||||
|
||||
@@ -1081,7 +1083,7 @@ class CrawlMachine(BaseStateMachine):
|
||||
status=Crawl.StatusChoices.STARTED,
|
||||
)
|
||||
else:
|
||||
# No snapshots (system crawl like archivebox://install)
|
||||
# No snapshots (system crawl that only runs setup hooks)
|
||||
print("[cyan]🔄 No snapshots created, sealing crawl immediately[/cyan]", file=sys.stderr)
|
||||
# Seal immediately since there's no work to do
|
||||
self.seal()
|
||||
|
||||
@@ -1,9 +1,22 @@
|
||||
"""
|
||||
Hook discovery and execution system for ArchiveBox plugins.
|
||||
Hook discovery and execution helpers for ArchiveBox plugins.
|
||||
|
||||
Hooks are standalone scripts that run as separate processes and communicate
|
||||
with ArchiveBox via CLI arguments and stdout JSON output. This keeps the plugin
|
||||
system simple and language-agnostic.
|
||||
ArchiveBox no longer drives plugin execution itself during normal crawls.
|
||||
`abx-dl` owns the live runtime and emits typed bus events; ArchiveBox mainly:
|
||||
|
||||
- discovers hook files for inspection / docs / legacy direct execution helpers
|
||||
- executes individual hook scripts when explicitly requested
|
||||
- parses hook stdout JSONL records into ArchiveBox models when needed
|
||||
|
||||
Hook-backed event families are discovered from filenames like:
|
||||
on_Install__*
|
||||
on_BinaryRequest__*
|
||||
on_CrawlSetup__*
|
||||
on_Snapshot__*
|
||||
|
||||
Lifecycle event names like `InstallEvent` or `SnapshotCleanupEvent` are
|
||||
normalized to the corresponding `on_{EventFamily}__*` prefix by a simple
|
||||
string transform. If no scripts exist for that prefix, discovery returns `[]`.
|
||||
|
||||
Directory structure:
|
||||
abx_plugins/plugins/<plugin_name>/on_<Event>__<hook_name>.<ext> (built-in package)
|
||||
@@ -11,7 +24,7 @@ Directory structure:
|
||||
|
||||
Hook contract:
|
||||
Input: --url=<url> (and other --key=value args)
|
||||
Output: JSON to stdout, files to $PWD
|
||||
Output: JSONL records to stdout, files to $PWD
|
||||
Exit: 0 = success, non-zero = failure
|
||||
|
||||
Execution order:
|
||||
@@ -19,36 +32,13 @@ Execution order:
|
||||
- Foreground hooks run sequentially in that order
|
||||
- Background hooks (.bg suffix) run concurrently and do not block foreground progress
|
||||
- After all foreground hooks complete, background hooks receive SIGTERM and must finalize
|
||||
- Failed extractors don't block subsequent extractors
|
||||
|
||||
Hook Naming Convention:
|
||||
on_{ModelName}__{run_order}_{description}[.finite.bg|.daemon.bg].{ext}
|
||||
Hook naming convention:
|
||||
on_{EventFamily}__{run_order}_{description}[.finite.bg|.daemon.bg].{ext}
|
||||
|
||||
Examples:
|
||||
on_Snapshot__00_setup.py # runs first
|
||||
on_Snapshot__10_chrome_tab.daemon.bg.js # background (doesn't block)
|
||||
on_Snapshot__50_screenshot.js # foreground (blocks)
|
||||
on_Snapshot__63_media.finite.bg.py # background (long-running)
|
||||
|
||||
Dependency handling:
|
||||
Extractor plugins that depend on other plugins' output should check at runtime:
|
||||
|
||||
```python
|
||||
# Example: screenshot plugin depends on chrome plugin
|
||||
chrome_dir = Path(os.environ.get('SNAPSHOT_DIR', '.')) / 'chrome'
|
||||
if not (chrome_dir / 'cdp_url.txt').exists():
|
||||
print('{"status": "skipped", "output": "chrome session not available"}')
|
||||
sys.exit(1) # Exit non-zero so it gets retried later
|
||||
```
|
||||
|
||||
On retry (Snapshot.retry_failed_archiveresults()):
|
||||
- Only FAILED/SKIPPED plugins reset to queued (SUCCEEDED stays)
|
||||
- Run in order again
|
||||
- If dependencies now succeed, dependents can run
|
||||
|
||||
API (all hook logic lives here):
|
||||
discover_hooks(event) -> List[Path] Find hook scripts
|
||||
run_hook(script, ...) -> HookResult Execute a hook script
|
||||
API:
|
||||
discover_hooks(event) -> List[Path] Find hook scripts for a hook-backed event family
|
||||
run_hook(script, ...) -> Process Execute a hook script directly
|
||||
is_background_hook(name) -> bool Check if hook is background (.bg suffix)
|
||||
"""
|
||||
|
||||
@@ -122,6 +112,27 @@ def iter_plugin_dirs() -> list[Path]:
|
||||
return plugin_dirs
|
||||
|
||||
|
||||
def normalize_hook_event_name(event_name: str) -> str | None:
    """
    Reduce an event class name or hook event family to its bare family name.

    The returned value is the `{EventFamily}` part of an
    `on_{EventFamily}__*` hook filename. Surrounding whitespace is trimmed and
    one trailing `Event` suffix is removed; names without that suffix are
    returned trimmed but otherwise unchanged.

    Examples:
        InstallEvent        -> Install
        BinaryRequestEvent  -> BinaryRequest
        CrawlSetupEvent     -> CrawlSetup
        SnapshotEvent       -> Snapshot
        BinaryEvent         -> Binary
        CrawlCleanupEvent   -> CrawlCleanup
        Snapshot            -> Snapshot

    Returns:
        The family name, or None when the input is empty/blank or when nothing
        remains after the suffix is removed (i.e. the input was just `Event`).
    """
    suffix = "Event"
    # Falsy inputs (None, "") collapse to an empty string before trimming.
    family = str(event_name or "").strip()

    # Only a single trailing suffix is stripped: "EventEvent" -> "Event".
    if family.endswith(suffix):
        family = family[: -len(suffix)]

    # An empty family name is reported as None rather than "".
    return family or None
|
||||
|
||||
|
||||
class HookResult(TypedDict, total=False):
|
||||
"""Raw result from run_hook()."""
|
||||
|
||||
@@ -144,7 +155,7 @@ def discover_hooks(
|
||||
config: dict[str, Any] | None = None,
|
||||
) -> list[Path]:
|
||||
"""
|
||||
Find all hook scripts matching on_{event_name}__*.{sh,py,js} pattern.
|
||||
Find all hook scripts for an event family.
|
||||
|
||||
Searches both built-in and user plugin directories.
|
||||
Filters out hooks from disabled plugins by default (respects USE_/SAVE_ flags).
|
||||
@@ -156,7 +167,10 @@ def discover_hooks(
|
||||
on_Snapshot__26_readability.py # runs later (depends on singlefile)
|
||||
|
||||
Args:
|
||||
event_name: Event name (e.g., 'Snapshot', 'Binary', 'Crawl')
|
||||
event_name: Hook event family or event class name.
|
||||
Examples: 'Install', 'InstallEvent', 'BinaryRequestEvent', 'Snapshot'.
|
||||
Event names are normalized by stripping a trailing `Event`.
|
||||
If no matching `on_{EventFamily}__*` scripts exist, returns [].
|
||||
filter_disabled: If True, skip hooks from disabled plugins (default: True)
|
||||
config: Optional config dict from get_config() (merges file, env, machine, crawl, snapshot)
|
||||
If None, will call get_config() with global scope
|
||||
@@ -179,6 +193,10 @@ def discover_hooks(
|
||||
discover_hooks('Snapshot', filter_disabled=False)
|
||||
# Returns: [Path('.../on_Snapshot__10_title.py'), ..., Path('.../on_Snapshot__50_wget.py')]
|
||||
"""
|
||||
hook_event_name = normalize_hook_event_name(event_name)
|
||||
if not hook_event_name:
|
||||
return []
|
||||
|
||||
hooks = []
|
||||
|
||||
for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
|
||||
@@ -187,18 +205,18 @@ def discover_hooks(
|
||||
|
||||
# Search for hook scripts in all subdirectories
|
||||
for ext in ("sh", "py", "js"):
|
||||
pattern = f"*/on_{event_name}__*.{ext}"
|
||||
pattern = f"*/on_{hook_event_name}__*.{ext}"
|
||||
hooks.extend(base_dir.glob(pattern))
|
||||
|
||||
# Also check for hooks directly in the plugins directory
|
||||
pattern_direct = f"on_{event_name}__*.{ext}"
|
||||
pattern_direct = f"on_{hook_event_name}__*.{ext}"
|
||||
hooks.extend(base_dir.glob(pattern_direct))
|
||||
|
||||
# Binary install hooks are provider hooks, not end-user extractors. They
|
||||
# self-filter via `binproviders`, so applying the PLUGINS whitelist here
|
||||
# can hide the very installer needed by a selected plugin (e.g.
|
||||
# `--plugins=singlefile` still needs the `npm` Binary hook).
|
||||
if filter_disabled and event_name != "Binary":
|
||||
# `--plugins=singlefile` still needs the `npm` BinaryRequest hook).
|
||||
if filter_disabled and hook_event_name != "BinaryRequest":
|
||||
# Get merged config if not provided (lazy import to avoid circular dependency)
|
||||
if config is None:
|
||||
from archivebox.config.configset import get_config
|
||||
@@ -1051,8 +1069,12 @@ def get_plugin_icon(plugin: str) -> str:
|
||||
|
||||
def process_hook_records(records: list[dict[str, Any]], overrides: dict[str, Any] | None = None) -> dict[str, int]:
|
||||
"""
|
||||
Process JSONL records from hook output.
|
||||
Dispatches to Model.from_json() for each record type.
|
||||
Process JSONL records emitted by hook stdout.
|
||||
|
||||
This handles hook-emitted record types such as Snapshot, Tag, BinaryRequest,
|
||||
Binary, and Machine. It does not process bus lifecycle events like
|
||||
InstallEvent, CrawlEvent, CrawlCleanupEvent, or SnapshotCleanupEvent, since
|
||||
those are not emitted as JSONL records by hook subprocesses.
|
||||
|
||||
Args:
|
||||
records: List of JSONL record dicts from result['records']
|
||||
@@ -1104,12 +1126,12 @@ def process_hook_records(records: list[dict[str, Any]], overrides: dict[str, Any
|
||||
if obj:
|
||||
stats["Tag"] = stats.get("Tag", 0) + 1
|
||||
|
||||
elif record_type == "Binary":
|
||||
elif record_type in {"BinaryRequest", "Binary"}:
|
||||
from archivebox.machine.models import Binary
|
||||
|
||||
obj = Binary.from_json(record.copy(), overrides)
|
||||
if obj:
|
||||
stats["Binary"] = stats.get("Binary", 0) + 1
|
||||
stats[record_type] = stats.get(record_type, 0) + 1
|
||||
|
||||
elif record_type == "Machine":
|
||||
from archivebox.machine.models import Machine
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Generated by hand on 2026-01-01
|
||||
# Converges machine app for 0.8.6rc0 → 0.9.x migration path
|
||||
# Drops old InstalledBinary table and ensures Binary table exists
|
||||
# Drops the legacy machine_installedbinary (InstalledBinary) table and ensures the Binary table exists
|
||||
|
||||
from django.db import migrations, connection
|
||||
|
||||
@@ -18,7 +18,7 @@ def converge_binary_table(apps, schema_editor):
|
||||
|
||||
print(f"DEBUG 0005: Existing tables: {existing_tables}")
|
||||
|
||||
# Drop old InstalledBinary table if it exists (0.8.6rc0 path)
|
||||
# Drop the legacy machine_installedbinary table if it exists (0.8.6rc0 path)
|
||||
if "machine_installedbinary" in existing_tables:
|
||||
print("✓ Dropping machine_installedbinary table (0.8.6rc0 divergence)")
|
||||
cursor.execute("DROP TABLE IF EXISTS machine_installedbinary")
|
||||
|
||||
@@ -348,7 +348,7 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
|
||||
Installation is synchronous during queued→installed transition.
|
||||
If installation fails, Binary stays in queued with retry_at set for later retry.
|
||||
|
||||
State machine calls run() which executes on_Binary__install_* hooks
|
||||
State machine calls run() which executes on_BinaryRequest__* hooks
|
||||
to install the binary using the specified providers.
|
||||
"""
|
||||
|
||||
@@ -447,12 +447,15 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
|
||||
"""
|
||||
from archivebox.config import VERSION
|
||||
|
||||
is_installed = bool(self.abspath and self.version)
|
||||
return {
|
||||
"type": "Binary",
|
||||
"type": "Binary" if is_installed else "BinaryRequest",
|
||||
"schema_version": VERSION,
|
||||
"id": str(self.id),
|
||||
"machine_id": str(self.machine_id),
|
||||
"name": self.name,
|
||||
"binproviders": self.binproviders,
|
||||
"overrides": self.overrides,
|
||||
"binprovider": self.binprovider,
|
||||
"abspath": self.abspath,
|
||||
"version": self.version,
|
||||
@@ -540,7 +543,7 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
|
||||
)
|
||||
return binary
|
||||
|
||||
# Case 3: From on_Binary__install hook output - update with installation results
|
||||
# Case 3: From on_BinaryRequest__ hook output - update with installation results
|
||||
if abspath and version:
|
||||
binary, _ = Binary.objects.update_or_create(
|
||||
machine=machine,
|
||||
@@ -607,10 +610,10 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
Execute binary installation by running on_Binary__install_* hooks.
|
||||
Execute binary installation by running on_BinaryRequest__* hooks.
|
||||
|
||||
Called by BinaryMachine when entering 'started' state.
|
||||
Runs ALL on_Binary__install_* hooks - each hook checks binproviders
|
||||
Runs ALL on_BinaryRequest__* hooks - each hook checks binproviders
|
||||
and decides if it can handle this binary. First hook to succeed wins.
|
||||
Updates status to SUCCEEDED or FAILED based on hook output.
|
||||
"""
|
||||
@@ -637,8 +640,8 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
|
||||
output_dir = self.output_dir
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Discover ALL on_Binary__install_* hooks
|
||||
hooks = discover_hooks("Binary", config=config)
|
||||
# Discover ALL on_BinaryRequest__* hooks
|
||||
hooks = discover_hooks("BinaryRequest", config=config)
|
||||
if not hooks:
|
||||
# No hooks available - stay queued, will retry later
|
||||
return
|
||||
|
||||
@@ -35,11 +35,21 @@ TYPE_SNAPSHOT = "Snapshot"
|
||||
TYPE_ARCHIVERESULT = "ArchiveResult"
|
||||
TYPE_TAG = "Tag"
|
||||
TYPE_CRAWL = "Crawl"
|
||||
TYPE_BINARYREQUEST = "BinaryRequest"
|
||||
TYPE_BINARY = "Binary"
|
||||
TYPE_PROCESS = "Process"
|
||||
TYPE_MACHINE = "Machine"
|
||||
|
||||
VALID_TYPES = {TYPE_SNAPSHOT, TYPE_ARCHIVERESULT, TYPE_TAG, TYPE_CRAWL, TYPE_BINARY, TYPE_PROCESS, TYPE_MACHINE}
|
||||
VALID_TYPES = {
|
||||
TYPE_SNAPSHOT,
|
||||
TYPE_ARCHIVERESULT,
|
||||
TYPE_TAG,
|
||||
TYPE_CRAWL,
|
||||
TYPE_BINARYREQUEST,
|
||||
TYPE_BINARY,
|
||||
TYPE_PROCESS,
|
||||
TYPE_MACHINE,
|
||||
}
|
||||
|
||||
|
||||
def parse_line(line: str) -> dict[str, Any] | None:
|
||||
|
||||
@@ -2,24 +2,24 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from abx_dl.events import BinaryEvent, BinaryInstalledEvent
|
||||
from abx_dl.events import BinaryRequestEvent, BinaryEvent
|
||||
from abx_dl.services.base import BaseService
|
||||
|
||||
from .db import run_db_op
|
||||
|
||||
|
||||
class BinaryService(BaseService):
|
||||
LISTENS_TO = [BinaryEvent, BinaryInstalledEvent]
|
||||
LISTENS_TO = [BinaryRequestEvent, BinaryEvent]
|
||||
EMITS = []
|
||||
|
||||
async def on_BinaryEvent__Outer(self, event: BinaryEvent) -> None:
|
||||
async def on_BinaryRequestEvent__Outer(self, event: BinaryRequestEvent) -> None:
|
||||
await run_db_op(self._project_binary, event)
|
||||
|
||||
async def on_BinaryInstalledEvent__Outer(self, event: BinaryInstalledEvent) -> None:
|
||||
async def on_BinaryEvent__Outer(self, event: BinaryEvent) -> None:
|
||||
resolved = await asyncio.to_thread(self._resolve_installed_binary_metadata, event)
|
||||
await run_db_op(self._project_installed_binary, event, resolved)
|
||||
|
||||
def _project_binary(self, event: BinaryEvent) -> None:
|
||||
def _project_binary(self, event: BinaryRequestEvent) -> None:
|
||||
from archivebox.machine.models import Binary, Machine
|
||||
|
||||
machine = Machine.current()
|
||||
@@ -39,16 +39,12 @@ class BinaryService(BaseService):
|
||||
Binary.from_json(
|
||||
{
|
||||
"name": event.name,
|
||||
"abspath": event.abspath,
|
||||
"version": event.version,
|
||||
"sha256": event.sha256,
|
||||
"binproviders": event.binproviders,
|
||||
"binprovider": event.binprovider,
|
||||
"overrides": event.overrides or {},
|
||||
},
|
||||
)
|
||||
|
||||
def _resolve_installed_binary_metadata(self, event: BinaryInstalledEvent) -> dict[str, str]:
|
||||
def _resolve_installed_binary_metadata(self, event: BinaryEvent) -> dict[str, str]:
|
||||
resolved = {
|
||||
"abspath": event.abspath or "",
|
||||
"version": event.version or "",
|
||||
@@ -59,6 +55,18 @@ class BinaryService(BaseService):
|
||||
if resolved["abspath"] and resolved["version"] and resolved["binprovider"]:
|
||||
return resolved
|
||||
|
||||
if resolved["abspath"] and not resolved["version"]:
|
||||
try:
|
||||
from abx_pkg.semver import bin_version
|
||||
|
||||
detected_version = bin_version(resolved["abspath"])
|
||||
except Exception:
|
||||
detected_version = None
|
||||
if detected_version:
|
||||
resolved["version"] = str(detected_version)
|
||||
if resolved["version"] and resolved["binprovider"]:
|
||||
return resolved
|
||||
|
||||
try:
|
||||
from abx_dl.dependencies import load_binary
|
||||
|
||||
@@ -80,7 +88,7 @@ class BinaryService(BaseService):
|
||||
|
||||
return resolved
|
||||
|
||||
def _project_installed_binary(self, event: BinaryInstalledEvent, resolved: dict[str, str]) -> None:
|
||||
def _project_installed_binary(self, event: BinaryEvent, resolved: dict[str, str]) -> None:
|
||||
from archivebox.machine.models import Binary, Machine
|
||||
|
||||
machine = Machine.current()
|
||||
|
||||
@@ -58,7 +58,7 @@ class ProcessService(BaseService):
|
||||
return process
|
||||
|
||||
process_type = getattr(event, "process_type", "") or (
|
||||
Process.TypeChoices.BINARY if event.hook_name.startswith("on_Binary") else Process.TypeChoices.HOOK
|
||||
Process.TypeChoices.BINARY if event.hook_name.startswith("on_BinaryRequest") else Process.TypeChoices.HOOK
|
||||
)
|
||||
worker_type = getattr(event, "worker_type", "") or ""
|
||||
if process_type == Process.TypeChoices.WORKER and worker_type:
|
||||
|
||||
@@ -15,14 +15,13 @@ from typing import Any
|
||||
from django.utils import timezone
|
||||
from rich.console import Console
|
||||
|
||||
from abx_dl.events import BinaryEvent
|
||||
from abx_dl.events import BinaryRequestEvent
|
||||
from abx_dl.limits import CrawlLimitState
|
||||
from abx_dl.models import INSTALL_URL, Plugin, Snapshot as AbxSnapshot, discover_plugins, filter_plugins
|
||||
from abx_dl.models import Plugin, Snapshot as AbxSnapshot, discover_plugins, filter_plugins
|
||||
from abx_dl.orchestrator import (
|
||||
create_bus,
|
||||
download,
|
||||
install_plugins as abx_install_plugins,
|
||||
prepare_install_plugins,
|
||||
setup_services as setup_abx_services,
|
||||
)
|
||||
|
||||
@@ -51,11 +50,12 @@ def _selected_plugins_from_config(config: dict[str, Any]) -> list[str] | None:
|
||||
|
||||
def _count_selected_hooks(plugins: dict[str, Plugin], selected_plugins: list[str] | None) -> int:
|
||||
selected = filter_plugins(plugins, selected_plugins) if selected_plugins else plugins
|
||||
total = 0
|
||||
for plugin in selected.values():
|
||||
total += len(list(plugin.get_crawl_hooks()))
|
||||
total += len(list(plugin.get_snapshot_hooks()))
|
||||
return total
|
||||
return sum(
|
||||
1
|
||||
for plugin in selected.values()
|
||||
for hook in plugin.hooks
|
||||
if "Install" in hook.name or "CrawlSetup" in hook.name or "Snapshot" in hook.name
|
||||
)
|
||||
|
||||
|
||||
def _runner_debug(message: str) -> None:
|
||||
@@ -68,10 +68,9 @@ def _binary_env_key(name: str) -> str:
|
||||
|
||||
|
||||
def _binary_config_keys_for_plugins(plugins: dict[str, Plugin], binary_name: str) -> list[str]:
|
||||
keys = [_binary_env_key(binary_name)]
|
||||
|
||||
if binary_name == "postlight-parser":
|
||||
keys.insert(0, "MERCURY_BINARY")
|
||||
keys: list[str] = []
|
||||
if binary_name != "postlight-parser":
|
||||
keys.append(_binary_env_key(binary_name))
|
||||
|
||||
for plugin in plugins.values():
|
||||
for key, prop in plugin.config_schema.items():
|
||||
@@ -86,6 +85,12 @@ def _installed_binary_config_overrides(plugins: dict[str, Plugin]) -> dict[str,
|
||||
|
||||
machine = Machine.current()
|
||||
overrides: dict[str, str] = {}
|
||||
shared_lib_dir: Path | None = None
|
||||
pip_home: Path | None = None
|
||||
pip_bin_dir: Path | None = None
|
||||
npm_home: Path | None = None
|
||||
node_modules_dir: Path | None = None
|
||||
npm_bin_dir: Path | None = None
|
||||
binaries = (
|
||||
Binary.objects.filter(machine=machine, status=Binary.StatusChoices.INSTALLED).exclude(abspath="").exclude(abspath__isnull=True)
|
||||
)
|
||||
@@ -100,6 +105,32 @@ def _installed_binary_config_overrides(plugins: dict[str, Plugin]) -> dict[str,
|
||||
for key in _binary_config_keys_for_plugins(plugins, binary.name):
|
||||
overrides[key] = binary.abspath
|
||||
|
||||
if resolved_path.parent.name == ".bin" and resolved_path.parent.parent.name == "node_modules":
|
||||
npm_bin_dir = npm_bin_dir or resolved_path.parent
|
||||
node_modules_dir = node_modules_dir or resolved_path.parent.parent
|
||||
npm_home = npm_home or resolved_path.parent.parent.parent
|
||||
shared_lib_dir = shared_lib_dir or resolved_path.parent.parent.parent.parent
|
||||
elif resolved_path.parent.name == "bin" and resolved_path.parent.parent.name == "venv" and resolved_path.parent.parent.parent.name == "pip":
|
||||
pip_bin_dir = pip_bin_dir or resolved_path.parent
|
||||
pip_home = pip_home or resolved_path.parent.parent.parent
|
||||
shared_lib_dir = shared_lib_dir or resolved_path.parent.parent.parent.parent
|
||||
|
||||
if shared_lib_dir is not None:
|
||||
overrides["LIB_DIR"] = str(shared_lib_dir)
|
||||
overrides["LIB_BIN_DIR"] = str(shared_lib_dir / "bin")
|
||||
if pip_home is not None:
|
||||
overrides["PIP_HOME"] = str(pip_home)
|
||||
if pip_bin_dir is not None:
|
||||
overrides["PIP_BIN_DIR"] = str(pip_bin_dir)
|
||||
if npm_home is not None:
|
||||
overrides["NPM_HOME"] = str(npm_home)
|
||||
if node_modules_dir is not None:
|
||||
overrides["NODE_MODULES_DIR"] = str(node_modules_dir)
|
||||
overrides["NODE_MODULE_DIR"] = str(node_modules_dir)
|
||||
overrides["NODE_PATH"] = str(node_modules_dir)
|
||||
if npm_bin_dir is not None:
|
||||
overrides["NPM_BIN_DIR"] = str(npm_bin_dir)
|
||||
|
||||
return overrides
|
||||
|
||||
|
||||
@@ -264,26 +295,23 @@ class CrawlRunner:
|
||||
auto_install=True,
|
||||
emit_jsonl=False,
|
||||
)
|
||||
if self.crawl.get_system_task() == INSTALL_URL:
|
||||
await self._run_install_crawl()
|
||||
else:
|
||||
snapshot_ids = await sync_to_async(self._initial_snapshot_ids, thread_sensitive=True)()
|
||||
if snapshot_ids:
|
||||
root_snapshot_id = snapshot_ids[0]
|
||||
_runner_debug(f"crawl {self.crawl.id} starting crawl setup root_snapshot={root_snapshot_id}")
|
||||
await self._run_crawl_setup(root_snapshot_id)
|
||||
_runner_debug(f"crawl {self.crawl.id} finished crawl setup root_snapshot={root_snapshot_id}")
|
||||
for snapshot_id in snapshot_ids:
|
||||
await self.enqueue_snapshot(snapshot_id)
|
||||
_runner_debug(f"crawl {self.crawl.id} waiting for snapshot tasks count={len(self.snapshot_tasks)}")
|
||||
await self._wait_for_snapshot_tasks()
|
||||
_runner_debug(f"crawl {self.crawl.id} finished waiting for snapshot tasks")
|
||||
_runner_debug(f"crawl {self.crawl.id} starting django crawl.cleanup()")
|
||||
await sync_to_async(self.crawl.cleanup, thread_sensitive=True)()
|
||||
_runner_debug(f"crawl {self.crawl.id} finished django crawl.cleanup()")
|
||||
_runner_debug(f"crawl {self.crawl.id} starting abx crawl cleanup root_snapshot={root_snapshot_id}")
|
||||
await self._run_crawl_cleanup(root_snapshot_id)
|
||||
_runner_debug(f"crawl {self.crawl.id} finished abx crawl cleanup root_snapshot={root_snapshot_id}")
|
||||
snapshot_ids = await sync_to_async(self._initial_snapshot_ids, thread_sensitive=True)()
|
||||
if snapshot_ids:
|
||||
root_snapshot_id = snapshot_ids[0]
|
||||
_runner_debug(f"crawl {self.crawl.id} starting crawl setup root_snapshot={root_snapshot_id}")
|
||||
await self._run_crawl_setup(root_snapshot_id)
|
||||
_runner_debug(f"crawl {self.crawl.id} finished crawl setup root_snapshot={root_snapshot_id}")
|
||||
for snapshot_id in snapshot_ids:
|
||||
await self.enqueue_snapshot(snapshot_id)
|
||||
_runner_debug(f"crawl {self.crawl.id} waiting for snapshot tasks count={len(self.snapshot_tasks)}")
|
||||
await self._wait_for_snapshot_tasks()
|
||||
_runner_debug(f"crawl {self.crawl.id} finished waiting for snapshot tasks")
|
||||
_runner_debug(f"crawl {self.crawl.id} starting django crawl.cleanup()")
|
||||
await sync_to_async(self.crawl.cleanup, thread_sensitive=True)()
|
||||
_runner_debug(f"crawl {self.crawl.id} finished django crawl.cleanup()")
|
||||
_runner_debug(f"crawl {self.crawl.id} starting abx crawl cleanup root_snapshot={root_snapshot_id}")
|
||||
await self._run_crawl_cleanup(root_snapshot_id)
|
||||
_runner_debug(f"crawl {self.crawl.id} finished abx crawl cleanup root_snapshot={root_snapshot_id}")
|
||||
if self.abx_services is not None:
|
||||
_runner_debug(f"crawl {self.crawl.id} waiting for main bus background monitors")
|
||||
await self.abx_services.process.wait_for_background_monitors()
|
||||
@@ -404,7 +432,7 @@ class CrawlRunner:
|
||||
interactive_tty=True,
|
||||
)
|
||||
live_ui.print_intro(
|
||||
url=self.primary_url or INSTALL_URL,
|
||||
url=self.primary_url or "crawl",
|
||||
output_dir=Path(self.crawl.output_dir),
|
||||
plugins_label=plugins_label,
|
||||
)
|
||||
@@ -435,30 +463,6 @@ class CrawlRunner:
|
||||
config["PARENT_SNAPSHOT_ID"] = str(snapshot.parent_snapshot_id)
|
||||
return config
|
||||
|
||||
async def _run_install_crawl(self) -> None:
|
||||
install_snapshot = AbxSnapshot(
|
||||
url=self.primary_url or INSTALL_URL,
|
||||
id=str(self.crawl.id),
|
||||
crawl_id=str(self.crawl.id),
|
||||
)
|
||||
await download(
|
||||
url=self.primary_url or INSTALL_URL,
|
||||
plugins=self.plugins,
|
||||
output_dir=Path(self.crawl.output_dir),
|
||||
selected_plugins=self.selected_plugins,
|
||||
config_overrides={
|
||||
**self.base_config,
|
||||
"CRAWL_DIR": str(self.crawl.output_dir),
|
||||
"SNAP_DIR": str(self.crawl.output_dir),
|
||||
"CRAWL_ID": str(self.crawl.id),
|
||||
"SOURCE_URL": self.crawl.urls,
|
||||
},
|
||||
bus=self.bus,
|
||||
emit_jsonl=False,
|
||||
snapshot=install_snapshot,
|
||||
crawl_only=True,
|
||||
)
|
||||
|
||||
async def _run_crawl_setup(self, snapshot_id: str) -> None:
|
||||
from asgiref.sync import sync_to_async
|
||||
|
||||
@@ -625,7 +629,7 @@ async def _run_binary(binary_id: str) -> None:
|
||||
binary = await sync_to_async(Binary.objects.get, thread_sensitive=True)(id=binary_id)
|
||||
plugins = discover_plugins()
|
||||
config = get_config()
|
||||
config.update(_installed_binary_config_overrides(plugins))
|
||||
config.update(await sync_to_async(_installed_binary_config_overrides, thread_sensitive=True)(plugins))
|
||||
config["ABX_RUNTIME"] = "archivebox"
|
||||
bus = create_bus(name=_bus_name("ArchiveBox_binary", str(binary.id)), total_timeout=1800.0)
|
||||
process_service = ProcessService(bus)
|
||||
@@ -645,18 +649,14 @@ async def _run_binary(binary_id: str) -> None:
|
||||
try:
|
||||
_attach_bus_trace(bus)
|
||||
await bus.emit(
|
||||
BinaryEvent(
|
||||
BinaryRequestEvent(
|
||||
name=binary.name,
|
||||
plugin_name="archivebox",
|
||||
hook_name="archivebox_run",
|
||||
hook_name="on_BinaryRequest__archivebox_run",
|
||||
output_dir=str(binary.output_dir),
|
||||
binary_id=str(binary.id),
|
||||
machine_id=str(binary.machine_id),
|
||||
abspath=binary.abspath,
|
||||
version=binary.version,
|
||||
sha256=binary.sha256,
|
||||
binproviders=binary.binproviders,
|
||||
binprovider=binary.binprovider,
|
||||
overrides=binary.overrides or None,
|
||||
),
|
||||
)
|
||||
@@ -670,11 +670,13 @@ def run_binary(binary_id: str) -> None:
|
||||
|
||||
|
||||
async def _run_install(plugin_names: list[str] | None = None) -> None:
|
||||
from asgiref.sync import sync_to_async
|
||||
|
||||
from archivebox.config.configset import get_config
|
||||
|
||||
plugins = discover_plugins()
|
||||
config = get_config()
|
||||
config.update(_installed_binary_config_overrides(plugins))
|
||||
config.update(await sync_to_async(_installed_binary_config_overrides, thread_sensitive=True)(plugins))
|
||||
config["ABX_RUNTIME"] = "archivebox"
|
||||
bus = create_bus(name="ArchiveBox_install", total_timeout=3600.0)
|
||||
process_service = ProcessService(bus)
|
||||
@@ -693,7 +695,9 @@ async def _run_install(plugin_names: list[str] | None = None) -> None:
|
||||
live_stream = None
|
||||
|
||||
try:
|
||||
selected_plugins = prepare_install_plugins(plugins, plugin_names=plugin_names)
|
||||
selected_plugins = filter_plugins(plugins, list(plugin_names), include_providers=True) if plugin_names else plugins
|
||||
if not selected_plugins:
|
||||
return
|
||||
plugins_label = ", ".join(plugin_names) if plugin_names else f"all ({len(plugins)} available)"
|
||||
timeout_seconds = int(config.get("TIMEOUT") or 60)
|
||||
stdout_is_tty = sys.stdout.isatty()
|
||||
@@ -740,7 +744,7 @@ async def _run_install(plugin_names: list[str] | None = None) -> None:
|
||||
interactive_tty=interactive_tty,
|
||||
)
|
||||
live_ui.print_intro(
|
||||
url=INSTALL_URL,
|
||||
url="install",
|
||||
output_dir=output_dir,
|
||||
plugins_label=plugins_label,
|
||||
)
|
||||
|
||||
@@ -1252,7 +1252,7 @@ class TestLiveProgressView:
|
||||
process_type=Process.TypeChoices.HOOK,
|
||||
status=Process.StatusChoices.RUNNING,
|
||||
pid=pid,
|
||||
cmd=["/plugins/chrome/on_Crawl__91_chrome_wait.js", "--url=https://example.com"],
|
||||
cmd=["/plugins/chrome/on_CrawlSetup__91_chrome_wait.js", "--url=https://example.com"],
|
||||
env={
|
||||
"CRAWL_ID": str(snapshot.crawl_id),
|
||||
"SNAPSHOT_ID": str(snapshot.id),
|
||||
|
||||
@@ -5,7 +5,7 @@ import pytest
|
||||
from django.db import connection
|
||||
|
||||
|
||||
from abx_dl.events import BinaryEvent, ProcessCompletedEvent, ProcessStartedEvent
|
||||
from abx_dl.events import BinaryRequestEvent, ProcessCompletedEvent, ProcessStartedEvent
|
||||
from abx_dl.orchestrator import create_bus
|
||||
from abx_dl.output_files import OutputFile
|
||||
|
||||
@@ -515,10 +515,10 @@ def test_binary_event_reuses_existing_installed_binary_row(monkeypatch):
|
||||
)
|
||||
|
||||
service = ArchiveBoxBinaryService(create_bus(name="test_binary_event_reuses_existing_installed_binary_row"))
|
||||
event = BinaryEvent(
|
||||
event = BinaryRequestEvent(
|
||||
name="wget",
|
||||
plugin_name="wget",
|
||||
hook_name="on_Crawl__10_wget_install.finite.bg",
|
||||
hook_name="on_Install__10_wget.finite.bg",
|
||||
output_dir="/tmp/wget",
|
||||
binproviders="provider",
|
||||
)
|
||||
|
||||
@@ -337,7 +337,11 @@ def test_binary_create_stdout_pipes_into_run(initialized_archive):
|
||||
assert create_code == 0, create_stderr
|
||||
_assert_stdout_is_jsonl_only(create_stdout)
|
||||
|
||||
binary = next(record for record in parse_jsonl_output(create_stdout) if record.get("type") == "Binary")
|
||||
binary = next(
|
||||
record
|
||||
for record in parse_jsonl_output(create_stdout)
|
||||
if record.get("type") in {"BinaryRequest", "Binary"}
|
||||
)
|
||||
|
||||
run_stdout, run_stderr, run_code = run_archivebox_cmd(
|
||||
["run"],
|
||||
@@ -349,7 +353,10 @@ def test_binary_create_stdout_pipes_into_run(initialized_archive):
|
||||
_assert_stdout_is_jsonl_only(run_stdout)
|
||||
|
||||
run_records = parse_jsonl_output(run_stdout)
|
||||
assert any(record.get("type") == "Binary" and record.get("id") == binary["id"] for record in run_records)
|
||||
assert any(
|
||||
record.get("type") in {"BinaryRequest", "Binary"} and record.get("id") == binary["id"]
|
||||
for record in run_records
|
||||
)
|
||||
|
||||
status = _db_value(
|
||||
initialized_archive,
|
||||
|
||||
@@ -378,7 +378,7 @@ class TestRecoverOrphanedCrawls:
|
||||
machine=machine,
|
||||
process_type=Process.TypeChoices.HOOK,
|
||||
status=Process.StatusChoices.RUNNING,
|
||||
cmd=["/plugins/chrome/on_Crawl__91_chrome_wait.js"],
|
||||
cmd=["/plugins/chrome/on_CrawlSetup__91_chrome_wait.js"],
|
||||
env={
|
||||
"CRAWL_ID": str(crawl.id),
|
||||
"SNAPSHOT_ID": str(snapshot.id),
|
||||
|
||||
@@ -107,7 +107,7 @@ Hook completed successfully"""
|
||||
stdout = """{"type": "ArchiveResult", "status": "succeeded"}
|
||||
{invalid json here}
|
||||
not json at all
|
||||
{"type": "Binary", "name": "wget"}"""
|
||||
{"type": "BinaryRequest", "name": "wget"}"""
|
||||
from archivebox.machine.models import Process
|
||||
|
||||
records = Process.parse_records_from_text(stdout)
|
||||
@@ -187,7 +187,7 @@ class TestHookDiscovery(unittest.TestCase):
|
||||
wget_dir = self.plugins_dir / "wget"
|
||||
wget_dir.mkdir()
|
||||
(wget_dir / "on_Snapshot__50_wget.py").write_text("# test hook")
|
||||
(wget_dir / "on_Crawl__10_wget_install.finite.bg.py").write_text("# install hook")
|
||||
(wget_dir / "on_Install__10_wget.finite.bg.py").write_text("# install hook")
|
||||
|
||||
chrome_dir = self.plugins_dir / "chrome"
|
||||
chrome_dir.mkdir(exist_ok=True)
|
||||
@@ -231,11 +231,29 @@ class TestHookDiscovery(unittest.TestCase):
|
||||
self.assertEqual(hooks[1].name, "on_Snapshot__21_consolelog.daemon.bg.js")
|
||||
self.assertEqual(hooks[2].name, "on_Snapshot__50_wget.py")
|
||||
|
||||
def test_normalize_hook_event_name_accepts_event_classes(self):
|
||||
"""Hook discovery should normalize bus event class names to hook families."""
|
||||
from archivebox import hooks as hooks_module
|
||||
|
||||
self.assertEqual(hooks_module.normalize_hook_event_name("InstallEvent"), "Install")
|
||||
self.assertEqual(hooks_module.normalize_hook_event_name("BinaryRequestEvent"), "BinaryRequest")
|
||||
self.assertEqual(hooks_module.normalize_hook_event_name("CrawlSetupEvent"), "CrawlSetup")
|
||||
self.assertEqual(hooks_module.normalize_hook_event_name("SnapshotEvent"), "Snapshot")
|
||||
|
||||
def test_normalize_hook_event_name_strips_event_suffix_for_lifecycle_events(self):
|
||||
"""Lifecycle event names should normalize via simple suffix stripping."""
|
||||
from archivebox import hooks as hooks_module
|
||||
|
||||
self.assertEqual(hooks_module.normalize_hook_event_name("BinaryEvent"), "Binary")
|
||||
self.assertEqual(hooks_module.normalize_hook_event_name("CrawlEvent"), "Crawl")
|
||||
self.assertEqual(hooks_module.normalize_hook_event_name("SnapshotCleanupEvent"), "SnapshotCleanup")
|
||||
self.assertEqual(hooks_module.normalize_hook_event_name("CrawlCleanupEvent"), "CrawlCleanup")
|
||||
|
||||
def test_get_plugins_includes_non_snapshot_plugin_dirs(self):
|
||||
"""get_plugins() should include binary-only plugins with standardized metadata."""
|
||||
env_dir = self.plugins_dir / "env"
|
||||
env_dir.mkdir()
|
||||
(env_dir / "on_Binary__15_env_discover.py").write_text("# binary hook")
|
||||
(env_dir / "on_BinaryRequest__15_env.py").write_text("# binary hook")
|
||||
(env_dir / "config.json").write_text('{"type": "object", "properties": {}}')
|
||||
|
||||
from archivebox import hooks as hooks_module
|
||||
@@ -265,7 +283,7 @@ class TestHookDiscovery(unittest.TestCase):
|
||||
|
||||
npm_dir = self.plugins_dir / "npm"
|
||||
npm_dir.mkdir()
|
||||
(npm_dir / "on_Binary__10_npm_install.py").write_text("# npm binary hook")
|
||||
(npm_dir / "on_BinaryRequest__10_npm.py").write_text("# npm binary hook")
|
||||
(npm_dir / "config.json").write_text('{"type": "object", "properties": {}}')
|
||||
|
||||
from archivebox import hooks as hooks_module
|
||||
@@ -275,13 +293,40 @@ class TestHookDiscovery(unittest.TestCase):
|
||||
patch.object(hooks_module, "BUILTIN_PLUGINS_DIR", self.plugins_dir),
|
||||
patch.object(hooks_module, "USER_PLUGINS_DIR", self.test_dir / "user_plugins"),
|
||||
):
|
||||
hooks = hooks_module.discover_hooks("Binary", config={"PLUGINS": "singlefile"})
|
||||
hooks = hooks_module.discover_hooks("BinaryRequest", config={"PLUGINS": "singlefile"})
|
||||
|
||||
hook_names = [hook.name for hook in hooks]
|
||||
self.assertIn("on_Binary__10_npm_install.py", hook_names)
|
||||
self.assertIn("on_BinaryRequest__10_npm.py", hook_names)
|
||||
|
||||
def test_discover_crawl_hooks_only_include_declared_plugin_dependencies(self):
|
||||
"""Crawl hook discovery should include required_plugins without broadening to provider plugins."""
|
||||
def test_discover_hooks_accepts_event_class_names(self):
|
||||
"""discover_hooks should accept InstallEvent / SnapshotEvent class names."""
|
||||
from archivebox import hooks as hooks_module
|
||||
|
||||
hooks_module.get_plugins.cache_clear()
|
||||
with (
|
||||
patch.object(hooks_module, "BUILTIN_PLUGINS_DIR", self.plugins_dir),
|
||||
patch.object(hooks_module, "USER_PLUGINS_DIR", self.test_dir / "user_plugins"),
|
||||
):
|
||||
install_hooks = hooks_module.discover_hooks("InstallEvent", filter_disabled=False)
|
||||
snapshot_hooks = hooks_module.discover_hooks("SnapshotEvent", filter_disabled=False)
|
||||
|
||||
self.assertIn("on_Install__10_wget.finite.bg.py", [hook.name for hook in install_hooks])
|
||||
self.assertIn("on_Snapshot__50_wget.py", [hook.name for hook in snapshot_hooks])
|
||||
|
||||
def test_discover_hooks_returns_empty_for_non_hook_lifecycle_events(self):
|
||||
"""Lifecycle events without a hook family should return no hooks."""
|
||||
from archivebox import hooks as hooks_module
|
||||
|
||||
hooks_module.get_plugins.cache_clear()
|
||||
with (
|
||||
patch.object(hooks_module, "BUILTIN_PLUGINS_DIR", self.plugins_dir),
|
||||
patch.object(hooks_module, "USER_PLUGINS_DIR", self.test_dir / "user_plugins"),
|
||||
):
|
||||
self.assertEqual(hooks_module.discover_hooks("BinaryEvent", filter_disabled=False), [])
|
||||
self.assertEqual(hooks_module.discover_hooks("CrawlCleanupEvent", filter_disabled=False), [])
|
||||
|
||||
def test_discover_install_hooks_only_include_declared_plugin_dependencies(self):
|
||||
"""Install hook discovery should include required_plugins without broadening to provider plugins."""
|
||||
responses_dir = self.plugins_dir / "responses"
|
||||
responses_dir.mkdir()
|
||||
(responses_dir / "config.json").write_text(
|
||||
@@ -297,12 +342,12 @@ class TestHookDiscovery(unittest.TestCase):
|
||||
chrome_dir = self.plugins_dir / "chrome"
|
||||
chrome_dir.mkdir(exist_ok=True)
|
||||
(chrome_dir / "config.json").write_text('{"type": "object", "properties": {}}')
|
||||
(chrome_dir / "on_Crawl__70_chrome_install.finite.bg.py").write_text("# chrome crawl hook")
|
||||
(chrome_dir / "on_Install__70_chrome.finite.bg.py").write_text("# chrome install hook")
|
||||
|
||||
npm_dir = self.plugins_dir / "npm"
|
||||
npm_dir.mkdir()
|
||||
(npm_dir / "on_Binary__10_npm_install.py").write_text("# npm binary hook")
|
||||
(npm_dir / "on_Crawl__00_npm_install.py").write_text("# npm crawl hook")
|
||||
(npm_dir / "on_BinaryRequest__10_npm.py").write_text("# npm binary hook")
|
||||
(npm_dir / "on_Install__00_npm.py").write_text("# npm install hook")
|
||||
(npm_dir / "config.json").write_text('{"type": "object", "properties": {}}')
|
||||
|
||||
from archivebox import hooks as hooks_module
|
||||
@@ -312,11 +357,11 @@ class TestHookDiscovery(unittest.TestCase):
|
||||
patch.object(hooks_module, "BUILTIN_PLUGINS_DIR", self.plugins_dir),
|
||||
patch.object(hooks_module, "USER_PLUGINS_DIR", self.test_dir / "user_plugins"),
|
||||
):
|
||||
hooks = hooks_module.discover_hooks("Crawl", config={"PLUGINS": "responses"})
|
||||
hooks = hooks_module.discover_hooks("Install", config={"PLUGINS": "responses"})
|
||||
|
||||
hook_names = [hook.name for hook in hooks]
|
||||
self.assertIn("on_Crawl__70_chrome_install.finite.bg.py", hook_names)
|
||||
self.assertNotIn("on_Crawl__00_npm_install.py", hook_names)
|
||||
self.assertIn("on_Install__70_chrome.finite.bg.py", hook_names)
|
||||
self.assertNotIn("on_Install__00_npm.py", hook_names)
|
||||
|
||||
|
||||
class TestGetExtractorName(unittest.TestCase):
|
||||
|
||||
@@ -478,7 +478,7 @@ class TestProcessCurrent(TestCase):
|
||||
"""Process.proc should accept a script recorded in DB when wrapped by an interpreter in psutil."""
|
||||
proc = Process.objects.create(
|
||||
machine=Machine.current(),
|
||||
cmd=["/tmp/on_Crawl__90_chrome_launch.daemon.bg.js", "--url=https://example.com/"],
|
||||
cmd=["/tmp/on_CrawlSetup__90_chrome_launch.daemon.bg.js", "--url=https://example.com/"],
|
||||
pid=12345,
|
||||
status=Process.StatusChoices.RUNNING,
|
||||
started_at=timezone.now(),
|
||||
@@ -488,7 +488,7 @@ class TestProcessCurrent(TestCase):
|
||||
os_proc.create_time.return_value = proc.started_at.timestamp()
|
||||
os_proc.cmdline.return_value = [
|
||||
"node",
|
||||
"/tmp/on_Crawl__90_chrome_launch.daemon.bg.js",
|
||||
"/tmp/on_CrawlSetup__90_chrome_launch.daemon.bg.js",
|
||||
"--url=https://example.com/",
|
||||
]
|
||||
|
||||
|
||||
@@ -295,13 +295,35 @@ def test_installed_binary_config_overrides_include_valid_installed_binaries(monk
|
||||
binproviders="env",
|
||||
status=Binary.StatusChoices.INSTALLED,
|
||||
)
|
||||
puppeteer_binary = Binary.objects.create(
|
||||
machine=machine,
|
||||
name="puppeteer",
|
||||
abspath="/tmp/shared-lib/npm/node_modules/.bin/puppeteer",
|
||||
version="24.40.0",
|
||||
binprovider="npm",
|
||||
binproviders="npm",
|
||||
status=Binary.StatusChoices.INSTALLED,
|
||||
)
|
||||
ytdlp_binary = Binary.objects.create(
|
||||
machine=machine,
|
||||
name="yt-dlp",
|
||||
abspath="/tmp/shared-lib/pip/venv/bin/yt-dlp",
|
||||
version="2026.3.17",
|
||||
binprovider="pip",
|
||||
binproviders="pip",
|
||||
status=Binary.StatusChoices.INSTALLED,
|
||||
)
|
||||
|
||||
monkeypatch.setattr(Machine, "current", classmethod(lambda cls: machine))
|
||||
monkeypatch.setattr(Path, "is_file", lambda self: str(self) in {sys.executable, mercury_binary.abspath, wget_binary.abspath})
|
||||
monkeypatch.setattr(
|
||||
Path,
|
||||
"is_file",
|
||||
lambda self: str(self) in {sys.executable, mercury_binary.abspath, wget_binary.abspath, puppeteer_binary.abspath, ytdlp_binary.abspath},
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
runner_module.os,
|
||||
"access",
|
||||
lambda path, mode: str(path) == sys.executable,
|
||||
lambda path, mode: str(path) in {sys.executable, puppeteer_binary.abspath, ytdlp_binary.abspath},
|
||||
)
|
||||
|
||||
overrides = runner_module._installed_binary_config_overrides(
|
||||
@@ -316,8 +338,17 @@ def test_installed_binary_config_overrides_include_valid_installed_binaries(monk
|
||||
)
|
||||
|
||||
assert overrides["MERCURY_BINARY"] == sys.executable
|
||||
assert overrides["POSTLIGHT_PARSER_BINARY"] == sys.executable
|
||||
assert "POSTLIGHT_PARSER_BINARY" not in overrides
|
||||
assert "WGET_BINARY" not in overrides
|
||||
assert overrides["LIB_DIR"] == "/tmp/shared-lib"
|
||||
assert overrides["LIB_BIN_DIR"] == "/tmp/shared-lib/bin"
|
||||
assert overrides["PIP_HOME"] == "/tmp/shared-lib/pip"
|
||||
assert overrides["PIP_BIN_DIR"] == "/tmp/shared-lib/pip/venv/bin"
|
||||
assert overrides["NPM_HOME"] == "/tmp/shared-lib/npm"
|
||||
assert overrides["NPM_BIN_DIR"] == "/tmp/shared-lib/npm/node_modules/.bin"
|
||||
assert overrides["NODE_MODULES_DIR"] == "/tmp/shared-lib/npm/node_modules"
|
||||
assert overrides["NODE_MODULE_DIR"] == "/tmp/shared-lib/npm/node_modules"
|
||||
assert overrides["NODE_PATH"] == "/tmp/shared-lib/npm/node_modules"
|
||||
|
||||
|
||||
def test_run_snapshot_skips_descendant_when_max_size_already_reached(monkeypatch):
|
||||
@@ -707,10 +738,10 @@ def test_abx_process_service_background_monitor_finishes_after_process_exit(monk
|
||||
|
||||
plugin_output_dir = tmp_path / "chrome"
|
||||
plugin_output_dir.mkdir()
|
||||
stdout_file = plugin_output_dir / "on_Crawl__90_chrome_launch.daemon.bg.stdout.log"
|
||||
stderr_file = plugin_output_dir / "on_Crawl__90_chrome_launch.daemon.bg.stderr.log"
|
||||
stdout_file = plugin_output_dir / "on_CrawlSetup__90_chrome_launch.daemon.bg.stdout.log"
|
||||
stderr_file = plugin_output_dir / "on_CrawlSetup__90_chrome_launch.daemon.bg.stderr.log"
|
||||
stderr_file.write_text("")
|
||||
pid_file = plugin_output_dir / "on_Crawl__90_chrome_launch.daemon.bg.pid"
|
||||
pid_file = plugin_output_dir / "on_CrawlSetup__90_chrome_launch.daemon.bg.pid"
|
||||
pid_file.write_text("12345")
|
||||
|
||||
proc = AbxProcess(
|
||||
@@ -719,12 +750,12 @@ def test_abx_process_service_background_monitor_finishes_after_process_exit(monk
|
||||
timeout=60,
|
||||
started_at=now_iso(),
|
||||
plugin="chrome",
|
||||
hook_name="on_Crawl__90_chrome_launch.daemon.bg",
|
||||
hook_name="on_CrawlSetup__90_chrome_launch.daemon.bg",
|
||||
)
|
||||
process = FakeAsyncProcess()
|
||||
event = SimpleNamespace(
|
||||
plugin_name="chrome",
|
||||
hook_name="on_Crawl__90_chrome_launch.daemon.bg",
|
||||
hook_name="on_CrawlSetup__90_chrome_launch.daemon.bg",
|
||||
hook_path="hook",
|
||||
hook_args=["--url=https://example.org/"],
|
||||
env={},
|
||||
|
||||
@@ -644,7 +644,7 @@ Binary(queued) → BinaryMachine → Binary.run() → succeeded/failed
|
||||
|
||||
#### Benefits of Eliminating Dependency
|
||||
1. **No global singleton conflicts**: Binary is per-machine, no race conditions
|
||||
2. **Simpler data model**: One table instead of two (Dependency + InstalledBinary)
|
||||
2. **Simpler data model**: One table instead of two (Dependency + Binary)
|
||||
3. **Static configuration**: dependencies.jsonl in version control, not database
|
||||
4. **Consistent state machine**: Binary follows same pattern as other models
|
||||
5. **Cleaner hooks**: Hooks check bin_providers themselves instead of orchestrator parsing names
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "archivebox"
|
||||
version = "0.9.10rc2"
|
||||
version = "0.9.12rc1"
|
||||
requires-python = ">=3.13"
|
||||
description = "Self-hosted internet archiving solution."
|
||||
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
|
||||
@@ -78,10 +78,10 @@ dependencies = [
|
||||
"w3lib>=2.2.1", # used for parsing content-type encoding from http response headers & html tags
|
||||
### Extractor dependencies (optional binaries detected at runtime via shutil.which)
|
||||
### Binary/Package Management
|
||||
"abxbus>=2.4.2", # explicit direct dep so local dev env resolves sibling abxbus repo, matching abx-dl EventBus API
|
||||
"abx-pkg>=1.9.18", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
|
||||
"abx-plugins>=1.10.14", # shared ArchiveBox plugin package with install_args-only overrides
|
||||
"abx-dl>=1.10.14", # shared ArchiveBox downloader package with install_args-only overrides
|
||||
"abxbus>=2.4.9", # explicit direct dep so local dev env resolves sibling abxbus repo, matching abx-dl EventBus API
|
||||
"abx-pkg>=1.9.19", # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
|
||||
"abx-plugins>=1.10.19", # shared ArchiveBox plugin package with install_args-only overrides
|
||||
"abx-dl>=1.10.19", # shared ArchiveBox downloader package with install_args-only overrides
|
||||
### UUID7 backport for Python <3.14
|
||||
"uuid7>=0.1.0; python_version < '3.14'", # provides the uuid_extensions module on Python 3.13
|
||||
]
|
||||
|
||||
72
uv.lock
generated
72
uv.lock
generated
@@ -14,7 +14,7 @@ supported-markers = [
|
||||
|
||||
[[package]]
|
||||
name = "abx-dl"
|
||||
version = "1.10.14"
|
||||
version = "1.10.19"
|
||||
source = { editable = "../abx-dl" }
|
||||
dependencies = [
|
||||
{ name = "abx-pkg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -59,7 +59,7 @@ dev = [
|
||||
|
||||
[[package]]
|
||||
name = "abx-pkg"
|
||||
version = "1.9.18"
|
||||
version = "1.9.19"
|
||||
source = { editable = "../abx-pkg" }
|
||||
dependencies = [
|
||||
{ name = "pip", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -101,7 +101,7 @@ dev = [
|
||||
|
||||
[[package]]
|
||||
name = "abx-plugins"
|
||||
version = "1.10.14"
|
||||
version = "1.10.19"
|
||||
source = { editable = "../abx-plugins" }
|
||||
dependencies = [
|
||||
{ name = "abx-pkg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -130,7 +130,7 @@ dev = [{ name = "prek", specifier = ">=0.3.6" }]
|
||||
|
||||
[[package]]
|
||||
name = "abxbus"
|
||||
version = "2.4.8"
|
||||
version = "2.4.9"
|
||||
source = { editable = "../abxbus" }
|
||||
dependencies = [
|
||||
{ name = "aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -218,7 +218,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "archivebox"
|
||||
version = "0.9.10rc2"
|
||||
version = "0.9.12rc1"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "abx-dl", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -524,21 +524,21 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "cbor2"
|
||||
version = "5.8.0"
|
||||
version = "5.9.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d9/8e/8b4fdde28e42ffcd741a37f4ffa9fb59cd4fe01625b544dfcfd9ccb54f01/cbor2-5.8.0.tar.gz", hash = "sha256:b19c35fcae9688ac01ef75bad5db27300c2537eb4ee00ed07e05d8456a0d4931", size = 107825, upload-time = "2025-12-30T18:44:22.455Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/bd/cb/09939728be094d155b5d4ac262e39877875f5f7e36eea66beb359f647bd0/cbor2-5.9.0.tar.gz", hash = "sha256:85c7a46279ac8f226e1059275221e6b3d0e370d2bb6bd0500f9780781615bcea", size = 111231, upload-time = "2026-03-22T15:56:50.638Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a6/0d/5a3f20bafaefeb2c1903d961416f051c0950f0d09e7297a3aa6941596b29/cbor2-5.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6d8d104480845e2f28c6165b4c961bbe58d08cb5638f368375cfcae051c28015", size = 70332, upload-time = "2025-12-30T18:43:54.694Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/57/66/177a3f089e69db69c987453ab4934086408c3338551e4984734597be9f80/cbor2-5.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:43efee947e5ab67d406d6e0dc61b5dee9d2f5e89ae176f90677a3741a20ca2e7", size = 285985, upload-time = "2025-12-30T18:43:55.733Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b7/8e/9e17b8e4ed80a2ce97e2dfa5915c169dbb31599409ddb830f514b57f96cc/cbor2-5.8.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be7ae582f50be539e09c134966d0fd63723fc4789b8dff1f6c2e3f24ae3eaf32", size = 285173, upload-time = "2025-12-30T18:43:57.321Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cc/33/9f92e107d78f88ac22723ac15d0259d220ba98c1d855e51796317f4c4114/cbor2-5.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:50f5c709561a71ea7970b4cd2bf9eda4eccacc0aac212577080fdfe64183e7f5", size = 278395, upload-time = "2025-12-30T18:43:58.497Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/3f/46b80050a4a35ce5cf7903693864a9fdea7213567dc8faa6e25cb375c182/cbor2-5.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a6790ecc73aa93e76d2d9076fc42bf91a9e69f2295e5fa702e776dbe986465bd", size = 278330, upload-time = "2025-12-30T18:43:59.656Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4b/0c/0654233d7543ac8a50f4785f172430ddc97538ba418eb305d6e529d1a120/cbor2-5.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ad72381477133046ce217617d839ea4e9454f8b77d9a6351b229e214102daeb7", size = 70710, upload-time = "2025-12-30T18:44:03.209Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/84/62/4671d24e557d7f5a74a01b422c538925140c0495e57decde7e566f91d029/cbor2-5.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6da25190fad3434ce99876b11d4ca6b8828df6ca232cf7344cd14ae1166fb718", size = 285005, upload-time = "2025-12-30T18:44:05.109Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/87/85/0c67d763a08e848c9a80d7e4723ba497cce676f41bc7ca1828ae90a0a872/cbor2-5.8.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c13919e3a24c5a6d286551fa288848a4cedc3e507c58a722ccd134e461217d99", size = 282435, upload-time = "2025-12-30T18:44:06.465Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b2/01/0650972b4dbfbebcfbe37cbba7fc3cd9019a8da6397ab3446e07175e342b/cbor2-5.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f8c40d32e5972047a777f9bf730870828f3cf1c43b3eb96fd0429c57a1d3b9e6", size = 277493, upload-time = "2025-12-30T18:44:07.609Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b3/6c/7704a4f32adc7f10f3b41ec067f500a4458f7606397af5e4cf2d368fd288/cbor2-5.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7627894bc0b3d5d0807f31e3107e11b996205470c4429dc2bb4ef8bfe7f64e1e", size = 276085, upload-time = "2025-12-30T18:44:09.021Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/4f/101071f880b4da05771128c0b89f41e334cff044dee05fb013c8f4be661c/cbor2-5.8.0-py3-none-any.whl", hash = "sha256:3727d80f539567b03a7aa11890e57798c67092c38df9e6c23abb059e0f65069c", size = 24374, upload-time = "2025-12-30T18:44:21.476Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/81/c5/4901e21a8afe9448fd947b11e8f383903207cd6dd0800e5f5a386838de5b/cbor2-5.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fbb06f34aa645b4deca66643bba3d400d20c15312d1fe88d429be60c1ab50f27", size = 71284, upload-time = "2026-03-22T15:56:22.836Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1b/10/df643a381aebc3f05486de4813662bc58accb640fc3275cb276a75e89694/cbor2-5.9.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac684fe195c39821fca70d18afbf748f728aefbfbf88456018d299e559b8cae0", size = 287682, upload-time = "2026-03-22T15:56:24.024Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c6/0c/8aa6b766059ae4a0ca1ec3ff96fe3823a69a7be880dba2e249f7fbe2700b/cbor2-5.9.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a54fbb32cb828c214f7f333a707e4aec61182e7efdc06ea5d9596d3ecee624a", size = 288009, upload-time = "2026-03-22T15:56:25.305Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/74/07/6236bc25c183a9cf7e8062e5dddf9eae9b0b14ebf14a58a69fe5a1e872c6/cbor2-5.9.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4753a6d1bc71054d9179557bc65740860f185095ccb401d46637fff028a5b3ec", size = 280437, upload-time = "2026-03-22T15:56:26.479Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4e/0a/84328d23c3c68874ac6497edb9b1900579a1028efa54734df3f1762bbc15/cbor2-5.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:380e534482b843e43442b87d8777a7bf9bed20cb7526f89b780c3400f617304b", size = 282247, upload-time = "2026-03-22T15:56:28.644Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/08/7d/9ccc36d10ef96e6038e48046ebe1ce35a1e7814da0e1e204d09e6ef09b8d/cbor2-5.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:23606d31ba1368bd1b6602e3020ee88fe9523ca80e8630faf6b2fc904fd84560", size = 71500, upload-time = "2026-03-22T15:56:31.876Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/70/e1/a6cca2cc72e13f00030c6a649f57ae703eb2c620806ab70c40db8eab33fa/cbor2-5.9.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0322296b9d52f55880e300ba8ba09ecf644303b99b51138bbb1c0fb644fa7c3e", size = 286953, upload-time = "2026-03-22T15:56:33.292Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/08/3c/24cd5ef488a957d90e016f200a3aad820e4c2f85edd61c9fe4523007a1ee/cbor2-5.9.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:422817286c1d0ce947fb2f7eca9212b39bddd7231e8b452e2d2cc52f15332dba", size = 285454, upload-time = "2026-03-22T15:56:34.703Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a4/35/dca96818494c0ba47cdd73e8d809b27fa91f8fa0ce32a068a09237687454/cbor2-5.9.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9a4907e0c3035bb8836116854ed8e56d8aef23909d601fa59706320897ec2551", size = 279441, upload-time = "2026-03-22T15:56:35.888Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a4/44/d3362378b16e53cf7e535a3f5aed8476e2109068154e24e31981ef5bde9e/cbor2-5.9.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fb7afe77f8d269e42d7c4b515c6fd14f1ccc0625379fb6829b269f493d16eddd", size = 279673, upload-time = "2026-03-22T15:56:37.08Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/42/ff/b83492b096fbef26e9cb62c1a4bf2d3cef579ea7b33138c6c37c4ae66f67/cbor2-5.9.0-py3-none-any.whl", hash = "sha256:27695cbd70c90b8de5c4a284642c2836449b14e2c2e07e3ffe0744cb7669a01b", size = 24627, upload-time = "2026-03-22T15:56:48.847Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1279,7 +1279,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "logfire"
|
||||
version = "4.29.0"
|
||||
version = "4.30.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "executing", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -1290,9 +1290,9 @@ dependencies = [
|
||||
{ name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/8f/40/3d09fe09cfa63753feada2d41dd909ce0741dd5731014a4b3eb31bdee977/logfire-4.29.0.tar.gz", hash = "sha256:18a306a0b5744aee8ad0a8f5d6b3a47a6d8951c340eaecc42dc5d0224f4bdca0", size = 1057563, upload-time = "2026-03-13T15:30:24.343Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/03/77/ed3b6453c0c8027724ceb968ca17e550c47e58cdb5dc27458392db40e327/logfire-4.30.0.tar.gz", hash = "sha256:460ed1a7433d88570659903f31b6f9b70903110addbb18b1cf7b414cdb516bb5", size = 1058676, upload-time = "2026-03-23T17:08:28.944Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/73/aa/fb8102ea48924fbbb9dfced7bada5717875801808ad53f9a60b6b4fec440/logfire-4.29.0-py3-none-any.whl", hash = "sha256:8dd7fdf6bed21459b8893eaa290d61977b9ebcc901844e365ddee868b5d8bca8", size = 302227, upload-time = "2026-03-13T15:30:20.742Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/3a/ead5b87ff38292e0ef800b1d184a9a4eedf9f7ce1cf86264b4798a0a8b14/logfire-4.30.0-py3-none-any.whl", hash = "sha256:a520a2b6da7765bc15143fd4098c6f9ec56a836bf3a046f06c823c73af932f3a", size = 302618, upload-time = "2026-03-23T17:08:25.923Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
@@ -1727,23 +1727,23 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "prek"
|
||||
version = "0.3.6"
|
||||
version = "0.3.8"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ab/e4/983840179c652feb9793c95b88abfe4b1f1d1aed7a791b45db97241be1a0/prek-0.3.6.tar.gz", hash = "sha256:bdf5c1e13ba0c04c2f488c5f90b1fd97a72aa740dc373b17fbbfc51898fa0377", size = 378106, upload-time = "2026-03-16T08:31:54.302Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/62/ee/03e8180e3fda9de25b6480bd15cc2bde40d573868d50648b0e527b35562f/prek-0.3.8.tar.gz", hash = "sha256:434a214256516f187a3ab15f869d950243be66b94ad47987ee4281b69643a2d9", size = 400224, upload-time = "2026-03-23T08:23:35.981Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/04/05/157631f14fef32361a36956368a1e6559d857443d7585bc4c9225f4a4a18/prek-0.3.6-py3-none-linux_armv6l.whl", hash = "sha256:1713119cf0c390486786f4c84450ea584bcdf43979cc28e1350ec62e5d9a41ed", size = 5126301, upload-time = "2026-03-16T08:31:31.194Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/54/f0/0918501708994d165c4bfc64c5749a263d04a08ae1196f3ad3b2e0d93b12/prek-0.3.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b68ef211fa60c53ec8866dcf38bacd8cb86b14f0e2b5491dd7a42370bee32e3e", size = 5527520, upload-time = "2026-03-16T08:31:41.948Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/9f/0d8ed2eaea58d8a7c5a3b0129914b7a73cd1a1fc7513a1d6b1efa0ec4ce4/prek-0.3.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:327b9030c3424c9fbcdf962992288295e89afe54fa94a7e0928e2691d1d2b53d", size = 5120490, upload-time = "2026-03-16T08:31:29.808Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d8/d5/63e21d19687816082df5bfd234f451b17858b37f500e2a8845cda1a031db/prek-0.3.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:61de3f019f5a082688654139fd9a3e03f74dbd4a09533667714d28833359114d", size = 5355957, upload-time = "2026-03-16T08:31:37.408Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/0e/bb52a352e5d7dc92eaebb69aeef4e5b7cddc47c646e24fe9d6a61956b45d/prek-0.3.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5bbba688c5283c8e8c907fb00f7c79fce630129f27f77cbee67e356fcfdedea8", size = 5055675, upload-time = "2026-03-16T08:31:40.311Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/8b/7c2a49314eb4909d50ee1c2171e00d524f9e080a5be598effbe36158d35c/prek-0.3.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5dfe26bc2675114734fa626e7dc635f76e53a28fed7470ba6f32caf2f29cc21f", size = 5459285, upload-time = "2026-03-16T08:31:32.764Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/70/11/86cbf205b111f93d45b5c04a61ea2cdcf12970b11277fa6a8eef1b8aaa0d/prek-0.3.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f8121060b4610411a936570ebb03b0f78c1b637c25d4914885b3bba127cb554", size = 6391127, upload-time = "2026-03-16T08:31:52.587Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0a/d3/bae4a351b9b095e317ad294817d3dff980d73a907a0449b49a9549894a80/prek-0.3.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a38d8061caae4ffd757316b9ef65409d808ae92482386385413365bad033c26", size = 5734755, upload-time = "2026-03-16T08:31:34.387Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ea/48/5b1d6d91407e14f86daf580a93f073d00b70f4dca8ff441d40971652a38e/prek-0.3.6-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:3d9e3b5031608657bec5d572fa45a41b6c7ddbe98f925f8240addbf57af55ea7", size = 5362190, upload-time = "2026-03-16T08:31:49.403Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/08/18/38d6ea85770bb522d3dad18e8bbe435365e1e3e88f67716c2d8c2e57a36a/prek-0.3.6-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a581d2903be460a236748fb3cfcb5b7dbe5b4af2409f06c0427b637676d4b78a", size = 5181858, upload-time = "2026-03-16T08:31:43.515Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3b/61/7179e9faffa3722a96fee8d9cebdb3982390410b85fc2aaeacfe49c361b5/prek-0.3.6-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:d663f1c467dccbd414ab0caa323230f33aa27797c575d98af1013866e1f83a12", size = 5023469, upload-time = "2026-03-16T08:31:35.975Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ad/69/8a496892f8c9c898dea8cfe4917bbd58808367975132457b5ab5ac095269/prek-0.3.6-py3-none-musllinux_1_1_i686.whl", hash = "sha256:cbc7f0b344432630e990a6c6dd512773fbb7253c8df3c3f78eedd80b115ed3c9", size = 5322570, upload-time = "2026-03-16T08:31:51.034Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/95/ee/f174bcfd73e8337a4290cb7eaf70b37aaec228e4f5d5ec6e61e0546ee896/prek-0.3.6-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:6ef02ce9d2389daae85f099fd4f34aa5537e3670b5e2a3174c9110ce69958c10", size = 5848197, upload-time = "2026-03-16T08:31:44.975Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/84/40d2ddf362d12c4cd4a25a8c89a862edf87cdfbf1422aa41aac8e315d409/prek-0.3.8-py3-none-linux_armv6l.whl", hash = "sha256:6fb646ada60658fa6dd7771b2e0fb097f005151be222f869dada3eb26d79ed33", size = 5226646, upload-time = "2026-03-23T08:23:18.306Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e1/52/7308a033fa43b7e8e188797bd2b3b017c0f0adda70fa7af575b1f43ea888/prek-0.3.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f3d7fdadb15efc19c09953c7a33cf2061a70f367d1e1957358d3ad5cc49d0616", size = 5620104, upload-time = "2026-03-23T08:23:40.053Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/b1/f106ac000a91511a9cd80169868daf2f5b693480ef5232cec5517a38a512/prek-0.3.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:72728c3295e79ca443f8c1ec037d2a5b914ec73a358f69cf1bc1964511876bf8", size = 5199867, upload-time = "2026-03-23T08:23:38.066Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b3/e9/970713f4b019f69de9844e1bab37b8ddb67558e410916f4eb5869a696165/prek-0.3.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:48efc28f2f53b5b8087efca9daaed91572d62df97d5f24a1c7a087fecb5017de", size = 5441801, upload-time = "2026-03-23T08:23:32.617Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/a4/7ef44032b181753e19452ec3b09abb3a32607cf6b0a0508f0604becaaf2b/prek-0.3.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f6ca9d63bacbc448a5c18e955c78d3ac5176c3a17c3baacdd949b1a623e08a36", size = 5155107, upload-time = "2026-03-23T08:23:31.021Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bd/77/4d9c8985dbba84149760785dfe07093ea1e29d710257dfb7c89615e2234c/prek-0.3.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1000f7029696b4fe712fb1fefd4c55b9c4de72b65509c8e50296370a06f9dc3f", size = 5566541, upload-time = "2026-03-23T08:23:45.694Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1a/1a/81e6769ac1f7f8346d09ce2ab0b47cf06466acd9ff72e87e5d1f0d98cd32/prek-0.3.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6ff0bed0e2c1286522987d982168a86cbbd0d069d840506a46c9fda983515517", size = 6552991, upload-time = "2026-03-23T08:23:21.958Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/fa/ce2df0dd2dc75a9437a52463239d0782998943d7b04e191fb89b83016c34/prek-0.3.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fb087ac0ffda3ac65bbbae9a38326a7fd27ee007bb4a94323ce1eb539d8bbec", size = 5832972, upload-time = "2026-03-23T08:23:20.258Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/18/6b/9d4269df9073216d296244595a21c253b6475dfc9076c0bd2906be7a436c/prek-0.3.8-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:2e1e5e206ff7b31bd079cce525daddc96cd6bc544d20dc128921ad92f7a4c85d", size = 5448371, upload-time = "2026-03-23T08:23:41.835Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/60/1d/1e4d8a78abefa5b9d086e5a9f1638a74b5e540eec8a648d9946707701f29/prek-0.3.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:dcea3fe23832a4481bccb7c45f55650cb233be7c805602e788bb7dba60f2d861", size = 5270546, upload-time = "2026-03-23T08:23:24.231Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/77/07/34f36551a6319ae36e272bea63a42f59d41d2d47ab0d5fb00eb7b4e88e87/prek-0.3.8-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:4d25e647e9682f6818ab5c31e7a4b842993c14782a6ffcd128d22b784e0d677f", size = 5124032, upload-time = "2026-03-23T08:23:26.368Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/01/6d544009bb655e709993411796af77339f439526db4f3b3509c583ad8eb9/prek-0.3.8-py3-none-musllinux_1_1_i686.whl", hash = "sha256:de528b82935e33074815acff3c7c86026754d1212136295bc88fe9c43b4231d5", size = 5432245, upload-time = "2026-03-23T08:23:47.877Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/54/96/1237ee269e9bfa283ffadbcba1f401f48a47aed2b2563eb1002740d6079d/prek-0.3.8-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:6d660f1c25a126e6d9f682fe61449441226514f412a4469f5d71f8f8cad56db2", size = 5950550, upload-time = "2026-03-23T08:23:43.8Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
Reference in New Issue
Block a user