Reuse cached binaries in archivebox runtime

This commit is contained in:
Nick Sweeting
2026-03-24 11:03:43 -07:00
parent 39450111dd
commit 50286d3c38
19 changed files with 714 additions and 564 deletions

View File

@@ -14,11 +14,10 @@ EVENT_FLOW_DIAGRAM = """
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ InstallEvent │
│ └─ on_Install__*
│ └─ BinaryRequest records
│ └─ BinaryRequestEvent
│ └─ on_BinaryRequest__*
│ └─ BinaryEvent / MachineEvent │
│ └─ config.json > required_binaries
│ └─ BinaryRequestEvent
│ └─ on_BinaryRequest__*
│ └─ BinaryEvent
│ │
│ CrawlEvent │
│ └─ CrawlSetupEvent │
@@ -70,15 +69,15 @@ def pluginmap(
event_phases = {
"InstallEvent": {
"description": "Pre-run dependency phase. on_Install hooks request binaries and update machine config.",
"emits": ["BinaryRequestEvent", "BinaryEvent", "MachineEvent", "ProcessEvent"],
"description": "Pre-run dependency phase. Enabled plugins emit BinaryRequest events from config.json required_binaries.",
"emits": ["BinaryRequestEvent", "BinaryEvent", "ProcessEvent"],
},
"BinaryRequestEvent": {
"description": "Provider phase. on_BinaryRequest hooks resolve or install requested binaries.",
"emits": ["BinaryEvent", "MachineEvent", "ProcessEvent"],
"emits": ["BinaryEvent", "ProcessEvent"],
},
"BinaryEvent": {
"description": "Resolved binary metadata event. Projected into the DB/runtime config.",
"description": "Resolved binary metadata event. Projected into the DB binary cache.",
"emits": [],
},
"CrawlEvent": {
@@ -87,11 +86,11 @@ def pluginmap(
},
"CrawlSetupEvent": {
"description": "Crawl-scoped setup phase. on_CrawlSetup hooks launch/configure shared daemons and runtime state.",
"emits": ["MachineEvent", "ProcessEvent"],
"emits": ["ProcessEvent"],
},
"SnapshotEvent": {
"description": "Per-snapshot extraction phase. on_Snapshot hooks emit ArchiveResult, Snapshot, Tag, Machine, and BinaryRequest records.",
"emits": ["ArchiveResultEvent", "SnapshotEvent", "TagEvent", "MachineEvent", "BinaryRequestEvent", "ProcessEvent"],
"description": "Per-snapshot extraction phase. on_Snapshot hooks emit ArchiveResult, Snapshot, Tag, and BinaryRequest records.",
"emits": ["ArchiveResultEvent", "SnapshotEvent", "TagEvent", "BinaryRequestEvent", "ProcessEvent"],
},
"SnapshotCleanupEvent": {
"description": "Internal snapshot cleanup phase.",

View File

@@ -5,7 +5,6 @@ __package__ = "archivebox.cli"
import sys
import os
import platform
import logging
from pathlib import Path
from collections.abc import Iterable
@@ -124,17 +123,19 @@ def version(
setup_django()
from archivebox.machine.models import Machine, Binary
from archivebox.config.views import KNOWN_BINARIES, canonical_binary_name
from abx_dl.dependencies import load_binary
machine = Machine.current()
requested_names = {canonical_binary_name(name) for name in binaries} if binaries else set()
if isinstance(binaries, str):
requested_names = {name.strip() for name in binaries.split(",") if name.strip()}
else:
requested_names = {name for name in (binaries or ()) if name}
db_binaries = {
canonical_binary_name(binary.name): binary for binary in Binary.objects.filter(machine=machine).order_by("name", "-modified_at")
}
all_binary_names = sorted(set(KNOWN_BINARIES) | set(db_binaries.keys()))
db_binaries: dict[str, Binary] = {}
for binary in Binary.objects.filter(machine=machine).order_by("name", "-modified_at"):
db_binaries.setdefault(binary.name, binary)
all_binary_names = sorted(requested_names or set(db_binaries.keys()))
if not all_binary_names:
prnt("", "[grey53]No binaries detected. Run [green]archivebox install[/green] to detect dependencies.[/grey53]")
@@ -163,37 +164,10 @@ def version(
any_available = True
continue
loaded = None
try:
abx_pkg_logger = logging.getLogger("abx_pkg")
previous_level = abx_pkg_logger.level
abx_pkg_logger.setLevel(logging.CRITICAL)
try:
loaded = load_binary({"name": name, "binproviders": "env,pip,npm,brew,apt"})
finally:
abx_pkg_logger.setLevel(previous_level)
except Exception:
loaded = None
if loaded and loaded.is_valid and loaded.loaded_abspath:
display_path = str(loaded.loaded_abspath).replace(str(DATA_DIR), ".").replace(str(Path("~").expanduser()), "~")
version_str = str(loaded.loaded_version or "unknown")[:15]
provider = str(getattr(getattr(loaded, "loaded_binprovider", None), "name", "") or "env")[:8]
prnt(
"",
"[green]√[/green]",
"",
name.ljust(18),
version_str.ljust(16),
provider.ljust(8),
display_path,
overflow="ignore",
crop=False,
)
any_available = True
continue
prnt("", "[red]X[/red]", "", name.ljust(18), "[grey53]not installed[/grey53]", overflow="ignore", crop=False)
status = (
"[grey53]not recorded[/grey53]" if name in requested_names and installed is None else "[grey53]not installed[/grey53]"
)
prnt("", "[red]X[/red]", "", name.ljust(18), status, overflow="ignore", crop=False)
failures.append(name)
if not any_available: