This commit is contained in:
Nick Sweeting
2026-03-15 18:45:29 -07:00
parent f97725d16f
commit 934e02695b
111 changed files with 919 additions and 461 deletions

View File

@@ -94,10 +94,10 @@ class Orchestrator:
self.POLL_INTERVAL = 0.25
# Exit quickly once idle in foreground mode
self.IDLE_TIMEOUT = 1
def __repr__(self) -> str:
    """Rich-markup representation of this orchestrator, tagged with its pid.

    The double backslash escapes the literal ``[`` so rich does not
    parse ``[pid=...]`` as a markup tag.
    """
    return '[underline]Orchestrator[/underline]\\[pid={}]'.format(self.pid)
@classmethod
def is_running(cls) -> bool:
"""Check if an orchestrator is already running."""
@@ -223,7 +223,7 @@ class Orchestrator:
process_type=Process.TypeChoices.WORKER,
status=Process.StatusChoices.RUNNING,
)
def should_spawn_worker(self, WorkerClass: Type[Worker], queue_count: int) -> bool:
"""Determine if we should spawn a new worker."""
if queue_count == 0:
@@ -253,7 +253,7 @@ class Orchestrator:
return False
return True
def spawn_worker(self, WorkerClass: Type[Worker]) -> int | None:
"""Spawn a new worker process. Returns PID or None if spawn failed."""
try:
@@ -286,7 +286,10 @@ class Orchestrator:
print(f'[yellow]DEBUG spawn_worker: elapsed={elapsed:.1f}s pid={pid} orchestrator_id={self.db_process.id}[/yellow]')
print(f'[yellow] Found {len(all_procs)} Process records for pid={pid}[/yellow]')
for p in all_procs:
print(f'[yellow] -> type={p.process_type} status={p.status} parent_id={p.parent_id} match={p.parent_id == self.db_process.id}[/yellow]')
print(
f'[yellow] -> type={p.process_type} status={p.status} '
f'parent_id={p.parent_id} match={p.parent_id == self.db_process.id}[/yellow]'
)
worker_process = Process.objects.filter(
pid=pid,
@@ -324,7 +327,7 @@ class Orchestrator:
error=e,
)
return None
def check_queues_and_spawn_workers(self) -> dict[str, int]:
"""
Check Binary and Crawl queues and spawn workers as needed.
@@ -584,11 +587,11 @@ class Orchestrator:
def has_pending_work(self, queue_sizes: dict[str, int]) -> bool:
    """Return True when at least one queue still has queued items.

    Args:
        queue_sizes: mapping of queue name -> number of pending items.
    """
    for pending in queue_sizes.values():
        if pending > 0:
            return True
    return False
def has_running_workers(self) -> bool:
    """Return True while any worker process is still alive."""
    running = self.get_total_worker_count()
    return running > 0
def has_future_work(self) -> bool:
"""Check if there's work scheduled for the future (retry_at > now) in Crawl queue."""
from archivebox.crawls.models import Crawl
@@ -605,38 +608,38 @@ class Orchestrator:
qs = qs.filter(id=self.crawl_id)
return qs.count() > 0
def on_tick(self, queue_sizes: dict[str, int]) -> None:
    """Hook invoked once per orchestrator loop iteration.

    Intentionally a no-op: per-tick logging was suppressed to reduce
    noise. Subclasses may override for custom per-tick behavior.
    """
    return None
def on_idle(self) -> None:
    """Hook invoked when the orchestrator has no work and no workers.

    Intentionally a no-op: idle logging was suppressed to reduce noise.
    Subclasses may override for custom idle behavior.
    """
    return None
def should_exit(self, queue_sizes: dict[str, int]) -> bool:
    """Decide whether the orchestrator loop should terminate.

    Args:
        queue_sizes: mapping of queue name -> number of pending items.

    Returns:
        True only when exit-on-idle is enabled, nothing is pending,
        running, or scheduled, and the idle timeout has elapsed.
    """
    # Exit-on-idle must be enabled and have a nonzero timeout at all.
    if not self.exit_on_idle or self.IDLE_TIMEOUT == 0:
        return False
    # Stay alive while anything is queued, running, or scheduled for later.
    # (Short-circuit order matches the original guard sequence.)
    busy = (
        self.has_pending_work(queue_sizes)
        or self.has_running_workers()
        or self.has_future_work()
    )
    if busy:
        return False
    # Only exit after we have been idle for the configured number of ticks.
    return self.idle_count >= self.IDLE_TIMEOUT
def runloop(self) -> None:
"""Main orchestrator loop."""
from rich.live import Live
@@ -702,7 +705,7 @@ class Orchestrator:
os.close(devnull_fd)
os.close(stdout_for_restore)
os.close(stderr_for_restore)
except:
except OSError:
pass
# stdout_for_console is closed by orchestrator_console
@@ -1132,7 +1135,6 @@ class Orchestrator:
# Count hooks by status for debugging
queued = snapshot.archiveresult_set.filter(status='queued').count()
started = snapshot.archiveresult_set.filter(status='started').count()
# Find currently running hook (ordered by hook_name to get lowest step number)
current_ar = snapshot.archiveresult_set.filter(status='started').order_by('hook_name').first()
@@ -1211,7 +1213,7 @@ class Orchestrator:
for snapshot_id in list(snapshot_progress.keys()):
if snapshot_id not in active_ids:
progress_layout.log_event(
f"Snapshot completed/removed",
"Snapshot completed/removed",
style="blue"
)
if snapshot_id in snapshot_progress:
@@ -1263,7 +1265,7 @@ class Orchestrator:
raise
else:
self.on_shutdown()
def start(self) -> int:
"""
Fork orchestrator as a background process.
@@ -1285,7 +1287,7 @@ class Orchestrator:
pid=proc.pid,
)
return proc.pid
@classmethod
def get_or_start(cls, exit_on_idle: bool = True) -> 'Orchestrator':
"""
@@ -1296,6 +1298,6 @@ class Orchestrator:
print('[grey53]👨‍✈️ Orchestrator already running[/grey53]')
# Return a placeholder - actual orchestrator is in another process
return cls(exit_on_idle=exit_on_idle)
orchestrator = cls(exit_on_idle=exit_on_idle)
return orchestrator

View File

@@ -2,7 +2,6 @@ __package__ = 'archivebox.workers'
import sys
import time
import signal
import socket
import psutil
import shutil
@@ -42,7 +41,7 @@ ORCHESTRATOR_WORKER = {
SERVER_WORKER = lambda host, port: {
"name": "worker_daphne",
"command": f"daphne --bind={host} --port={port} --application-close-timeout=600 archivebox.core.asgi:application",
"command": f"{sys.executable} -m daphne --bind={host} --port={port} --application-close-timeout=600 archivebox.core.asgi:application",
"autostart": "false",
"autorestart": "true",
"stdout_logfile": "logs/worker_daphne.log",
@@ -513,8 +512,6 @@ def watch_worker(supervisor, daemon_name, interval=5):
def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
global _supervisord_proc
supervisor = get_or_create_supervisord_process(daemonize=daemonize)
bg_workers = [
@@ -551,8 +548,6 @@ def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
def start_cli_workers(watch=False):
global _supervisord_proc
supervisor = get_or_create_supervisord_process(daemonize=False)
start_worker(supervisor, ORCHESTRATOR_WORKER)

View File

@@ -10,9 +10,7 @@ Tests cover:
"""
import os
import tempfile
import time
from pathlib import Path
from datetime import timedelta
from unittest.mock import patch, MagicMock
@@ -217,7 +215,6 @@ class TestOrchestratorWithProcess(TestCase):
def test_orchestrator_scoped_worker_count(self):
"""Orchestrator with crawl_id should count only descendant workers."""
import time
from archivebox.machine.models import Process, Machine
machine = Machine.current()

View File

@@ -13,13 +13,10 @@ __package__ = 'archivebox.workers'
import os
import time
import traceback
from typing import ClassVar, Any
from datetime import timedelta
from typing import TYPE_CHECKING, Any, ClassVar
from pathlib import Path
from multiprocessing import cpu_count
from django.db.models import QuerySet
from django.utils import timezone
from django.conf import settings
@@ -28,6 +25,9 @@ from rich import print
from archivebox.misc.logging_util import log_worker_event
if TYPE_CHECKING:
from archivebox.machine.models import Process
CPU_COUNT = cpu_count()
@@ -314,7 +314,10 @@ class Worker:
process.kill(signal_num=signal.SIGKILL)
log_worker_event(
worker_type=worker_type,
event=f'⚠ Sent SIGKILL to {hook_name} + {len(children_pids) if children_pids else 0} children (exceeded timeout)',
event=(
f'⚠ Sent SIGKILL to {hook_name} + '
f'{len(children_pids) if children_pids else 0} children (exceeded timeout)'
),
indent_level=indent_level,
pid=self.pid,
)
@@ -341,7 +344,6 @@ class Worker:
from archivebox.machine.models import Process, Machine
from archivebox.config.configset import get_config
from pathlib import Path
from django.conf import settings
import sys
refresh_machine_config = bool(
@@ -552,7 +554,7 @@ class CrawlWorker(Worker):
# Check if crawl is done
if self._is_crawl_finished():
print(f'🔄 Crawl finished, sealing...', file=sys.stderr)
print('🔄 Crawl finished, sealing...', file=sys.stderr)
self.crawl.sm.seal()
break
@@ -813,7 +815,8 @@ class SnapshotWorker(Worker):
is_background = is_background_hook(hook_name)
# Create ArchiveResult for THIS HOOK (not per plugin)
# One plugin can have multiple hooks (e.g., chrome/on_Snapshot__20_launch_chrome.js, chrome/on_Snapshot__21_navigate_chrome.js)
# One plugin can have multiple hooks
# (e.g., chrome/on_Snapshot__20_launch_chrome.js, chrome/on_Snapshot__21_navigate_chrome.js)
# Unique key = (snapshot, plugin, hook_name) for idempotency
ar, created = ArchiveResult.objects.get_or_create(
snapshot=self.snapshot,
@@ -868,7 +871,7 @@ class SnapshotWorker(Worker):
self.snapshot.sm.seal()
self.snapshot.refresh_from_db()
except Exception as e:
except Exception:
# Mark snapshot as sealed even on error (still triggers cleanup)
self._finalize_background_hooks()
self.snapshot.sm.seal()
@@ -1019,7 +1022,6 @@ class SnapshotWorker(Worker):
self.background_processes = {}
# Update background results now that hooks are done
from archivebox.core.models import ArchiveResult
bg_results = self.snapshot.archiveresult_set.filter(
hook_name__contains='.bg.',
@@ -1034,7 +1036,6 @@ class SnapshotWorker(Worker):
if not self.background_processes:
return
from archivebox.core.models import ArchiveResult
for hook_name, process in list(self.background_processes.items()):
exit_code = process.poll()
@@ -1165,7 +1166,6 @@ class BinaryWorker(Worker):
def runloop(self) -> None:
"""Install binary(ies)."""
import sys
self.on_startup()
@@ -1216,7 +1216,7 @@ class BinaryWorker(Worker):
except Exception as e:
log_worker_event(
worker_type='BinaryWorker',
event=f'Failed to install binary',
event='Failed to install binary',
indent_level=1,
pid=self.pid,
error=e,