Delete pid_utils.py and migrate to Process model

DELETED:
- workers/pid_utils.py (-192 lines) - replaced by Process model methods

SIMPLIFIED:
- crawls/models.py Crawl.cleanup() (80 lines -> 10 lines)
- hooks.py: deleted process_is_alive() and kill_process() (-45 lines)

UPDATED to use Process model:
- core/models.py: Snapshot.cleanup() and has_running_background_hooks()
- machine/models.py: Binary.cleanup()
- workers/worker.py: Worker.on_startup/shutdown, get_running_workers, start
- workers/orchestrator.py: Orchestrator.on_startup/shutdown, is_running

All subprocess management now uses:
- Process.current() for registering current process
- Process.get_running() / get_running_count() for querying
- Process.cleanup_stale_running() for cleanup
- safe_kill_process() for validated PID killing

Total line reduction: ~296 lines (63 additions, 359 deletions)
This commit is contained in:
Claude
2025-12-31 10:15:22 +00:00
parent 2d3a2fec57
commit b822352fc3
7 changed files with 63 additions and 359 deletions

View File

@@ -30,7 +30,7 @@ __package__ = 'archivebox.workers'
import os
import time
from typing import Type
from multiprocessing import Process
from multiprocessing import Process as MPProcess
from django.utils import timezone
@@ -38,12 +38,6 @@ from rich import print
from archivebox.misc.logging_util import log_worker_event
from .worker import Worker, CrawlWorker, SnapshotWorker, ArchiveResultWorker
from .pid_utils import (
write_pid_file,
remove_pid_file,
get_all_worker_pids,
cleanup_stale_pid_files,
)
def _run_orchestrator_process(exit_on_idle: bool) -> None:
@@ -85,16 +79,20 @@ class Orchestrator:
@classmethod
def is_running(cls) -> bool:
"""Check if an orchestrator is already running."""
workers = get_all_worker_pids('orchestrator')
return len(workers) > 0
from archivebox.machine.models import Process
return Process.get_running_count(process_type='orchestrator') > 0
def on_startup(self) -> None:
"""Called when orchestrator starts."""
self.pid = os.getpid()
self.pid_file = write_pid_file('orchestrator', worker_id=0)
from archivebox.machine.models import Process
# Clean up any stale PID files from previous runs
stale_count = cleanup_stale_pid_files()
self.pid = os.getpid()
# Register orchestrator process in database
self.db_process = Process.current()
# Clean up any stale Process records from previous runs
stale_count = Process.cleanup_stale_running()
# Collect startup metadata
metadata = {
@@ -112,11 +110,15 @@ class Orchestrator:
pid=self.pid,
metadata=metadata,
)
def on_shutdown(self, error: BaseException | None = None) -> None:
"""Called when orchestrator shuts down."""
if self.pid_file:
remove_pid_file(self.pid_file)
# Update Process record status
if hasattr(self, 'db_process') and self.db_process:
self.db_process.exit_code = 1 if error else 0
self.db_process.status = self.db_process.StatusChoices.EXITED
self.db_process.ended_at = timezone.now()
self.db_process.save()
log_worker_event(
worker_type='Orchestrator',
@@ -125,10 +127,12 @@ class Orchestrator:
pid=self.pid,
error=error if error and not isinstance(error, KeyboardInterrupt) else None,
)
def get_total_worker_count(self) -> int:
"""Get total count of running workers across all types."""
cleanup_stale_pid_files()
from archivebox.machine.models import Process
Process.cleanup_stale_running()
return sum(len(W.get_running_workers()) for W in self.WORKER_TYPES)
def should_spawn_worker(self, WorkerClass: Type[Worker], queue_count: int) -> bool:
@@ -287,7 +291,7 @@ class Orchestrator:
Returns the PID of the new process.
"""
# Use module-level function to avoid pickle errors with local functions
proc = Process(
proc = MPProcess(
target=_run_orchestrator_process,
args=(self.exit_on_idle,),
name='orchestrator'