mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
Delete pid_utils.py and migrate to Process model
DELETED: - workers/pid_utils.py (-192 lines) - replaced by Process model methods SIMPLIFIED: - crawls/models.py Crawl.cleanup() (80 lines -> 10 lines) - hooks.py: deleted process_is_alive() and kill_process() (-45 lines) UPDATED to use Process model: - core/models.py: Snapshot.cleanup() and has_running_background_hooks() - machine/models.py: Binary.cleanup() - workers/worker.py: Worker.on_startup/shutdown, get_running_workers, start - workers/orchestrator.py: Orchestrator.on_startup/shutdown, is_running All subprocess management now uses: - Process.current() for registering current process - Process.get_running() / get_running_count() for querying - Process.cleanup_stale_running() for cleanup - safe_kill_process() for validated PID killing Total line reduction: ~250 lines
This commit is contained in:
@@ -30,7 +30,7 @@ __package__ = 'archivebox.workers'
|
||||
import os
|
||||
import time
|
||||
from typing import Type
|
||||
from multiprocessing import Process
|
||||
from multiprocessing import Process as MPProcess
|
||||
|
||||
from django.utils import timezone
|
||||
|
||||
@@ -38,12 +38,6 @@ from rich import print
|
||||
|
||||
from archivebox.misc.logging_util import log_worker_event
|
||||
from .worker import Worker, CrawlWorker, SnapshotWorker, ArchiveResultWorker
|
||||
from .pid_utils import (
|
||||
write_pid_file,
|
||||
remove_pid_file,
|
||||
get_all_worker_pids,
|
||||
cleanup_stale_pid_files,
|
||||
)
|
||||
|
||||
|
||||
def _run_orchestrator_process(exit_on_idle: bool) -> None:
|
||||
@@ -85,16 +79,20 @@ class Orchestrator:
|
||||
@classmethod
|
||||
def is_running(cls) -> bool:
|
||||
"""Check if an orchestrator is already running."""
|
||||
workers = get_all_worker_pids('orchestrator')
|
||||
return len(workers) > 0
|
||||
|
||||
from archivebox.machine.models import Process
|
||||
|
||||
return Process.get_running_count(process_type='orchestrator') > 0
|
||||
|
||||
def on_startup(self) -> None:
|
||||
"""Called when orchestrator starts."""
|
||||
self.pid = os.getpid()
|
||||
self.pid_file = write_pid_file('orchestrator', worker_id=0)
|
||||
from archivebox.machine.models import Process
|
||||
|
||||
# Clean up any stale PID files from previous runs
|
||||
stale_count = cleanup_stale_pid_files()
|
||||
self.pid = os.getpid()
|
||||
# Register orchestrator process in database
|
||||
self.db_process = Process.current()
|
||||
|
||||
# Clean up any stale Process records from previous runs
|
||||
stale_count = Process.cleanup_stale_running()
|
||||
|
||||
# Collect startup metadata
|
||||
metadata = {
|
||||
@@ -112,11 +110,15 @@ class Orchestrator:
|
||||
pid=self.pid,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
|
||||
def on_shutdown(self, error: BaseException | None = None) -> None:
|
||||
"""Called when orchestrator shuts down."""
|
||||
if self.pid_file:
|
||||
remove_pid_file(self.pid_file)
|
||||
# Update Process record status
|
||||
if hasattr(self, 'db_process') and self.db_process:
|
||||
self.db_process.exit_code = 1 if error else 0
|
||||
self.db_process.status = self.db_process.StatusChoices.EXITED
|
||||
self.db_process.ended_at = timezone.now()
|
||||
self.db_process.save()
|
||||
|
||||
log_worker_event(
|
||||
worker_type='Orchestrator',
|
||||
@@ -125,10 +127,12 @@ class Orchestrator:
|
||||
pid=self.pid,
|
||||
error=error if error and not isinstance(error, KeyboardInterrupt) else None,
|
||||
)
|
||||
|
||||
|
||||
def get_total_worker_count(self) -> int:
|
||||
"""Get total count of running workers across all types."""
|
||||
cleanup_stale_pid_files()
|
||||
from archivebox.machine.models import Process
|
||||
|
||||
Process.cleanup_stale_running()
|
||||
return sum(len(W.get_running_workers()) for W in self.WORKER_TYPES)
|
||||
|
||||
def should_spawn_worker(self, WorkerClass: Type[Worker], queue_count: int) -> bool:
|
||||
@@ -287,7 +291,7 @@ class Orchestrator:
|
||||
Returns the PID of the new process.
|
||||
"""
|
||||
# Use module-level function to avoid pickle errors with local functions
|
||||
proc = Process(
|
||||
proc = MPProcess(
|
||||
target=_run_orchestrator_process,
|
||||
args=(self.exit_on_idle,),
|
||||
name='orchestrator'
|
||||
|
||||
Reference in New Issue
Block a user