Delete pid_utils.py and migrate to Process model

DELETED:
- workers/pid_utils.py (-192 lines) - replaced by Process model methods

SIMPLIFIED:
- crawls/models.py Crawl.cleanup() (80 lines -> 10 lines)
- hooks.py: deleted process_is_alive() and kill_process() (-45 lines)

UPDATED to use Process model:
- core/models.py: Snapshot.cleanup() and has_running_background_hooks()
- machine/models.py: Binary.cleanup()
- workers/worker.py: Worker.on_startup/shutdown, get_running_workers, start
- workers/orchestrator.py: Orchestrator.on_startup/shutdown, is_running

All subprocess management now uses:
- Process.current() for registering current process
- Process.get_running() / get_running_count() for querying
- Process.cleanup_stale_running() for cleanup
- safe_kill_process() / validate_pid_file() (from misc/process_utils.py) for validated PID killing and liveness checks

Total line reduction: ~300 lines (63 additions, 359 deletions)
This commit is contained in:
Claude
2025-12-31 10:15:22 +00:00
parent 2d3a2fec57
commit b822352fc3
7 changed files with 63 additions and 359 deletions

View File

@@ -1385,7 +1385,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
Called by the state machine when entering the 'sealed' state.
Kills any background hooks and finalizes their ArchiveResults.
"""
from archivebox.hooks import kill_process
from archivebox.misc.process_utils import safe_kill_process
# Kill any background ArchiveResult hooks
if not self.OUTPUT_DIR.exists():
@@ -1393,7 +1393,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
# Find all .pid files in this snapshot's output directory
for pid_file in self.OUTPUT_DIR.glob('**/*.pid'):
kill_process(pid_file, validate=True)
cmd_file = pid_file.parent / 'cmd.sh'
safe_kill_process(pid_file, cmd_file)
# Update all STARTED ArchiveResults from filesystem
results = self.archiveresult_set.filter(status=ArchiveResult.StatusChoices.STARTED)
@@ -1406,7 +1407,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
Used by state machine to determine if snapshot is finished.
"""
from archivebox.hooks import process_is_alive
from archivebox.misc.process_utils import validate_pid_file
if not self.OUTPUT_DIR.exists():
return False
@@ -1415,7 +1416,8 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
if not plugin_dir.is_dir():
continue
pid_file = plugin_dir / 'hook.pid'
if process_is_alive(pid_file):
cmd_file = plugin_dir / 'cmd.sh'
if validate_pid_file(pid_file, cmd_file):
return True
return False