mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-04 09:55:33 +10:00
Refactor background hook cleanup to use graceful termination
Changed Snapshot.cleanup() to gracefully terminate background hooks:
1. Send SIGTERM to all background hook processes first
2. Wait up to each hook's plugin-specific timeout
3. Send SIGKILL only to hooks still running after their timeout

Added graceful_terminate_background_hooks() function in hooks.py that:
- Collects all .pid files from output directory
- Validates process identity using mtime
- Sends SIGTERM to all valid processes in phase 1
- Polls each process for up to its plugin-specific timeout
- Sends SIGKILL as last resort if timeout expires
- Returns status for each hook (sigterm/sigkill/already_dead/invalid)
This commit is contained in:
@@ -1407,17 +1407,22 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
|
||||
Clean up background ArchiveResult hooks.
|
||||
|
||||
Called by the state machine when entering the 'sealed' state.
|
||||
Kills any background hooks and finalizes their ArchiveResults.
|
||||
Gracefully terminates background hooks using plugin-specific timeouts:
|
||||
1. Send SIGTERM to all background hook processes
|
||||
2. Wait up to each hook's plugin-specific timeout
|
||||
3. Send SIGKILL to any hooks still running after timeout
|
||||
"""
|
||||
from archivebox.hooks import kill_process
|
||||
from archivebox.hooks import graceful_terminate_background_hooks
|
||||
from archivebox.config.configset import get_config
|
||||
|
||||
# Kill any background ArchiveResult hooks
|
||||
if not self.OUTPUT_DIR.exists():
|
||||
return
|
||||
|
||||
# Find all .pid files in this snapshot's output directory
|
||||
for pid_file in self.OUTPUT_DIR.glob('**/*.pid'):
|
||||
kill_process(pid_file, validate=True)
|
||||
# Get merged config for plugin-specific timeout lookup
|
||||
config = get_config(crawl=self.crawl, snapshot=self)
|
||||
|
||||
# Gracefully terminate all background hooks with plugin-specific timeouts
|
||||
graceful_terminate_background_hooks(self.OUTPUT_DIR, config)
|
||||
|
||||
# Update all STARTED ArchiveResults from filesystem
|
||||
results = self.archiveresult_set.filter(status=ArchiveResult.StatusChoices.STARTED)
|
||||
|
||||
Reference in New Issue
Block a user