Capture exit codes and stderr from background hooks

Extended graceful_terminate_background_hooks() to:
- Reap processes with os.waitpid() to get exit codes
- Write returncode to .returncode file for update_from_output()
- Return detailed result dict with status, returncode, and pid

Updated update_from_output() to:
- Read .returncode and .stderr.log files
- Determine status from returncode if no ArchiveResult JSONL record
- Include stderr in output_str for failed hooks
- Handle signal termination (negative returncodes like -9 for SIGKILL)
- Clean up .returncode files along with other hook output files
This commit is contained in:
Claude
2025-12-31 09:23:41 +00:00
parent b73199b33e
commit 524e8e98c3
2 changed files with 122 additions and 22 deletions

View File

@@ -2711,7 +2711,20 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
# Read and parse JSONL output from hook-specific stdout log
stdout_file = plugin_dir / f'{hook_basename}.stdout.log'
stderr_file = plugin_dir / f'{hook_basename}.stderr.log'
returncode_file = plugin_dir / f'{hook_basename}.returncode'
stdout = stdout_file.read_text() if stdout_file.exists() else ''
stderr = stderr_file.read_text() if stderr_file.exists() else ''
# Read returncode from file (written by graceful_terminate_background_hooks)
returncode = None
if returncode_file.exists():
try:
rc_text = returncode_file.read_text().strip()
returncode = int(rc_text) if rc_text else None
except (ValueError, OSError):
pass
records = []
for line in stdout.splitlines():
@@ -2746,9 +2759,30 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
self._set_binary_from_cmd(hook_data['cmd'])
# Note: cmd_version is derived from binary.version, not stored on Process
else:
# No ArchiveResult record = failed
self.status = self.StatusChoices.FAILED
self.output_str = 'Hook did not output ArchiveResult record'
# No ArchiveResult JSONL record - determine status from returncode
if returncode is not None:
if returncode == 0:
self.status = self.StatusChoices.SUCCEEDED
self.output_str = 'Hook completed successfully (no JSONL output)'
elif returncode < 0:
# Negative = killed by signal (e.g., -9 for SIGKILL, -15 for SIGTERM)
sig_num = abs(returncode)
sig_name = {9: 'SIGKILL', 15: 'SIGTERM'}.get(sig_num, f'signal {sig_num}')
self.status = self.StatusChoices.FAILED
self.output_str = f'Hook killed by {sig_name}'
if stderr:
self.output_str += f'\n\nstderr:\n{stderr[:2000]}'
else:
self.status = self.StatusChoices.FAILED
self.output_str = f'Hook failed with exit code {returncode}'
if stderr:
self.output_str += f'\n\nstderr:\n{stderr[:2000]}'
else:
# No returncode file and no JSONL = failed
self.status = self.StatusChoices.FAILED
self.output_str = 'Hook did not output ArchiveResult record'
if stderr:
self.output_str += f'\n\nstderr:\n{stderr[:2000]}'
# Walk filesystem and populate output_files, output_size, output_mimetypes
# Exclude hook output files (hook-specific names like on_Snapshot__50_wget.stdout.log)
@@ -2758,6 +2792,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
name.endswith('.stdout.log') or
name.endswith('.stderr.log') or
name.endswith('.pid') or
name.endswith('.returncode') or
(name.endswith('.sh') and name.startswith('on_'))
)
@@ -2826,10 +2861,10 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
}
process_hook_records(filtered_records, overrides=overrides)
# Cleanup PID files and empty logs (hook-specific names)
# Cleanup PID files, returncode files, and empty logs (hook-specific names)
pid_file = plugin_dir / f'{hook_basename}.pid'
pid_file.unlink(missing_ok=True)
stderr_file = plugin_dir / f'{hook_basename}.stderr.log'
returncode_file.unlink(missing_ok=True)
if stdout_file.exists() and stdout_file.stat().st_size == 0:
stdout_file.unlink()
if stderr_file.exists() and stderr_file.stat().st_size == 0:

View File

@@ -1270,7 +1270,7 @@ def graceful_terminate_background_hooks(
output_dir: Path,
config: Dict[str, Any],
poll_interval: float = 0.5,
) -> Dict[str, str]:
) -> Dict[str, Dict[str, Any]]:
"""
Gracefully terminate all background hooks in an output directory.
@@ -1278,6 +1278,8 @@ def graceful_terminate_background_hooks(
1. Send SIGTERM to all background hook processes (polite shutdown request)
2. For each hook, wait up to its plugin-specific timeout
3. Send SIGKILL to any hooks still running after their timeout expires
4. Reap each process with waitpid() to get exit code
5. Write returncode to .returncode file for update_from_output()
Args:
output_dir: Snapshot output directory containing plugin subdirs with .pid files
@@ -1285,19 +1287,22 @@ def graceful_terminate_background_hooks(
poll_interval: Seconds between process liveness checks (default: 0.5s)
Returns:
Dict mapping hook names to termination status:
- 'sigterm': Exited cleanly after SIGTERM
- 'sigkill': Required SIGKILL after timeout
- 'already_dead': Process was already dead
- 'invalid': PID file was stale/invalid
Dict mapping hook names to result info:
{
'hook_name': {
'status': 'sigterm' | 'sigkill' | 'already_dead' | 'invalid',
'returncode': int or None,
'pid': int or None,
}
}
Example:
from archivebox.config.configset import get_config
config = get_config(crawl=my_crawl, snapshot=my_snapshot)
results = graceful_terminate_background_hooks(snapshot.OUTPUT_DIR, config)
# {'on_Snapshot__20_chrome_tab.bg': 'sigterm', 'on_Snapshot__63_media.bg': 'sigkill'}
# {'on_Snapshot__20_chrome_tab.bg': {'status': 'sigterm', 'returncode': 0, 'pid': 12345}}
"""
from archivebox.misc.process_utils import validate_pid_file, safe_kill_process
from archivebox.misc.process_utils import validate_pid_file
if not output_dir.exists():
return {}
@@ -1317,20 +1322,23 @@ def graceful_terminate_background_hooks(
# Validate and get PID
if not validate_pid_file(pid_file, cmd_file):
results[hook_name] = 'invalid'
results[hook_name] = {'status': 'invalid', 'returncode': None, 'pid': None}
pid_file.unlink(missing_ok=True)
continue
try:
pid = int(pid_file.read_text().strip())
except (ValueError, OSError):
results[hook_name] = 'invalid'
results[hook_name] = {'status': 'invalid', 'returncode': None, 'pid': None}
pid_file.unlink(missing_ok=True)
continue
# Check if process is still alive
if not process_is_alive(pid_file):
results[hook_name] = 'already_dead'
# Process already dead - try to reap it and get exit code
returncode = _reap_process(pid)
results[hook_name] = {'status': 'already_dead', 'returncode': returncode, 'pid': pid}
_write_returncode_file(pid_file, returncode)
pid_file.unlink(missing_ok=True)
continue
@@ -1345,7 +1353,9 @@ def graceful_terminate_background_hooks(
try:
os.kill(pid, signal.SIGTERM)
except (OSError, ProcessLookupError):
results[hook_name] = 'already_dead'
returncode = _reap_process(pid)
results[hook_name] = {'status': 'already_dead', 'returncode': returncode, 'pid': pid}
_write_returncode_file(pid_file, returncode)
pid_file.unlink(missing_ok=True)
continue
@@ -1364,17 +1374,72 @@ def graceful_terminate_background_hooks(
time.sleep(poll_interval)
if exited_cleanly:
results[hook_name] = 'sigterm'
pid_file.unlink(missing_ok=True)
# Process exited from SIGTERM - reap it to get exit code
returncode = _reap_process(pid)
results[hook_name] = {'status': 'sigterm', 'returncode': returncode, 'pid': pid}
else:
# Timeout expired, send SIGKILL
try:
os.kill(pid, signal.SIGKILL)
results[hook_name] = 'sigkill'
except (OSError, ProcessLookupError):
results[hook_name] = 'sigterm' # Died between check and kill
pid_file.unlink(missing_ok=True)
pass # Process died between check and kill
# Wait briefly for SIGKILL to take effect, then reap
time.sleep(0.1)
returncode = _reap_process(pid)
# returncode from SIGKILL is typically -9 (negative signal number)
results[hook_name] = {'status': 'sigkill', 'returncode': returncode, 'pid': pid}
# Write returncode file for update_from_output() to read
_write_returncode_file(pid_file, results[hook_name]['returncode'])
pid_file.unlink(missing_ok=True)
return results
def _reap_process(pid: int) -> Optional[int]:
"""
Reap a terminated process and return its exit code.
Uses os.waitpid() with WNOHANG to avoid blocking.
Returns None if process cannot be reaped (not a child, already reaped, etc).
"""
try:
# WNOHANG: return immediately if process hasn't exited
# We call this after we know process is dead, so it should return immediately
wpid, status = os.waitpid(pid, os.WNOHANG)
if wpid == 0:
# Process still running (shouldn't happen since we checked)
return None
if os.WIFEXITED(status):
return os.WEXITSTATUS(status)
elif os.WIFSIGNALED(status):
# Killed by signal - return negative signal number (convention)
return -os.WTERMSIG(status)
return None
except ChildProcessError:
# Not our child process (was started by subprocess.Popen which already reaped it,
# or process was started by different parent). This is expected for hooks.
return None
except OSError:
return None
def _write_returncode_file(pid_file: Path, returncode: Optional[int]) -> None:
"""
Write returncode to a .returncode file next to the .pid file.
This allows update_from_output() to know the exit code even for background hooks.
"""
returncode_file = pid_file.with_suffix('.returncode')
try:
if returncode is not None:
returncode_file.write_text(str(returncode))
else:
# Unknown exit code - write empty file to indicate process was terminated
returncode_file.write_text('')
except OSError:
pass # Best effort