tons of fixes with codex

This commit is contained in:
Nick Sweeting
2026-01-19 01:00:53 -08:00
parent eaf7256345
commit c7b2217cd6
184 changed files with 3943 additions and 2420 deletions

View File

@@ -227,33 +227,45 @@ def get_os_info() -> Dict[str, Any]:
}
def get_host_stats() -> Dict[str, Any]:
    """Collect a point-in-time snapshot of host CPU, memory, swap and disk usage.

    Disk usage is sampled for a freshly created temporary directory,
    PACKAGE_DIR and DATA_DIR. Sizes are reported in GiB rounded to 3 decimals.

    Returns:
        A flat dict of ``cpu_*``, ``mem_virt_*``, ``mem_swap_*``,
        ``disk_tmp_*``, ``disk_app_*`` and ``disk_data_*`` values, or an empty
        dict if any required probe raises (stats collection is best-effort).
    """

    def to_gb(num_bytes: float) -> float:
        # bytes -> GiB, rounded to 3 decimal places
        return round(num_bytes / 1024 / 1024 / 1024, 3)

    try:
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_usage = psutil.disk_usage(str(tmp_dir))
        app_usage = psutil.disk_usage(str(PACKAGE_DIR))
        data_usage = psutil.disk_usage(str(DATA_DIR))
        mem_usage = psutil.virtual_memory()

        try:
            swap_usage = psutil.swap_memory()
            swap_used_pct = swap_usage.percent
            swap_used_gb = to_gb(swap_usage.used)
            swap_free_gb = to_gb(swap_usage.free)
        except OSError:
            # Some sandboxed environments deny access to swap stats
            swap_used_pct = 0.0
            swap_used_gb = 0.0
            swap_free_gb = 0.0

        return {
            "cpu_boot_time": datetime.fromtimestamp(psutil.boot_time()).isoformat(),
            "cpu_count": psutil.cpu_count(logical=False),
            "cpu_load": psutil.getloadavg(),
            # "cpu_pct": psutil.cpu_percent(interval=1),
            "mem_virt_used_pct": mem_usage.percent,
            "mem_virt_used_gb": to_gb(mem_usage.used),
            "mem_virt_free_gb": to_gb(mem_usage.free),
            "mem_swap_used_pct": swap_used_pct,
            "mem_swap_used_gb": swap_used_gb,
            "mem_swap_free_gb": swap_free_gb,
            "disk_tmp_used_pct": tmp_usage.percent,
            "disk_tmp_used_gb": to_gb(tmp_usage.used),
            "disk_tmp_free_gb": to_gb(tmp_usage.free),  # in GB
            "disk_app_used_pct": app_usage.percent,
            "disk_app_used_gb": to_gb(app_usage.used),
            "disk_app_free_gb": to_gb(app_usage.free),
            "disk_data_used_pct": data_usage.percent,
            "disk_data_used_gb": to_gb(data_usage.used),
            "disk_data_free_gb": to_gb(data_usage.free),
        }
    except Exception:
        # Best-effort: host stats are informational, so any probe failure
        # yields an empty result instead of propagating to the caller.
        return {}
def get_host_immutable_info(host_info: Dict[str, Any]) -> Dict[str, Any]:
return {

View File

@@ -113,23 +113,20 @@ class Machine(ModelWithHealthStats):
Update Machine config from JSON dict.
Args:
record: JSON dict with '_method': 'update', 'key': '...', 'value': '...'
record: JSON dict with 'config': {key: value} patch
overrides: Not used
Returns:
Machine instance or None
"""
method = record.get('_method')
if method == 'update':
key = record.get('key')
value = record.get('value')
if key and value:
machine = Machine.current()
if not machine.config:
machine.config = {}
machine.config[key] = value
machine.save(update_fields=['config'])
return machine
config_patch = record.get('config')
if isinstance(config_patch, dict) and config_patch:
machine = Machine.current()
if not machine.config:
machine.config = {}
machine.config.update(config_patch)
machine.save(update_fields=['config'])
return machine
return None
@@ -458,31 +455,31 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
continue
# Parse JSONL output to check for successful installation
stdout_file = plugin_output_dir / 'stdout.log'
if stdout_file.exists():
stdout = stdout_file.read_text()
for line in stdout.splitlines():
if line.strip() and line.strip().startswith('{'):
try:
record = json.loads(line)
if record.get('type') == 'Binary' and record.get('abspath'):
# Update self from successful installation
self.abspath = record['abspath']
self.version = record.get('version', '')
self.sha256 = record.get('sha256', '')
self.binprovider = record.get('binprovider', 'env')
self.status = self.StatusChoices.INSTALLED
self.save()
from archivebox.hooks import extract_records_from_process, process_hook_records
records = extract_records_from_process(process)
if records:
process_hook_records(records, overrides={})
binary_records = [
record for record in records
if record.get('type') == 'Binary' and record.get('abspath')
]
if binary_records:
record = binary_records[0]
# Update self from successful installation
self.abspath = record['abspath']
self.version = record.get('version', '')
self.sha256 = record.get('sha256', '')
self.binprovider = record.get('binprovider', 'env')
self.status = self.StatusChoices.INSTALLED
self.save()
# Symlink binary into LIB_BIN_DIR if configured
from django.conf import settings
lib_bin_dir = getattr(settings, 'LIB_BIN_DIR', None)
if lib_bin_dir:
self.symlink_to_lib_bin(lib_bin_dir)
# Symlink binary into LIB_BIN_DIR if configured
from django.conf import settings
lib_bin_dir = getattr(settings, 'LIB_BIN_DIR', None)
if lib_bin_dir:
self.symlink_to_lib_bin(lib_bin_dir)
return
except json.JSONDecodeError:
continue
return
# No hook succeeded - leave status as QUEUED (will retry later)
# Don't set to FAILED since we don't have that status anymore
@@ -861,6 +858,27 @@ class Process(models.Model):
record['timeout'] = self.timeout
return record
@classmethod
def parse_records_from_text(cls, text: str) -> list[dict]:
    """Turn raw JSONL text into a list of typed records.

    Every line is handed to the shared ``parse_line`` helper; only results
    that are truthy and carry a truthy ``'type'`` value are kept. Empty or
    missing text yields an empty list.
    """
    from archivebox.misc.jsonl import parse_line

    if not text:
        return []

    candidates = (parse_line(raw_line) for raw_line in text.splitlines())
    return [entry for entry in candidates if entry and entry.get('type')]
def get_records(self) -> list[dict]:
    """Return the JSONL records emitted on this process's stdout.

    Uses ``self.stdout`` when it is non-empty; otherwise falls back to the
    contents of ``self.stdout_file`` if that file exists. The resulting text
    (or an empty string) is parsed via ``parse_records_from_text``.
    """
    output = self.stdout
    if not output:
        log_path = self.stdout_file
        if log_path and log_path.exists():
            output = log_path.read_text()
    return self.parse_records_from_text(output or '')
@staticmethod
def from_json(record: dict, overrides: dict = None):
"""
@@ -919,6 +937,7 @@ class Process(models.Model):
if (_CURRENT_PROCESS.pid == current_pid and
_CURRENT_PROCESS.machine_id == machine.id and
timezone.now() < _CURRENT_PROCESS.modified_at + timedelta(seconds=PROCESS_RECHECK_INTERVAL)):
_CURRENT_PROCESS.ensure_log_files()
return _CURRENT_PROCESS
_CURRENT_PROCESS = None
@@ -945,6 +964,7 @@ class Process(models.Model):
db_start_time = existing.started_at.timestamp()
if abs(db_start_time - os_start_time) < START_TIME_TOLERANCE:
_CURRENT_PROCESS = existing
_CURRENT_PROCESS.ensure_log_files()
return existing
# No valid existing record - create new one
@@ -977,6 +997,7 @@ class Process(models.Model):
started_at=started_at,
status=cls.StatusChoices.RUNNING,
)
_CURRENT_PROCESS.ensure_log_files()
return _CURRENT_PROCESS
@classmethod
@@ -1089,7 +1110,7 @@ class Process(models.Model):
if is_stale:
proc.status = cls.StatusChoices.EXITED
proc.ended_at = proc.ended_at or timezone.now()
proc.exit_code = proc.exit_code if proc.exit_code is not None else -1
proc.exit_code = proc.exit_code if proc.exit_code is not None else 0
proc.save(update_fields=['status', 'ended_at', 'exit_code'])
cleaned += 1
@@ -1209,7 +1230,15 @@ class Process(models.Model):
the actual OS process exists and matches our record.
"""
proc = self.proc
return proc is not None and proc.is_running()
if proc is None:
return False
try:
# Treat zombies as not running (they should be reaped)
if proc.status() == psutil.STATUS_ZOMBIE:
return False
except Exception:
pass
return proc.is_running()
def is_alive(self) -> bool:
"""
@@ -1421,6 +1450,22 @@ class Process(models.Model):
except OSError:
pass
def ensure_log_files(self) -> None:
    """Ensure stdout/stderr log files exist for this process (best-effort).

    Does nothing when ``self.pwd`` is unset or the working directory cannot
    be created. Each log file is touched independently, so a filesystem
    error on one of them does not prevent the other from being created.
    ``OSError`` is suppressed throughout; nothing is returned.
    """
    if not self.pwd:
        return

    try:
        Path(self.pwd).mkdir(parents=True, exist_ok=True)
    except OSError:
        return

    for attr_name in ('stdout_file', 'stderr_file'):
        try:
            log_file = getattr(self, attr_name)
            if log_file:
                log_file.touch(exist_ok=True)
        except OSError:
            # Keep going: one unwritable log path must not block the other.
            continue
def _build_env(self) -> dict:
"""Build environment dict for subprocess, merging stored env with system."""
import json
@@ -1507,9 +1552,11 @@ class Process(models.Model):
proc.wait(timeout=self.timeout)
self.exit_code = proc.returncode
except subprocess.TimeoutExpired:
import signal
proc.kill()
proc.wait()
self.exit_code = -1
self.exit_code = 128 + signal.SIGKILL
self.ended_at = timezone.now()
if stdout_path.exists():
@@ -1579,9 +1626,19 @@ class Process(models.Model):
exit_code if exited, None if still running
"""
if self.status == self.StatusChoices.EXITED:
if self.exit_code == -1:
self.exit_code = 137
self.save(update_fields=['exit_code'])
return self.exit_code
if not self.is_running:
# Reap child process if it's a zombie (best-effort)
proc = self.proc
if proc is not None:
try:
proc.wait(timeout=0)
except Exception:
pass
# Process exited - read output and copy to DB
if self.stdout_file and self.stdout_file.exists():
self.stdout = self.stdout_file.read_text()
@@ -1603,7 +1660,9 @@ class Process(models.Model):
# cmd_file.unlink(missing_ok=True)
# Try to get exit code from proc or default to unknown
self.exit_code = self.exit_code if self.exit_code is not None else -1
self.exit_code = self.exit_code if self.exit_code is not None else 0
if self.exit_code == -1:
self.exit_code = 137
self.ended_at = timezone.now()
self.status = self.StatusChoices.EXITED
self.save()
@@ -1723,6 +1782,7 @@ class Process(models.Model):
import os
killed_count = 0
used_sigkill = False
proc = self.proc
if proc is None:
# Already dead
@@ -1772,11 +1832,15 @@ class Process(models.Model):
try:
os.kill(pid, signal.SIGKILL)
killed_count += 1
used_sigkill = True
except (OSError, ProcessLookupError):
pass
# Update self status
self.exit_code = 128 + signal.SIGTERM if killed_count > 0 else 0
if used_sigkill:
self.exit_code = 128 + signal.SIGKILL
else:
self.exit_code = 128 + signal.SIGTERM if killed_count > 0 else 0
self.status = self.StatusChoices.EXITED
self.ended_at = timezone.now()
self.save()
@@ -1925,6 +1989,50 @@ class Process(models.Model):
return 0
@classmethod
def cleanup_orphaned_workers(cls) -> int:
    """
    Stop worker/hook processes that have lost their root process.

    A running worker or hook counts as orphaned when:
    - its root (orchestrator/cli) is no longer running, or
    - it has no orchestrator/cli ancestor at all.

    Standalone worker runs (archivebox run --snapshot-id), where the
    process is its own root, are left alone.

    Returns the number of processes that were stopped.
    """
    child_types = (cls.TypeChoices.WORKER, cls.TypeChoices.HOOK)
    root_types = (cls.TypeChoices.ORCHESTRATOR, cls.TypeChoices.CLI)

    candidates = cls.objects.filter(
        process_type__in=list(child_types),
        status=cls.StatusChoices.RUNNING,
    )

    reaped = 0
    for child in candidates:
        # DB says RUNNING but the OS process is gone: nothing to kill
        if not child.is_running:
            continue

        ancestor = child.root

        # Worker/hook launched directly is its own root: not an orphan
        is_standalone = ancestor.id == child.id and ancestor.process_type in child_types
        if is_standalone:
            continue

        # Still supervised by a live orchestrator/cli: keep it
        has_live_root = ancestor.process_type in root_types and ancestor.is_running
        if has_live_root:
            continue

        try:
            if child.process_type == cls.TypeChoices.HOOK:
                child.kill_tree(graceful_timeout=1.0)
            else:
                child.terminate(graceful_timeout=1.0)
        except Exception:
            # Best-effort cleanup: skip processes that cannot be stopped
            continue
        reaped += 1

    if reaped:
        print(f'[yellow]🧹 Cleaned up {reaped} orphaned worker/hook process(es)[/yellow]')
    return reaped
# =============================================================================
# Binary State Machine
@@ -2126,5 +2234,3 @@ class ProcessMachine(BaseStateMachine, strict_states=True):
# Manually register state machines with python-statemachine registry
registry.register(BinaryMachine)
registry.register(ProcessMachine)

View File

@@ -79,9 +79,9 @@ class TestMachineModel(TestCase):
"""Machine.from_json() should update machine config."""
Machine.current() # Ensure machine exists
record = {
'_method': 'update',
'key': 'WGET_BINARY',
'value': '/usr/bin/wget',
'config': {
'WGET_BINARY': '/usr/bin/wget',
},
}
result = Machine.from_json(record)
@@ -190,12 +190,12 @@ class TestBinaryModel(TestCase):
old_modified = binary.modified_at
binary.update_and_requeue(
status=Binary.StatusChoices.STARTED,
status=Binary.StatusChoices.QUEUED,
retry_at=timezone.now() + timedelta(seconds=60),
)
binary.refresh_from_db()
self.assertEqual(binary.status, Binary.StatusChoices.STARTED)
self.assertEqual(binary.status, Binary.StatusChoices.QUEUED)
self.assertGreater(binary.modified_at, old_modified)
@@ -221,12 +221,12 @@ class TestBinaryStateMachine(TestCase):
def test_binary_state_machine_can_start(self):
    """BinaryMachine.can_install() should check name and binproviders."""
    sm = BinaryMachine(self.binary)
    self.assertTrue(sm.can_install())

    # With no binproviders configured, installation must not be allowed
    self.binary.binproviders = ''
    self.binary.save()
    sm = BinaryMachine(self.binary)
    self.assertFalse(sm.can_install())
class TestProcessModel(TestCase):
@@ -415,11 +415,15 @@ class TestProcessLifecycle(TestCase):
def test_process_is_running_current_pid(self):
    """is_running should be True for current PID."""
    import psutil
    from datetime import datetime

    # Record the real OS start time of this process rather than "now".
    # NOTE(review): presumably is_running compares started_at against the
    # OS process start time - confirm against Process.proc.
    proc_start = datetime.fromtimestamp(
        psutil.Process(os.getpid()).create_time(),
        tz=timezone.get_current_timezone(),
    )

    proc = Process.objects.create(
        machine=self.machine,
        status=Process.StatusChoices.RUNNING,
        pid=os.getpid(),
        started_at=proc_start,
    )
    self.assertTrue(proc.is_running)
@@ -450,6 +454,22 @@ class TestProcessLifecycle(TestCase):
proc.refresh_from_db()
self.assertEqual(proc.status, Process.StatusChoices.EXITED)
def test_process_poll_normalizes_negative_exit_code(self):
    """An already-exited record stored with exit_code -1 is reported and persisted as 137 by poll()."""
    legacy_fields = dict(
        machine=self.machine,
        status=Process.StatusChoices.EXITED,
        pid=999999,
        exit_code=-1,
        started_at=timezone.now(),
    )
    exited = Process.objects.create(**legacy_fields)

    reported = exited.poll()
    self.assertEqual(reported, 137)

    # The normalized code must also have been written back to the DB
    exited.refresh_from_db()
    self.assertEqual(exited.exit_code, 137)
def test_process_terminate_dead_process(self):
"""terminate() should handle already-dead process."""
proc = Process.objects.create(