Improve scheduling, runtime paths, and API behavior

This commit is contained in:
Nick Sweeting
2026-03-15 18:31:56 -07:00
parent 7d42c6c8b5
commit 70c9358cf9
37 changed files with 1058 additions and 398 deletions

View File

@@ -336,6 +336,7 @@ class Orchestrator:
queue_sizes = {}
self._enforce_hard_timeouts()
self._materialize_due_schedules()
# Check Binary queue
machine = Machine.current()
@@ -399,6 +400,24 @@ class Orchestrator:
return queue_sizes
def _should_process_schedules(self) -> bool:
    """Report whether this orchestrator instance should handle crawl schedules.

    Returns:
        False when ``self.exit_on_idle`` is truthy; otherwise True only if
        ``self.crawl_id`` is None.
    """
    if self.exit_on_idle:
        return False
    return self.crawl_id is None
def _materialize_due_schedules(self) -> None:
    """Enqueue every enabled crawl schedule that reports itself as due.

    Does nothing unless ``self._should_process_schedules()`` is truthy.
    Otherwise a single timestamp is taken up front and reused both for the
    ``is_due`` check and as the ``queued_at`` value passed to ``enqueue``,
    so all schedules in one pass are evaluated against the same instant.
    """
    if self._should_process_schedules():
        # Imported lazily inside the method, only once the guard has passed.
        from archivebox.crawls.models import CrawlSchedule

        tick_time = timezone.now()
        enabled = CrawlSchedule.objects.filter(is_enabled=True)
        candidates = enabled.select_related('template', 'template__created_by')
        for candidate in candidates:
            if candidate.is_due(tick_time):
                candidate.enqueue(queued_at=tick_time)
def _enforce_hard_timeouts(self) -> None:
"""Force-kill and seal hooks/archiveresults/snapshots that exceed hard limits."""
import time