Improve scheduling, runtime paths, and API behavior

2026-04-06 07:47:53 +10:00 · 2026-03-15 18:31:56 -07:00
parent 7d42c6c8b5
commit 70c9358cf9
37 changed files with 1058 additions and 398 deletions
--- a/archivebox/workers/orchestrator.py
+++ b/archivebox/workers/orchestrator.py
@@ -336,6 +336,7 @@ class Orchestrator:
        queue_sizes = {}

        self._enforce_hard_timeouts()
+        self._materialize_due_schedules()

        # Check Binary queue
        machine = Machine.current()
@@ -399,6 +400,24 @@ class Orchestrator:

        return queue_sizes

+    def _should_process_schedules(self) -> bool:
+        return not (self.exit_on_idle or self.crawl_id is not None)  # True only if exit_on_idle is falsy and crawl_id is None
+
+    def _materialize_due_schedules(self) -> None:
+        """Enqueue every enabled CrawlSchedule whose is_due() check passes."""
+        if not self._should_process_schedules():
+            return
+        from archivebox.crawls.models import CrawlSchedule
+
+        # One timestamp is shared by the due-check and the enqueue call.
+        tick = timezone.now()
+        enabled = CrawlSchedule.objects.filter(is_enabled=True)
+        enabled = enabled.select_related('template', 'template__created_by')
+
+        for candidate in enabled:
+            if candidate.is_due(tick):
+                candidate.enqueue(queued_at=tick)
+
    def _enforce_hard_timeouts(self) -> None:
        """Force-kill and seal hooks/archiveresults/snapshots that exceed hard limits."""
        import time