Files
ArchiveBox/archivebox/services/crawl_service.py
2026-03-22 20:25:18 -07:00

45 lines
1.6 KiB
Python

from __future__ import annotations
from abx_dl.events import CrawlCleanupEvent, CrawlCompletedEvent, CrawlSetupEvent, CrawlStartEvent
from abx_dl.services.base import BaseService
from .db import run_db_op
class CrawlService(BaseService):
    """Mirror crawl lifecycle events onto the status of a single ``Crawl`` row.

    Setup, start and cleanup events each move the crawl to ``STARTED`` unless
    it is already ``SEALED``; the completed event moves it to ``SEALED``.
    Every status write also clears ``retry_at``.
    """

    LISTENS_TO = [CrawlSetupEvent, CrawlStartEvent, CrawlCleanupEvent, CrawlCompletedEvent]
    EMITS = []

    def __init__(self, bus, *, crawl_id: str):
        """Store the target crawl id, then initialise the base service with ``bus``.

        Args:
            bus: Passed straight through to ``BaseService.__init__``.
            crawl_id: Primary key of the ``Crawl`` row this service updates.
        """
        # crawl_id is set before the base initialiser runs.
        self.crawl_id = crawl_id
        super().__init__(bus)

    # NOTE(review): the ``__Outer`` suffix is presumably a handler-naming
    # convention understood by BaseService -- confirm against abx_dl.
    # NOTE(review): run_db_op presumably executes the synchronous ORM callable
    # somewhere safe to call from async code -- confirm in ``.db``.

    async def on_CrawlSetupEvent__Outer(self, event: CrawlSetupEvent) -> None:
        """Mark the crawl as started when setup begins."""
        await run_db_op(self._mark_started)

    async def on_CrawlStartEvent__Outer(self, event: CrawlStartEvent) -> None:
        """Mark the crawl as started when the crawl itself begins."""
        await run_db_op(self._mark_started)

    async def on_CrawlCleanupEvent__Outer(self, event: CrawlCleanupEvent) -> None:
        """Mark the crawl as started during cleanup (no-op once sealed)."""
        await run_db_op(self._mark_started)

    async def on_CrawlCompletedEvent__Outer(self, event: CrawlCompletedEvent) -> None:
        """Seal the crawl once it has completed."""
        await run_db_op(self._mark_completed)

    def _store_status(self, row, status) -> None:
        """Set ``status`` on ``row``, clear ``retry_at`` and save those fields."""
        row.status = status
        row.retry_at = None
        # modified_at is listed so it is written alongside the two changed fields.
        row.save(update_fields=["status", "retry_at", "modified_at"])

    def _mark_started(self) -> None:
        """Move the crawl to ``STARTED`` unless it has already been sealed."""
        # Imported inside the method rather than at module level, as in the original.
        from archivebox.crawls.models import Crawl

        row = Crawl.objects.get(id=self.crawl_id)
        if row.status == Crawl.StatusChoices.SEALED:
            # A sealed crawl is never moved back to STARTED.
            return
        self._store_status(row, Crawl.StatusChoices.STARTED)

    def _mark_completed(self) -> None:
        """Move the crawl to ``SEALED`` unconditionally."""
        from archivebox.crawls.models import Crawl

        row = Crawl.objects.get(id=self.crawl_id)
        self._store_status(row, Crawl.StatusChoices.SEALED)