Files
ArchiveBox/archivebox/services/crawl_service.py
2026-03-25 05:36:07 -07:00

53 lines
2.3 KiB
Python

from __future__ import annotations
from abx_dl.events import CrawlCleanupEvent, CrawlCompletedEvent, CrawlSetupEvent, CrawlStartEvent
from abx_dl.services.base import BaseService
class CrawlService(BaseService):
    """Persist crawl lifecycle events onto one ``Crawl`` database row.

    Each instance is tied to a single crawl, identified by ``crawl_id``.
    Setup, start and cleanup events move the crawl to ``STARTED`` unless it
    is already ``SEALED``; the completed event moves it to ``SEALED``.
    """

    LISTENS_TO = [CrawlSetupEvent, CrawlStartEvent, CrawlCleanupEvent, CrawlCompletedEvent]
    EMITS = []

    def __init__(self, bus, *, crawl_id: str):
        """Remember *crawl_id* and subscribe the handlers below on *bus*.

        Args:
            bus: Event bus passed through to ``BaseService.__init__``; the
                handlers are registered on ``self.bus`` afterwards.
            crawl_id: Primary key of the ``Crawl`` row this service updates.
        """
        # Assigned before the base initializer runs, as in the original code.
        # NOTE(review): confirm whether BaseService.__init__ relies on this order.
        self.crawl_id = crawl_id
        super().__init__(bus)
        self.bus.on(CrawlSetupEvent, self.on_CrawlSetupEvent__save_to_db)
        self.bus.on(CrawlStartEvent, self.on_CrawlStartEvent__save_to_db)
        self.bus.on(CrawlCleanupEvent, self.on_CrawlCleanupEvent__save_to_db)
        self.bus.on(CrawlCompletedEvent, self.on_CrawlCompletedEvent__save_to_db)

    async def _mark_started(self) -> None:
        """Load this service's crawl and mark it STARTED unless it is SEALED.

        Shared by the setup, start and cleanup handlers, which all apply the
        same update.
        """
        # Imported inside the function rather than at module level, as in the
        # original handlers (presumably to defer loading the Django models).
        from archivebox.crawls.models import Crawl

        crawl = await Crawl.objects.aget(id=self.crawl_id)
        # NOTE(review): the extent of this guard was reconstructed from source
        # whose indentation had been lost; confirm that both assignments (and
        # not the save) belong under it.
        if crawl.status != Crawl.StatusChoices.SEALED:
            crawl.status = Crawl.StatusChoices.STARTED
            crawl.retry_at = None
        # "modified_at" is listed although it is never assigned here
        # (presumably an auto-updating timestamp on the model).
        await crawl.asave(update_fields=["status", "retry_at", "modified_at"])

    async def on_CrawlSetupEvent__save_to_db(self, event: CrawlSetupEvent) -> None:
        """Handle ``CrawlSetupEvent``: mark the crawl as started."""
        await self._mark_started()

    async def on_CrawlStartEvent__save_to_db(self, event: CrawlStartEvent) -> None:
        """Handle ``CrawlStartEvent``: mark the crawl as started."""
        await self._mark_started()

    async def on_CrawlCleanupEvent__save_to_db(self, event: CrawlCleanupEvent) -> None:
        """Handle ``CrawlCleanupEvent``: mark the crawl as started."""
        await self._mark_started()

    async def on_CrawlCompletedEvent__save_to_db(self, event: CrawlCompletedEvent) -> None:
        """Handle ``CrawlCompletedEvent``: seal the crawl and clear ``retry_at``."""
        from archivebox.crawls.models import Crawl

        crawl = await Crawl.objects.aget(id=self.crawl_id)
        # Unlike the other handlers, completion overwrites the status
        # regardless of its current value.
        crawl.status = Crawl.StatusChoices.SEALED
        crawl.retry_at = None
        await crawl.asave(update_fields=["status", "retry_at", "modified_at"])