# Mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2026-04-05 23:37:58 +10:00)
# 53 lines, 2.3 KiB, Python
from __future__ import annotations
|
|
|
|
from abx_dl.events import CrawlCleanupEvent, CrawlCompletedEvent, CrawlSetupEvent, CrawlStartEvent
|
|
from abx_dl.services.base import BaseService
|
|
|
|
|
|
class CrawlService(BaseService):
    """Persist crawl lifecycle events to the ``Crawl`` database row.

    Each handler loads the ``Crawl`` identified by ``crawl_id`` and updates
    its ``status`` and ``retry_at`` columns:

    * ``CrawlSetupEvent``, ``CrawlStartEvent`` and ``CrawlCleanupEvent`` set
      the status to ``STARTED`` unless the crawl is already ``SEALED``.
    * ``CrawlCompletedEvent`` always sets the status to ``SEALED``.

    Every write also clears ``retry_at``.
    """

    LISTENS_TO = [CrawlSetupEvent, CrawlStartEvent, CrawlCleanupEvent, CrawlCompletedEvent]
    EMITS = []

    def __init__(self, bus, *, crawl_id: str):
        """Store the crawl id and register the four handlers on the bus.

        Args:
            bus: Event bus forwarded to ``BaseService.__init__``.
            crawl_id: Primary key of the ``Crawl`` row this service updates.
        """
        # Assigned before the base initializer runs; this ordering is kept
        # from the original code (presumably so the id is already available
        # if the base class touches the handlers -- TODO confirm).
        self.crawl_id = crawl_id
        super().__init__(bus)
        # NOTE(review): assumes BaseService.__init__ sets ``self.bus``.
        self.bus.on(CrawlSetupEvent, self.on_CrawlSetupEvent__save_to_db)
        self.bus.on(CrawlStartEvent, self.on_CrawlStartEvent__save_to_db)
        self.bus.on(CrawlCleanupEvent, self.on_CrawlCleanupEvent__save_to_db)
        self.bus.on(CrawlCompletedEvent, self.on_CrawlCompletedEvent__save_to_db)

    async def _save_status(self, crawl, status) -> None:
        """Set ``status`` on ``crawl``, clear ``retry_at`` and save the row.

        Only ``status``, ``retry_at`` and ``modified_at`` are passed as
        ``update_fields``, so other columns are not rewritten.
        """
        crawl.status = status
        crawl.retry_at = None
        await crawl.asave(update_fields=["status", "retry_at", "modified_at"])

    async def _mark_started_unless_sealed(self) -> None:
        """Move the crawl to ``STARTED`` unless it is already ``SEALED``.

        Shared by the setup, start and cleanup handlers, which all apply the
        same transition.
        """
        # Imported at call time, as in the original handlers (presumably to
        # avoid importing Django models at module import -- TODO confirm).
        from archivebox.crawls.models import Crawl

        crawl = await Crawl.objects.aget(id=self.crawl_id)
        if crawl.status == Crawl.StatusChoices.SEALED:
            # A sealed crawl is never moved back to STARTED.
            return
        await self._save_status(crawl, Crawl.StatusChoices.STARTED)

    # The three handlers below stay separate, individually named methods
    # because each one is registered by name with ``bus.on`` in ``__init__``.

    async def on_CrawlSetupEvent__save_to_db(self, event: CrawlSetupEvent) -> None:
        """Handle ``CrawlSetupEvent``: mark the crawl ``STARTED`` unless sealed."""
        await self._mark_started_unless_sealed()

    async def on_CrawlStartEvent__save_to_db(self, event: CrawlStartEvent) -> None:
        """Handle ``CrawlStartEvent``: mark the crawl ``STARTED`` unless sealed."""
        await self._mark_started_unless_sealed()

    async def on_CrawlCleanupEvent__save_to_db(self, event: CrawlCleanupEvent) -> None:
        """Handle ``CrawlCleanupEvent``: mark the crawl ``STARTED`` unless sealed."""
        await self._mark_started_unless_sealed()

    async def on_CrawlCompletedEvent__save_to_db(self, event: CrawlCompletedEvent) -> None:
        """Handle ``CrawlCompletedEvent``: unconditionally mark the crawl ``SEALED``."""
        from archivebox.crawls.models import Crawl

        crawl = await Crawl.objects.aget(id=self.crawl_id)
        await self._save_status(crawl, Crawl.StatusChoices.SEALED)
|