mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
156 lines
6.4 KiB
Python
156 lines
6.4 KiB
Python
from __future__ import annotations
|
|
|
|
from datetime import datetime
|
|
from typing import ClassVar
|
|
|
|
from asgiref.sync import sync_to_async
|
|
from django.utils import timezone
|
|
|
|
from abxbus import BaseEvent
|
|
from abx_dl.events import ProcessCompletedEvent, ProcessStartedEvent
|
|
from abx_dl.services.base import BaseService
|
|
|
|
|
|
def parse_event_datetime(value: str | None):
|
|
if not value:
|
|
return None
|
|
try:
|
|
dt = datetime.fromisoformat(value)
|
|
except ValueError:
|
|
return None
|
|
if timezone.is_naive(dt):
|
|
return timezone.make_aware(dt, timezone.get_current_timezone())
|
|
return dt
|
|
|
|
|
|
class ProcessService(BaseService):
    """Persist process lifecycle events from the bus as ``Process`` rows.

    The service subscribes to ``ProcessStartedEvent`` and
    ``ProcessCompletedEvent``. For each event it finds (or creates) the
    matching ``archivebox.machine.models.Process`` record and overwrites its
    fields from the event payload.
    """

    LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [ProcessStartedEvent, ProcessCompletedEvent]
    EMITS: ClassVar[list[type[BaseEvent]]] = []

    def __init__(self, bus):
        """Initialise the base service and register both handlers on *bus*."""
        super().__init__(bus)
        self.bus.on(ProcessStartedEvent, self.on_ProcessStartedEvent__save_to_db)
        self.bus.on(ProcessCompletedEvent, self.on_ProcessCompletedEvent__save_to_db)

    async def _get_or_create_process(
        self,
        event: ProcessStartedEvent | ProcessCompletedEvent,
        *,
        event_label: str,
    ) -> tuple:
        """Find or create the ``Process`` row that *event* refers to.

        Shared by both event handlers so the lookup/creation rules live in
        exactly one place.

        Args:
            event: The started/completed event being persisted.
            event_label: Event class name used in the error message when the
                event has no usable ``start_ts``.

        Returns:
            A ``(process, process_type, worker_type, started_at)`` tuple, where
            ``process`` is the existing or newly created row and the remaining
            items are the values derived from the event for the lookup.

        Raises:
            ValueError: If ``event.start_ts`` is missing or unparseable.
        """
        # Models are imported at call time rather than at module import time.
        from archivebox.machine.models import NetworkInterface, Process

        iface = await sync_to_async(NetworkInterface.current, thread_sensitive=True)(refresh=True)

        # An explicit process_type on the event wins; otherwise it is inferred
        # from the hook name prefix.
        process_type = event.process_type or (
            Process.TypeChoices.BINARY if event.hook_name.startswith("on_BinaryRequest") else Process.TypeChoices.HOOK
        )
        worker_type = event.worker_type or ""

        started_at = parse_event_datetime(event.start_ts)
        if started_at is None:
            raise ValueError(f"{event_label}.start_ts is required")

        # Fields that identify a process row; used both to look the row up and
        # to populate it when it has to be created.
        identity = {
            "process_type": process_type,
            "worker_type": worker_type,
            "pwd": event.output_dir,
            "cmd": [event.hook_path, *event.hook_args],
            "started_at": started_at,
        }

        process_query = Process.objects.filter(**identity)
        if event.pid:
            # Narrow by pid only when the event actually carries one.
            process_query = process_query.filter(pid=event.pid)
        process = await process_query.order_by("-modified_at").afirst()

        if process is None:
            process = await Process.objects.acreate(
                machine=iface.machine,
                iface=iface,
                env=event.env,
                timeout=event.timeout,
                pid=event.pid or None,
                url=event.url or None,
                status=Process.StatusChoices.RUNNING,
                retry_at=None,
                **identity,
            )
        elif process.iface_id != iface.id or process.machine_id != iface.machine_id:
            # The existing row points at a different interface/machine than the
            # current one: re-point it and persist just those columns.
            process.iface = iface
            process.machine = iface.machine
            await process.asave(update_fields=["iface", "machine", "modified_at"])

        return process, process_type, worker_type, started_at

    async def on_ProcessStartedEvent__save_to_db(self, event: ProcessStartedEvent) -> None:
        """Record a started process as a ``RUNNING`` ``Process`` row.

        Raises:
            ValueError: If ``event.start_ts`` is missing or unparseable.
        """
        process, process_type, worker_type, started_at = await self._get_or_create_process(
            event,
            event_label="ProcessStartedEvent",
        )

        process.pwd = event.output_dir
        process.cmd = [event.hook_path, *event.hook_args]
        process.env = event.env
        process.timeout = event.timeout
        process.pid = event.pid or None
        # Keep any previously stored url when the event does not supply one.
        process.url = event.url or process.url
        process.process_type = process_type or process.process_type
        process.worker_type = worker_type or process.worker_type
        process.started_at = started_at
        process.status = process.StatusChoices.RUNNING
        process.retry_at = None

        await sync_to_async(process.hydrate_binary_from_context, thread_sensitive=True)(
            plugin_name=event.plugin_name,
            hook_path=event.hook_path,
        )
        await process.asave()

    async def on_ProcessCompletedEvent__save_to_db(self, event: ProcessCompletedEvent) -> None:
        """Record a finished process as an ``EXITED`` ``Process`` row.

        Stores the captured stdout/stderr and exit code. ``ended_at`` falls
        back to the current time when the event's ``end_ts`` is missing or
        unparseable.

        Raises:
            ValueError: If ``event.start_ts`` is missing or unparseable.
        """
        process, process_type, worker_type, started_at = await self._get_or_create_process(
            event,
            event_label="ProcessCompletedEvent",
        )

        process.pwd = event.output_dir
        if not process.cmd:
            # Only fill in cmd when the row has none; an existing cmd is kept.
            process.cmd = [event.hook_path, *event.hook_args]
        process.env = event.env
        # Unlike the started handler, a missing pid/url keeps the stored value.
        process.pid = event.pid or process.pid
        process.url = event.url or process.url
        process.process_type = process_type or process.process_type
        process.worker_type = worker_type or process.worker_type
        process.started_at = started_at
        process.ended_at = parse_event_datetime(event.end_ts) or timezone.now()
        process.stdout = event.stdout
        process.stderr = event.stderr
        process.exit_code = event.exit_code
        process.status = process.StatusChoices.EXITED
        process.retry_at = None

        await sync_to_async(process.hydrate_binary_from_context, thread_sensitive=True)(
            plugin_name=event.plugin_name,
            hook_path=event.hook_path,
        )
        await process.asave()
|