mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-04 23:07:56 +10:00
remove huey
This commit is contained in:
@@ -42,6 +42,7 @@ def register_urls(api: NinjaAPI) -> NinjaAPI:
|
||||
api.add_router('/crawls/', 'api.v1_crawls.router')
|
||||
api.add_router('/cli/', 'api.v1_cli.router')
|
||||
api.add_router('/workers/', 'api.v1_workers.router')
|
||||
api.add_router('/machine/', 'api.v1_machine.router')
|
||||
return api
|
||||
|
||||
|
||||
|
||||
@@ -107,7 +107,7 @@ class RemoveCommandSchema(Schema):
|
||||
@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
|
||||
def cli_add(request, args: AddCommandSchema):
|
||||
from archivebox.cli.archivebox_add import add
|
||||
|
||||
|
||||
result = add(
|
||||
urls=args.urls,
|
||||
tag=args.tag,
|
||||
@@ -115,8 +115,9 @@ def cli_add(request, args: AddCommandSchema):
|
||||
update=args.update,
|
||||
index_only=args.index_only,
|
||||
overwrite=args.overwrite,
|
||||
extract=args.extract,
|
||||
plugins=args.extract, # extract in API maps to plugins param
|
||||
parser=args.parser,
|
||||
bg=True, # Always run in background for API calls
|
||||
)
|
||||
|
||||
return {
|
||||
|
||||
206
archivebox/api/v1_machine.py
Normal file
206
archivebox/api/v1_machine.py
Normal file
@@ -0,0 +1,206 @@
|
||||
__package__ = 'archivebox.api'
|
||||
|
||||
from uuid import UUID
|
||||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from ninja import Router, Schema, FilterSchema, Field, Query
|
||||
from ninja.pagination import paginate
|
||||
|
||||
from api.v1_core import CustomPagination
|
||||
|
||||
|
||||
router = Router(tags=['Machine and Dependencies'])
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Machine Schemas
|
||||
# ============================================================================
|
||||
|
||||
class MachineSchema(Schema):
    """Schema for Machine model."""

    # Constant discriminator string identifying the serialized model.
    TYPE: str = 'machine.Machine'

    # Primary key and bookkeeping timestamps.
    id: UUID
    created_at: datetime
    modified_at: datetime

    # Host identification.
    guid: str
    hostname: str

    # Fields carrying the hw_ prefix.
    hw_in_docker: bool
    hw_in_vm: bool
    hw_manufacturer: str
    hw_product: str
    hw_uuid: str

    # Fields carrying the os_ prefix.
    os_arch: str
    os_family: str
    os_platform: str
    os_release: str
    os_kernel: str

    # Free-form stats mapping followed by the usage counters.
    stats: dict
    num_uses_succeeded: int
    num_uses_failed: int
|
||||
|
||||
|
||||
class MachineFilterSchema(FilterSchema):
    # Every filter is optional; each `q` names the ORM lookup it is applied with.

    # Prefix match on the id.
    id: Optional[str] = Field(None, q='id__startswith')

    # Case-insensitive substring matches.
    hostname: Optional[str] = Field(None, q='hostname__icontains')
    os_platform: Optional[str] = Field(None, q='os_platform__icontains')

    # Exact matches.
    os_arch: Optional[str] = Field(None, q='os_arch')
    hw_in_docker: Optional[bool] = Field(None, q='hw_in_docker')
    hw_in_vm: Optional[bool] = Field(None, q='hw_in_vm')
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Dependency Schemas
|
||||
# ============================================================================
|
||||
|
||||
class DependencySchema(Schema):
    """Schema for Dependency model."""

    # Constant discriminator string identifying the serialized model.
    TYPE: str = 'machine.Dependency'

    # Primary key and bookkeeping timestamps.
    id: UUID
    created_at: datetime
    modified_at: datetime

    # Fields read directly off the Dependency object.
    bin_name: str
    bin_providers: str
    custom_cmds: dict
    config: dict

    # Fields populated by the resolve_* static methods below.
    is_installed: bool
    installed_count: int

    @staticmethod
    def resolve_is_installed(obj) -> bool:
        # Pass the object's own is_installed value straight through.
        installed = obj.is_installed
        return installed

    @staticmethod
    def resolve_installed_count(obj) -> int:
        # Count the related installed_binaries via the manager's count().
        binaries = obj.installed_binaries
        return binaries.count()
|
||||
|
||||
|
||||
class DependencyFilterSchema(FilterSchema):
    # Every filter is optional; each `q` names the ORM lookup it is applied with.

    # Prefix match on the id.
    id: Optional[str] = Field(None, q='id__startswith')

    # Case-insensitive substring matches.
    bin_name: Optional[str] = Field(None, q='bin_name__icontains')
    bin_providers: Optional[str] = Field(None, q='bin_providers__icontains')
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# InstalledBinary Schemas
|
||||
# ============================================================================
|
||||
|
||||
class InstalledBinarySchema(Schema):
    """Schema for InstalledBinary model."""

    # Constant discriminator string identifying the serialized model.
    TYPE: str = 'machine.InstalledBinary'

    # Primary key and bookkeeping timestamps.
    id: UUID
    created_at: datetime
    modified_at: datetime

    # Owning machine: the raw foreign key plus its hostname (resolved below).
    machine_id: UUID
    machine_hostname: str

    # Optional dependency link: both values may be None.
    dependency_id: Optional[UUID]
    dependency_bin_name: Optional[str]

    # Details of the binary itself.
    name: str
    binprovider: str
    abspath: str
    version: str
    sha256: str
    is_valid: bool

    # Usage counters.
    num_uses_succeeded: int
    num_uses_failed: int

    @staticmethod
    def resolve_machine_hostname(obj) -> str:
        # Follow the machine relation to reach its hostname.
        machine = obj.machine
        return machine.hostname

    @staticmethod
    def resolve_dependency_id(obj) -> Optional[UUID]:
        # Pass the raw foreign-key value through unchanged.
        return obj.dependency_id

    @staticmethod
    def resolve_dependency_bin_name(obj) -> Optional[str]:
        # A binary without a linked dependency has no bin_name to report.
        if not obj.dependency:
            return None
        return obj.dependency.bin_name

    @staticmethod
    def resolve_is_valid(obj) -> bool:
        # Pass the object's own is_valid value straight through.
        valid = obj.is_valid
        return valid
|
||||
|
||||
|
||||
class InstalledBinaryFilterSchema(FilterSchema):
    # Every filter is optional; each `q` names the ORM lookup it is applied with.

    # Prefix match on the binary's own id.
    id: Optional[str] = Field(None, q='id__startswith')

    # Case-insensitive substring match on the name; exact match on the provider.
    name: Optional[str] = Field(None, q='name__icontains')
    binprovider: Optional[str] = Field(None, q='binprovider')

    # Prefix matches on the related object ids.
    machine_id: Optional[str] = Field(None, q='machine_id__startswith')
    dependency_id: Optional[str] = Field(None, q='dependency_id__startswith')

    # Case-insensitive substring match on the version string.
    version: Optional[str] = Field(None, q='version__icontains')
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Machine Endpoints
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/machines", response=List[MachineSchema], url_name="get_machines")
@paginate(CustomPagination)
def get_machines(request, filters: MachineFilterSchema = Query(...)):
    """List all machines."""
    # Imported lazily inside the handler, as the other endpoints in this module do.
    from machine.models import Machine

    # Start from every Machine row, narrow it by the query-string filters,
    # then de-duplicate; pagination is applied by the decorator above.
    every_machine = Machine.objects.all()
    matching = filters.filter(every_machine)
    return matching.distinct()
|
||||
|
||||
|
||||
# NOTE: the literal "/machine/current" route must be registered BEFORE the
# parameterized "/machine/{machine_id}" route. URL patterns are tried in
# registration order and a string path parameter matches any single path
# segment, so with the opposite order "current" is captured as a machine_id
# and get_current_machine can never be reached.
@router.get("/machine/current", response=MachineSchema, url_name="get_current_machine")
def get_current_machine(request):
    """Get the current machine."""
    from machine.models import Machine

    return Machine.current()


@router.get("/machine/{machine_id}", response=MachineSchema, url_name="get_machine")
def get_machine(request, machine_id: str):
    """Get a specific machine by ID."""
    from machine.models import Machine
    from django.db.models import Q

    # The path segment is accepted either as a prefix of the machine id or as
    # a case-insensitive hostname. .get() raises if nothing (or more than one
    # row) matches.
    by_id_prefix = Q(id__startswith=machine_id)
    by_hostname = Q(hostname__iexact=machine_id)
    return Machine.objects.get(by_id_prefix | by_hostname)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Dependency Endpoints
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/dependencies", response=List[DependencySchema], url_name="get_dependencies")
@paginate(CustomPagination)
def get_dependencies(request, filters: DependencyFilterSchema = Query(...)):
    """List all dependencies."""
    # Imported lazily inside the handler, as the other endpoints in this module do.
    from machine.models import Dependency

    # Start from every Dependency row, narrow it by the query-string filters,
    # then de-duplicate; pagination is applied by the decorator above.
    every_dependency = Dependency.objects.all()
    matching = filters.filter(every_dependency)
    return matching.distinct()
|
||||
|
||||
|
||||
@router.get("/dependency/{dependency_id}", response=DependencySchema, url_name="get_dependency")
def get_dependency(request, dependency_id: str):
    """Get a specific dependency by ID or bin_name."""
    from machine.models import Dependency
    from django.db.models import Q

    dependencies = Dependency.objects
    try:
        # First interpret the path segment as a prefix of the dependency id.
        found = dependencies.get(Q(id__startswith=dependency_id))
    except Dependency.DoesNotExist:
        # No id matched: fall back to a case-insensitive bin_name lookup.
        # A miss here propagates DoesNotExist to the caller.
        found = dependencies.get(bin_name__iexact=dependency_id)
    return found
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# InstalledBinary Endpoints
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/binaries", response=List[InstalledBinarySchema], url_name="get_binaries")
@paginate(CustomPagination)
def get_binaries(request, filters: InstalledBinaryFilterSchema = Query(...)):
    """List all installed binaries."""
    from machine.models import InstalledBinary

    # Join machine and dependency up front: the response schema's resolvers
    # read obj.machine and obj.dependency for every row.
    binaries = InstalledBinary.objects.all().select_related('machine', 'dependency')
    matching = filters.filter(binaries)
    return matching.distinct()
|
||||
|
||||
|
||||
@router.get("/binary/{binary_id}", response=InstalledBinarySchema, url_name="get_binary")
def get_binary(request, binary_id: str):
    """Get a specific installed binary by ID."""
    from machine.models import InstalledBinary

    # Join machine and dependency up front: the response schema's resolvers
    # read both relations.
    binaries = InstalledBinary.objects.select_related('machine', 'dependency')
    # The path segment is matched as a prefix of the id.
    return binaries.get(id__startswith=binary_id)
|
||||
|
||||
|
||||
@router.get("/binary/by-name/{name}", response=List[InstalledBinarySchema], url_name="get_binaries_by_name")
def get_binaries_by_name(request, name: str):
    """Get all installed binaries with the given name."""
    from machine.models import InstalledBinary

    # Case-insensitive exact match on the name, with both relations joined
    # because the response schema's resolvers read them.
    same_name = InstalledBinary.objects.filter(name__iexact=name)
    with_relations = same_name.select_related('machine', 'dependency')
    # Evaluate the queryset into a concrete list; this endpoint is not paginated.
    return list(with_relations)
|
||||
@@ -4,125 +4,157 @@ from uuid import UUID
|
||||
from typing import List, Any
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
from ninja import Router, Schema
|
||||
|
||||
|
||||
router = Router(tags=['Workers and Tasks'])
|
||||
|
||||
|
||||
class TaskSchema(Schema):
|
||||
class QueueItemSchema(Schema):
|
||||
"""Schema for a single item in a worker's queue."""
|
||||
TYPE: str
|
||||
|
||||
id: UUID
|
||||
description: str
|
||||
|
||||
status: str
|
||||
retry_at: datetime | None
|
||||
|
||||
created_at: datetime
|
||||
modified_at: datetime
|
||||
created_by_id: int
|
||||
|
||||
description: str
|
||||
|
||||
@staticmethod
|
||||
def resolve_TYPE(obj) -> str:
|
||||
return f'{obj._meta.app_label}.{obj._meta.model_name}'
|
||||
|
||||
@staticmethod
|
||||
def resolve_description(obj) -> str:
|
||||
return str(obj)
|
||||
|
||||
|
||||
class ActorSchema(Schema):
|
||||
# TYPE: str = 'workers.actor.ActorType'
|
||||
|
||||
# name: str
|
||||
#pid: int | None
|
||||
idle_count: int
|
||||
launch_kwargs: dict[str, Any]
|
||||
mode: str
|
||||
|
||||
class WorkerSchema(Schema):
|
||||
"""Schema for a Worker type."""
|
||||
name: str
|
||||
model: str
|
||||
statemachine: str
|
||||
ACTIVE_STATE: str
|
||||
EVENT_NAME: str
|
||||
CLAIM_ORDER: list[str]
|
||||
CLAIM_FROM_TOP_N: int
|
||||
CLAIM_ATOMIC: bool
|
||||
MAX_TICK_TIME: int
|
||||
MAX_CONCURRENT_ACTORS: int
|
||||
|
||||
future: list[TaskSchema]
|
||||
pending: list[TaskSchema]
|
||||
stalled: list[TaskSchema]
|
||||
active: list[TaskSchema]
|
||||
past: list[TaskSchema]
|
||||
|
||||
max_tick_time: int
|
||||
max_concurrent_tasks: int
|
||||
poll_interval: float
|
||||
idle_timeout: int
|
||||
running_count: int
|
||||
running_workers: List[dict[str, Any]]
|
||||
queue_count: int
|
||||
queue: List[QueueItemSchema]
|
||||
|
||||
@staticmethod
|
||||
def resolve_model(obj) -> str:
|
||||
return obj.Model.__name__
|
||||
|
||||
@staticmethod
|
||||
def resolve_statemachine(obj) -> str:
|
||||
return obj.StateMachineClass.__name__
|
||||
|
||||
@staticmethod
|
||||
def resolve_name(obj) -> str:
|
||||
return str(obj)
|
||||
Model = obj.get_model()
|
||||
return f'{Model._meta.app_label}.{Model._meta.model_name}'
|
||||
|
||||
@staticmethod
|
||||
def resolve_ACTIVE_STATE(obj) -> str:
|
||||
return str(obj.ACTIVE_STATE)
|
||||
|
||||
@staticmethod
|
||||
def resolve_FINAL_STATES(obj) -> list[str]:
|
||||
return [str(state) for state in obj.FINAL_STATES]
|
||||
|
||||
@staticmethod
|
||||
def resolve_future(obj) -> list[TaskSchema]:
|
||||
return [obj for obj in obj.qs.filter(obj.future_q).order_by('-retry_at')]
|
||||
|
||||
@staticmethod
|
||||
def resolve_pending(obj) -> list[TaskSchema]:
|
||||
return [obj for obj in obj.qs.filter(obj.pending_q).order_by('-retry_at')]
|
||||
|
||||
@staticmethod
|
||||
def resolve_stalled(obj) -> list[TaskSchema]:
|
||||
return [obj for obj in obj.qs.filter(obj.stalled_q).order_by('-retry_at')]
|
||||
|
||||
@staticmethod
|
||||
def resolve_active(obj) -> list[TaskSchema]:
|
||||
return [obj for obj in obj.qs.filter(obj.active_q).order_by('-retry_at')]
|
||||
def resolve_max_tick_time(obj) -> int:
|
||||
return obj.MAX_TICK_TIME
|
||||
|
||||
@staticmethod
|
||||
def resolve_past(obj) -> list[TaskSchema]:
|
||||
return [obj for obj in obj.qs.filter(obj.final_q).order_by('-modified_at')]
|
||||
def resolve_max_concurrent_tasks(obj) -> int:
|
||||
return obj.MAX_CONCURRENT_TASKS
|
||||
|
||||
@staticmethod
|
||||
def resolve_poll_interval(obj) -> float:
|
||||
return obj.POLL_INTERVAL
|
||||
|
||||
@staticmethod
|
||||
def resolve_idle_timeout(obj) -> int:
|
||||
return obj.IDLE_TIMEOUT
|
||||
|
||||
@staticmethod
|
||||
def resolve_running_count(obj) -> int:
|
||||
return len(obj.get_running_workers())
|
||||
|
||||
@staticmethod
|
||||
def resolve_running_workers(obj) -> List[dict[str, Any]]:
|
||||
return obj.get_running_workers()
|
||||
|
||||
@staticmethod
|
||||
def resolve_queue_count(obj) -> int:
|
||||
return obj.get_queue().count()
|
||||
|
||||
@staticmethod
|
||||
def resolve_queue(obj) -> List[QueueItemSchema]:
|
||||
return list(obj.get_queue()[:50]) # Limit to 50 items
|
||||
|
||||
|
||||
class OrchestratorSchema(Schema):
|
||||
# TYPE: str = 'workers.orchestrator.Orchestrator'
|
||||
|
||||
#pid: int | None
|
||||
exit_on_idle: bool
|
||||
mode: str
|
||||
|
||||
actors: list[ActorSchema]
|
||||
|
||||
@staticmethod
|
||||
def resolve_actors(obj) -> list[ActorSchema]:
|
||||
return [actor() for actor in obj.actor_types.values()]
|
||||
"""Schema for the Orchestrator."""
|
||||
is_running: bool
|
||||
poll_interval: float
|
||||
idle_timeout: int
|
||||
max_workers_per_type: int
|
||||
max_total_workers: int
|
||||
total_worker_count: int
|
||||
workers: List[WorkerSchema]
|
||||
|
||||
|
||||
@router.get("/orchestrators", response=List[OrchestratorSchema], url_name="get_orchestrators")
|
||||
def get_orchestrators(request):
|
||||
"""List all the task orchestrators (aka Orchestrators) that are currently running"""
|
||||
|
||||
@router.get("/orchestrator", response=OrchestratorSchema, url_name="get_orchestrator")
|
||||
def get_orchestrator(request):
|
||||
"""Get the orchestrator status and all worker queues."""
|
||||
from workers.orchestrator import Orchestrator
|
||||
from workers.worker import CrawlWorker, SnapshotWorker, ArchiveResultWorker
|
||||
|
||||
orchestrator = Orchestrator()
|
||||
|
||||
return [orchestrator]
|
||||
# Create temporary worker instances to query their queues
|
||||
workers = [
|
||||
CrawlWorker(worker_id=-1),
|
||||
SnapshotWorker(worker_id=-1),
|
||||
ArchiveResultWorker(worker_id=-1),
|
||||
]
|
||||
|
||||
return {
|
||||
'is_running': orchestrator.is_running(),
|
||||
'poll_interval': orchestrator.POLL_INTERVAL,
|
||||
'idle_timeout': orchestrator.IDLE_TIMEOUT,
|
||||
'max_workers_per_type': orchestrator.MAX_WORKERS_PER_TYPE,
|
||||
'max_total_workers': orchestrator.MAX_TOTAL_WORKERS,
|
||||
'total_worker_count': orchestrator.get_total_worker_count(),
|
||||
'workers': workers,
|
||||
}
|
||||
|
||||
|
||||
@router.get("/actors", response=List[ActorSchema], url_name="get_actors")
|
||||
def get_actors(request):
|
||||
"""List all the task consumer workers (aka Actors) that are currently running"""
|
||||
@router.get("/workers", response=List[WorkerSchema], url_name="get_workers")
|
||||
def get_workers(request):
|
||||
"""List all worker types and their current status."""
|
||||
from workers.worker import CrawlWorker, SnapshotWorker, ArchiveResultWorker
|
||||
|
||||
from workers.orchestrator import Orchestrator
|
||||
orchestrator = Orchestrator()
|
||||
return orchestrator.actor_types.values()
|
||||
# Create temporary instances to query their queues
|
||||
return [
|
||||
CrawlWorker(worker_id=-1),
|
||||
SnapshotWorker(worker_id=-1),
|
||||
ArchiveResultWorker(worker_id=-1),
|
||||
]
|
||||
|
||||
|
||||
@router.get("/worker/{worker_name}", response=WorkerSchema, url_name="get_worker")
def get_worker(request, worker_name: str):
    """Get status and queue for a specific worker type."""
    from workers.worker import WORKER_TYPES

    if worker_name in WORKER_TYPES:
        # Build a throwaway instance (worker_id=-1) for the response schema to read from.
        worker_cls = WORKER_TYPES[worker_name]
        return worker_cls(worker_id=-1)

    # Unknown name: answer 404 and list the accepted worker type names.
    from ninja.errors import HttpError
    raise HttpError(404, f"Unknown worker type: {worker_name}. Valid types: {list(WORKER_TYPES.keys())}")
|
||||
|
||||
|
||||
@router.get("/worker/{worker_name}/queue", response=List[QueueItemSchema], url_name="get_worker_queue")
def get_worker_queue(request, worker_name: str, limit: int = 100):
    """Get the current queue for a specific worker type."""
    from ninja.errors import HttpError
    from workers.worker import WORKER_TYPES

    # Unknown name: answer 404 and list the accepted worker type names.
    if worker_name not in WORKER_TYPES:
        raise HttpError(404, f"Unknown worker type: {worker_name}. Valid types: {list(WORKER_TYPES.keys())}")

    # `limit` comes straight from the query string and is used as a slice
    # bound below. NOTE(review): get_queue() looks like a Django QuerySet,
    # which refuses negative slice bounds -- reject them here with a 400
    # instead of letting the request fail with an unhandled error.
    if limit < 0:
        raise HttpError(400, f"limit must be >= 0, got {limit}")

    # Build a throwaway instance (worker_id=-1) just to query its queue.
    worker = WORKER_TYPES[worker_name](worker_id=-1)
    return list(worker.get_queue()[:limit])
|
||||
|
||||
|
||||
# Progress endpoint moved to core.views.live_progress_view for simplicity
|
||||
|
||||
Reference in New Issue
Block a user