mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-04 23:07:56 +10:00
wip
This commit is contained in:
@@ -3,5 +3,5 @@ __order__ = 100
|
||||
|
||||
|
||||
def register_admin(admin_site):
|
||||
from workers.admin import register_admin
|
||||
from archivebox.workers.admin import register_admin
|
||||
register_admin(admin_site)
|
||||
|
||||
@@ -3,5 +3,5 @@ from django.apps import AppConfig
|
||||
|
||||
class WorkersConfig(AppConfig):
|
||||
default_auto_field = 'django.db.models.BigAutoField'
|
||||
name = 'workers'
|
||||
name = 'archivebox.workers'
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from workers.orchestrator import Orchestrator
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
|
||||
@@ -42,6 +42,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
|
||||
retry_at_field_name: ClassVar[str]
|
||||
|
||||
class Meta:
|
||||
app_label = 'workers'
|
||||
abstract = True
|
||||
|
||||
@classmethod
|
||||
@@ -163,9 +164,9 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
|
||||
def bump_retry_at(self, seconds: int = 10):
|
||||
self.RETRY_AT = timezone.now() + timedelta(seconds=seconds)
|
||||
|
||||
def update_for_workers(self, **kwargs) -> bool:
|
||||
def update_and_requeue(self, **kwargs) -> bool:
|
||||
"""
|
||||
Atomically update the object's fields for worker processing.
|
||||
Atomically update fields and schedule retry_at for next worker tick.
|
||||
Returns True if the update was successful, False if the object was modified by another worker.
|
||||
"""
|
||||
# Get the current retry_at to use as optimistic lock
|
||||
@@ -307,7 +308,7 @@ class ModelWithStateMachine(BaseModelWithStateMachine):
|
||||
status: models.CharField = BaseModelWithStateMachine.StatusField()
|
||||
retry_at: models.DateTimeField = BaseModelWithStateMachine.RetryAtField()
|
||||
|
||||
state_machine_name: ClassVar[str] # e.g. 'core.statemachines.ArchiveResultMachine'
|
||||
state_machine_name: ClassVar[str] # e.g. 'core.models.ArchiveResultMachine'
|
||||
state_field_name: ClassVar[str] = 'status'
|
||||
state_machine_attr: ClassVar[str] = 'sm'
|
||||
bind_events_as_methods: ClassVar[bool] = True
|
||||
@@ -316,4 +317,41 @@ class ModelWithStateMachine(BaseModelWithStateMachine):
|
||||
retry_at_field_name: ClassVar[str] = 'retry_at'
|
||||
|
||||
class Meta:
|
||||
app_label = 'workers'
|
||||
abstract = True
|
||||
|
||||
|
||||
class BaseStateMachine(StateMachine):
    """
    Shared base class for ArchiveBox state machines.

    Supplies the common constructor and string representations so concrete
    machines only declare their states/transitions and set `model_attr_name`.

    `model_attr_name` is the name of the instance attribute under which the
    wrapped object is stored (e.g. 'snapshot', 'archiveresult', 'crawl',
    'binary'). It defaults to 'obj'.

    Example usage:
        class SnapshotMachine(BaseStateMachine, strict_states=True):
            model_attr_name = 'snapshot'

            # States and transitions...
            queued = State(value=Snapshot.StatusChoices.QUEUED, initial=True)
            # ...

    The wrapped object is then reachable as self.{model_attr_name}
    (e.g. self.snapshot, self.archiveresult, etc.)
    """

    # Attribute name the wrapped object is stored under; override in subclasses.
    model_attr_name: str = 'obj'

    def __init__(self, obj, *args, **kwargs):
        # Bind the object under the configured alias first, then hand the same
        # object (plus any extra arguments) to StateMachine.__init__.
        alias = self.model_attr_name
        setattr(self, alias, obj)
        super().__init__(obj, *args, **kwargs)

    def __repr__(self) -> str:
        # Rendered as "<ClassName>[<id of the wrapped object>]".
        obj = getattr(self, self.model_attr_name)
        return f'{self.__class__.__name__}[{obj.id}]'

    def __str__(self) -> str:
        # str() and repr() intentionally produce the same text.
        return repr(self)
|
||||
|
||||
@@ -41,7 +41,7 @@ def bg_archive_snapshots(snapshots, kwargs: dict | None = None) -> int:
|
||||
|
||||
Returns the number of snapshots queued.
|
||||
"""
|
||||
from core.models import Snapshot
|
||||
from archivebox.core.models import Snapshot
|
||||
|
||||
kwargs = kwargs or {}
|
||||
|
||||
@@ -68,7 +68,7 @@ def bg_archive_snapshot(snapshot, overwrite: bool = False, methods: list | None
|
||||
|
||||
Returns 1 if queued, 0 otherwise.
|
||||
"""
|
||||
from core.models import Snapshot
|
||||
from archivebox.core.models import Snapshot
|
||||
|
||||
# Queue the snapshot by setting status to queued
|
||||
if hasattr(snapshot, 'id'):
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
from django.views.generic import TemplateView
|
||||
from django.contrib.auth.mixins import UserPassesTestMixin
|
||||
from django.utils import timezone
|
||||
from api.auth import get_or_create_api_token
|
||||
from archivebox.api.auth import get_or_create_api_token
|
||||
|
||||
|
||||
class JobsDashboardView(UserPassesTestMixin, TemplateView):
|
||||
|
||||
@@ -322,7 +322,7 @@ class CrawlWorker(Worker):
|
||||
MAX_TICK_TIME: ClassVar[int] = 60
|
||||
|
||||
def get_model(self):
|
||||
from crawls.models import Crawl
|
||||
from archivebox.crawls.models import Crawl
|
||||
return Crawl
|
||||
|
||||
|
||||
@@ -333,7 +333,7 @@ class SnapshotWorker(Worker):
|
||||
MAX_TICK_TIME: ClassVar[int] = 60
|
||||
|
||||
def get_model(self):
|
||||
from core.models import Snapshot
|
||||
from archivebox.core.models import Snapshot
|
||||
return Snapshot
|
||||
|
||||
|
||||
@@ -348,7 +348,7 @@ class ArchiveResultWorker(Worker):
|
||||
self.plugin = plugin
|
||||
|
||||
def get_model(self):
|
||||
from core.models import ArchiveResult
|
||||
from archivebox.core.models import ArchiveResult
|
||||
return ArchiveResult
|
||||
|
||||
def get_queue(self) -> QuerySet:
|
||||
@@ -358,7 +358,7 @@ class ArchiveResultWorker(Worker):
|
||||
Uses step-based filtering: only claims ARs where hook step <= snapshot.current_step.
|
||||
This ensures hooks execute in order (step 0 → 1 → 2 ... → 9).
|
||||
"""
|
||||
from core.models import ArchiveResult
|
||||
from archivebox.core.models import ArchiveResult
|
||||
from archivebox.hooks import extract_step
|
||||
|
||||
qs = super().get_queue()
|
||||
|
||||
Reference in New Issue
Block a user