mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 07:17:52 +10:00
remove huey
This commit is contained in:
@@ -1,23 +1,13 @@
|
||||
"""
|
||||
Workers admin module.
|
||||
|
||||
The orchestrator/worker system doesn't need Django admin registration
|
||||
as workers are managed via CLI commands and the orchestrator.
|
||||
"""
|
||||
|
||||
__package__ = 'archivebox.workers'
|
||||
|
||||
from django.contrib.auth import get_permission_codename
|
||||
|
||||
from huey_monitor.apps import HueyMonitorConfig
|
||||
from huey_monitor.admin import TaskModel, TaskModelAdmin, SignalInfoModel, SignalInfoModelAdmin
|
||||
|
||||
|
||||
HueyMonitorConfig.verbose_name = 'Background Workers'
|
||||
|
||||
|
||||
class CustomTaskModelAdmin(TaskModelAdmin):
|
||||
actions = ["delete_selected"]
|
||||
|
||||
def has_delete_permission(self, request, obj=None):
|
||||
codename = get_permission_codename("delete", self.opts)
|
||||
return request.user.has_perm("%s.%s" % (self.opts.app_label, codename))
|
||||
|
||||
|
||||
|
||||
def register_admin(admin_site):
|
||||
admin_site.register(TaskModel, CustomTaskModelAdmin)
|
||||
admin_site.register(SignalInfoModel, SignalInfoModelAdmin)
|
||||
"""No models to register - workers are process-based, not Django models."""
|
||||
pass
|
||||
|
||||
0
archivebox/workers/management/__init__.py
Normal file
0
archivebox/workers/management/__init__.py
Normal file
0
archivebox/workers/management/commands/__init__.py
Normal file
0
archivebox/workers/management/commands/__init__.py
Normal file
15
archivebox/workers/management/commands/orchestrator.py
Normal file
15
archivebox/workers/management/commands/orchestrator.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from workers.orchestrator import Orchestrator
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = 'Run the archivebox orchestrator'
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument('--daemon', '-d', action='store_true', help="Run forever (don't exit on idle)")
|
||||
|
||||
def handle(self, *args, **kwargs):
|
||||
daemon = kwargs.get('daemon', False)
|
||||
orchestrator = Orchestrator(exit_on_idle=not daemon)
|
||||
orchestrator.runloop()
|
||||
@@ -35,6 +35,7 @@ from django.utils import timezone
|
||||
|
||||
from rich import print
|
||||
|
||||
from archivebox.misc.logging_util import log_worker_event
|
||||
from .worker import Worker, CrawlWorker, SnapshotWorker, ArchiveResultWorker
|
||||
from .pid_utils import (
|
||||
write_pid_file,
|
||||
@@ -82,22 +83,39 @@ class Orchestrator:
|
||||
"""Called when orchestrator starts."""
|
||||
self.pid = os.getpid()
|
||||
self.pid_file = write_pid_file('orchestrator', worker_id=0)
|
||||
print(f'[green]👨✈️ {self} STARTED[/green]')
|
||||
|
||||
|
||||
# Clean up any stale PID files from previous runs
|
||||
stale_count = cleanup_stale_pid_files()
|
||||
|
||||
# Collect startup metadata
|
||||
metadata = {
|
||||
'max_workers_per_type': self.MAX_WORKERS_PER_TYPE,
|
||||
'max_total_workers': self.MAX_TOTAL_WORKERS,
|
||||
'poll_interval': self.POLL_INTERVAL,
|
||||
}
|
||||
if stale_count:
|
||||
print(f'[yellow]👨✈️ {self} cleaned up {stale_count} stale PID files[/yellow]')
|
||||
metadata['cleaned_stale_pids'] = stale_count
|
||||
|
||||
log_worker_event(
|
||||
worker_type='Orchestrator',
|
||||
event='Starting...',
|
||||
indent_level=0,
|
||||
pid=self.pid,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
def on_shutdown(self, error: BaseException | None = None) -> None:
|
||||
"""Called when orchestrator shuts down."""
|
||||
if self.pid_file:
|
||||
remove_pid_file(self.pid_file)
|
||||
|
||||
if error and not isinstance(error, KeyboardInterrupt):
|
||||
print(f'[red]👨✈️ {self} SHUTDOWN with error:[/red] {type(error).__name__}: {error}')
|
||||
else:
|
||||
print(f'[grey53]👨✈️ {self} SHUTDOWN[/grey53]')
|
||||
|
||||
log_worker_event(
|
||||
worker_type='Orchestrator',
|
||||
event='Shutting down',
|
||||
indent_level=0,
|
||||
pid=self.pid,
|
||||
error=error if error and not isinstance(error, KeyboardInterrupt) else None,
|
||||
)
|
||||
|
||||
def get_total_worker_count(self) -> int:
|
||||
"""Get total count of running workers across all types."""
|
||||
@@ -129,10 +147,17 @@ class Orchestrator:
|
||||
"""Spawn a new worker process. Returns PID or None if spawn failed."""
|
||||
try:
|
||||
pid = WorkerClass.start(daemon=False)
|
||||
print(f'[blue]👨✈️ {self} spawned {WorkerClass.name} worker[/blue] pid={pid}')
|
||||
# Worker spawning is logged by the worker itself in on_startup()
|
||||
return pid
|
||||
except Exception as e:
|
||||
print(f'[red]👨✈️ {self} failed to spawn {WorkerClass.name} worker:[/red] {e}')
|
||||
log_worker_event(
|
||||
worker_type='Orchestrator',
|
||||
event='Failed to spawn worker',
|
||||
indent_level=0,
|
||||
pid=self.pid,
|
||||
metadata={'worker_type': WorkerClass.name},
|
||||
error=e,
|
||||
)
|
||||
return None
|
||||
|
||||
def check_queues_and_spawn_workers(self) -> dict[str, int]:
|
||||
@@ -181,26 +206,13 @@ class Orchestrator:
|
||||
|
||||
def on_tick(self, queue_sizes: dict[str, int]) -> None:
|
||||
"""Called each orchestrator tick. Override for custom behavior."""
|
||||
total_queued = sum(queue_sizes.values())
|
||||
total_workers = self.get_total_worker_count()
|
||||
|
||||
if total_queued > 0 or total_workers > 0:
|
||||
# Build status line
|
||||
status_parts = []
|
||||
for WorkerClass in self.WORKER_TYPES:
|
||||
name = WorkerClass.name
|
||||
queued = queue_sizes.get(name, 0)
|
||||
workers = len(WorkerClass.get_running_workers())
|
||||
if queued > 0 or workers > 0:
|
||||
status_parts.append(f'{name}={queued}q/{workers}w')
|
||||
|
||||
if status_parts:
|
||||
print(f'[grey53]👨✈️ {self} tick:[/grey53] {" ".join(status_parts)}')
|
||||
# Tick logging suppressed to reduce noise
|
||||
pass
|
||||
|
||||
def on_idle(self) -> None:
|
||||
"""Called when orchestrator is idle (no work, no workers)."""
|
||||
if self.idle_count == 1:
|
||||
print(f'[grey53]👨✈️ {self} idle, waiting for work...[/grey53]')
|
||||
# Idle logging suppressed to reduce noise
|
||||
pass
|
||||
|
||||
def should_exit(self, queue_sizes: dict[str, int]) -> bool:
|
||||
"""Determine if orchestrator should exit."""
|
||||
@@ -242,7 +254,12 @@ class Orchestrator:
|
||||
|
||||
# Check if we should exit
|
||||
if self.should_exit(queue_sizes):
|
||||
print(f'[green]👨✈️ {self} all work complete, exiting[/green]')
|
||||
log_worker_event(
|
||||
worker_type='Orchestrator',
|
||||
event='All work complete',
|
||||
indent_level=0,
|
||||
pid=self.pid,
|
||||
)
|
||||
break
|
||||
|
||||
time.sleep(self.POLL_INTERVAL)
|
||||
@@ -267,9 +284,14 @@ class Orchestrator:
|
||||
|
||||
proc = Process(target=run_orchestrator, name='orchestrator')
|
||||
proc.start()
|
||||
|
||||
|
||||
assert proc.pid is not None
|
||||
print(f'[green]👨✈️ Orchestrator started in background[/green] pid={proc.pid}')
|
||||
log_worker_event(
|
||||
worker_type='Orchestrator',
|
||||
event='Started in background',
|
||||
indent_level=0,
|
||||
pid=proc.pid,
|
||||
)
|
||||
return proc.pid
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -26,22 +26,6 @@ CONFIG_FILE_NAME = "supervisord.conf"
|
||||
PID_FILE_NAME = "supervisord.pid"
|
||||
WORKERS_DIR_NAME = "workers"
|
||||
|
||||
SCHEDULER_WORKER = {
|
||||
"name": "worker_scheduler",
|
||||
"command": "archivebox manage djangohuey --queue system_tasks -w 4 -k thread --disable-health-check --flush-locks",
|
||||
"autostart": "true",
|
||||
"autorestart": "true",
|
||||
"stdout_logfile": "logs/worker_scheduler.log",
|
||||
"redirect_stderr": "true",
|
||||
}
|
||||
COMMAND_WORKER = {
|
||||
"name": "worker_commands",
|
||||
"command": "archivebox manage djangohuey --queue commands -w 4 -k thread --no-periodic --disable-health-check",
|
||||
"autostart": "true",
|
||||
"autorestart": "true",
|
||||
"stdout_logfile": "logs/worker_commands.log",
|
||||
"redirect_stderr": "true",
|
||||
}
|
||||
ORCHESTRATOR_WORKER = {
|
||||
"name": "worker_orchestrator",
|
||||
"command": "archivebox manage orchestrator",
|
||||
@@ -391,10 +375,8 @@ def watch_worker(supervisor, daemon_name, interval=5):
|
||||
|
||||
def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
|
||||
supervisor = get_or_create_supervisord_process(daemonize=daemonize)
|
||||
|
||||
|
||||
bg_workers = [
|
||||
SCHEDULER_WORKER,
|
||||
COMMAND_WORKER,
|
||||
ORCHESTRATOR_WORKER,
|
||||
]
|
||||
|
||||
@@ -422,8 +404,7 @@ def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
|
||||
|
||||
def start_cli_workers(watch=False):
|
||||
supervisor = get_or_create_supervisord_process(daemonize=False)
|
||||
|
||||
start_worker(supervisor, COMMAND_WORKER)
|
||||
|
||||
start_worker(supervisor, ORCHESTRATOR_WORKER)
|
||||
|
||||
if watch:
|
||||
@@ -434,13 +415,12 @@ def start_cli_workers(watch=False):
|
||||
except SystemExit:
|
||||
pass
|
||||
except BaseException as e:
|
||||
STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping web server gracefully...")
|
||||
STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping orchestrator gracefully...")
|
||||
raise
|
||||
finally:
|
||||
stop_worker(supervisor, COMMAND_WORKER['name'])
|
||||
stop_worker(supervisor, ORCHESTRATOR_WORKER['name'])
|
||||
time.sleep(0.5)
|
||||
return [COMMAND_WORKER, ORCHESTRATOR_WORKER]
|
||||
return [ORCHESTRATOR_WORKER]
|
||||
|
||||
|
||||
# def main(daemons):
|
||||
|
||||
@@ -1,89 +1,60 @@
|
||||
"""
|
||||
Background task functions for queuing work to the orchestrator.
|
||||
|
||||
These functions queue Snapshots/Crawls for processing by setting their status
|
||||
to QUEUED, which the orchestrator workers will pick up and process.
|
||||
"""
|
||||
|
||||
__package__ = 'archivebox.workers'
|
||||
|
||||
from functools import wraps
|
||||
# from django.utils import timezone
|
||||
from django.utils import timezone
|
||||
|
||||
from django_huey import db_task, task
|
||||
|
||||
from huey_monitor.models import TaskModel
|
||||
from huey_monitor.tqdm import ProcessInfo
|
||||
def ensure_orchestrator_running():
|
||||
"""Ensure the orchestrator is running to process queued items."""
|
||||
from .orchestrator import Orchestrator
|
||||
|
||||
from .supervisord_util import get_or_create_supervisord_process
|
||||
if not Orchestrator.is_running():
|
||||
# Start orchestrator in background
|
||||
orchestrator = Orchestrator(exit_on_idle=True)
|
||||
orchestrator.start()
|
||||
|
||||
# @db_task(queue="commands", context=True, schedule=1)
|
||||
# def scheduler_tick():
|
||||
# print('SCHEDULER TICK', timezone.now().isoformat())
|
||||
# # abx.archivebox.events.on_scheduler_runloop_start(timezone.now(), machine=Machine.objects.get_current_machine())
|
||||
|
||||
# # abx.archivebox.events.on_scheduler_tick_start(timezone.now(), machine=Machine.objects.get_current_machine())
|
||||
|
||||
# scheduled_crawls = CrawlSchedule.objects.filter(is_enabled=True)
|
||||
# scheduled_crawls_due = scheduled_crawls.filter(next_run_at__lte=timezone.now())
|
||||
|
||||
# for scheduled_crawl in scheduled_crawls_due:
|
||||
# try:
|
||||
# abx.archivebox.events.on_crawl_schedule_tick(scheduled_crawl)
|
||||
# except Exception as e:
|
||||
# abx.archivebox.events.on_crawl_schedule_failure(timezone.now(), machine=Machine.objects.get_current_machine(), error=e, schedule=scheduled_crawl)
|
||||
|
||||
# # abx.archivebox.events.on_scheduler_tick_end(timezone.now(), machine=Machine.objects.get_current_machine(), tasks=scheduled_tasks_due)
|
||||
def bg_add(add_kwargs: dict) -> int:
|
||||
"""
|
||||
Add URLs and queue them for archiving.
|
||||
|
||||
def db_task_with_parent(func):
|
||||
"""Decorator for db_task that sets the parent task for the db_task"""
|
||||
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
task = kwargs.get('task')
|
||||
parent_task_id = kwargs.get('parent_task_id')
|
||||
|
||||
if task and parent_task_id:
|
||||
TaskModel.objects.set_parent_task(main_task_id=parent_task_id, sub_task_id=task.id)
|
||||
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return wrapper
|
||||
|
||||
@db_task(queue="commands", context=True)
|
||||
def bg_add(add_kwargs, task=None, parent_task_id=None):
|
||||
get_or_create_supervisord_process(daemonize=False)
|
||||
|
||||
from ..main import add
|
||||
|
||||
if task and parent_task_id:
|
||||
TaskModel.objects.set_parent_task(main_task_id=parent_task_id, sub_task_id=task.id)
|
||||
Returns the number of snapshots created.
|
||||
"""
|
||||
from archivebox.cli.archivebox_add import add
|
||||
|
||||
assert add_kwargs and add_kwargs.get("urls")
|
||||
rough_url_count = add_kwargs["urls"].count("://")
|
||||
|
||||
process_info = ProcessInfo(task, desc="add", parent_task_id=parent_task_id, total=rough_url_count)
|
||||
# When called as background task, always run in background mode
|
||||
add_kwargs = add_kwargs.copy()
|
||||
add_kwargs['bg'] = True
|
||||
|
||||
result = add(**add_kwargs)
|
||||
process_info.update(n=rough_url_count)
|
||||
return result
|
||||
|
||||
# Ensure orchestrator is running to process the new snapshots
|
||||
ensure_orchestrator_running()
|
||||
|
||||
return len(result) if result else 0
|
||||
|
||||
|
||||
@task(queue="commands", context=True)
|
||||
def bg_archive_snapshots(snapshots, kwargs=None, task=None, parent_task_id=None):
|
||||
def bg_archive_snapshots(snapshots, kwargs: dict | None = None) -> int:
|
||||
"""
|
||||
Queue multiple snapshots for archiving via the state machine system.
|
||||
|
||||
This sets snapshots to 'queued' status so the orchestrator workers pick them up.
|
||||
The actual archiving happens through ArchiveResult.run().
|
||||
"""
|
||||
get_or_create_supervisord_process(daemonize=False)
|
||||
The actual archiving happens through the worker's process_item() method.
|
||||
|
||||
from django.utils import timezone
|
||||
Returns the number of snapshots queued.
|
||||
"""
|
||||
from core.models import Snapshot
|
||||
|
||||
if task and parent_task_id:
|
||||
TaskModel.objects.set_parent_task(main_task_id=parent_task_id, sub_task_id=task.id)
|
||||
|
||||
assert snapshots
|
||||
kwargs = kwargs or {}
|
||||
|
||||
rough_count = len(snapshots) if hasattr(snapshots, '__len__') else snapshots.count()
|
||||
process_info = ProcessInfo(task, desc="archive_snapshots", parent_task_id=parent_task_id, total=rough_count)
|
||||
|
||||
# Queue snapshots by setting status to queued with immediate retry_at
|
||||
queued_count = 0
|
||||
for snapshot in snapshots:
|
||||
@@ -95,36 +66,33 @@ def bg_archive_snapshots(snapshots, kwargs=None, task=None, parent_task_id=None)
|
||||
)
|
||||
queued_count += 1
|
||||
|
||||
process_info.update(n=queued_count)
|
||||
# Ensure orchestrator is running to process the queued snapshots
|
||||
if queued_count > 0:
|
||||
ensure_orchestrator_running()
|
||||
|
||||
return queued_count
|
||||
|
||||
|
||||
@task(queue="commands", context=True)
|
||||
def bg_archive_snapshot(snapshot, overwrite=False, methods=None, task=None, parent_task_id=None):
|
||||
def bg_archive_snapshot(snapshot, overwrite: bool = False, methods: list | None = None) -> int:
|
||||
"""
|
||||
Queue a single snapshot for archiving via the state machine system.
|
||||
|
||||
This sets the snapshot to 'queued' status so the orchestrator workers pick it up.
|
||||
The actual archiving happens through ArchiveResult.run().
|
||||
The actual archiving happens through the worker's process_item() method.
|
||||
|
||||
Returns 1 if queued, 0 otherwise.
|
||||
"""
|
||||
get_or_create_supervisord_process(daemonize=False)
|
||||
|
||||
from django.utils import timezone
|
||||
from core.models import Snapshot
|
||||
|
||||
if task and parent_task_id:
|
||||
TaskModel.objects.set_parent_task(main_task_id=parent_task_id, sub_task_id=task.id)
|
||||
|
||||
process_info = ProcessInfo(task, desc="archive_snapshot", parent_task_id=parent_task_id, total=1)
|
||||
|
||||
# Queue the snapshot by setting status to queued
|
||||
if hasattr(snapshot, 'id'):
|
||||
Snapshot.objects.filter(id=snapshot.id).update(
|
||||
status=Snapshot.StatusChoices.QUEUED,
|
||||
retry_at=timezone.now(),
|
||||
)
|
||||
process_info.update(n=1)
|
||||
|
||||
# Ensure orchestrator is running to process the queued snapshot
|
||||
ensure_orchestrator_running()
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ from django.conf import settings
|
||||
|
||||
from rich import print
|
||||
|
||||
from archivebox.misc.logging_util import log_worker_event
|
||||
from .pid_utils import (
|
||||
write_pid_file,
|
||||
remove_pid_file,
|
||||
@@ -126,7 +127,7 @@ class Worker:
|
||||
obj.sm.tick()
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f'[red]{self} error processing {obj.pk}:[/red] {e}')
|
||||
# Error will be logged in runloop's completion event
|
||||
traceback.print_exc()
|
||||
return False
|
||||
|
||||
@@ -134,7 +135,28 @@ class Worker:
|
||||
"""Called when worker starts."""
|
||||
self.pid = os.getpid()
|
||||
self.pid_file = write_pid_file(self.name, self.worker_id)
|
||||
print(f'[green]{self} STARTED[/green] pid_file={self.pid_file}')
|
||||
|
||||
# Determine worker type for logging
|
||||
worker_type_name = self.__class__.__name__
|
||||
indent_level = 1 # Default for most workers
|
||||
|
||||
# Adjust indent level based on worker type
|
||||
if 'Snapshot' in worker_type_name:
|
||||
indent_level = 2
|
||||
elif 'ArchiveResult' in worker_type_name:
|
||||
indent_level = 3
|
||||
|
||||
log_worker_event(
|
||||
worker_type=worker_type_name,
|
||||
event='Starting...',
|
||||
indent_level=indent_level,
|
||||
pid=self.pid,
|
||||
worker_id=str(self.worker_id),
|
||||
metadata={
|
||||
'max_concurrent': self.MAX_CONCURRENT_TASKS,
|
||||
'poll_interval': self.POLL_INTERVAL,
|
||||
},
|
||||
)
|
||||
|
||||
def on_shutdown(self, error: BaseException | None = None) -> None:
|
||||
"""Called when worker shuts down."""
|
||||
@@ -142,10 +164,23 @@ class Worker:
|
||||
if self.pid_file:
|
||||
remove_pid_file(self.pid_file)
|
||||
|
||||
if error and not isinstance(error, KeyboardInterrupt):
|
||||
print(f'[red]{self} SHUTDOWN with error:[/red] {type(error).__name__}: {error}')
|
||||
else:
|
||||
print(f'[grey53]{self} SHUTDOWN[/grey53]')
|
||||
# Determine worker type for logging
|
||||
worker_type_name = self.__class__.__name__
|
||||
indent_level = 1
|
||||
|
||||
if 'Snapshot' in worker_type_name:
|
||||
indent_level = 2
|
||||
elif 'ArchiveResult' in worker_type_name:
|
||||
indent_level = 3
|
||||
|
||||
log_worker_event(
|
||||
worker_type=worker_type_name,
|
||||
event='Shutting down',
|
||||
indent_level=indent_level,
|
||||
pid=self.pid,
|
||||
worker_id=str(self.worker_id),
|
||||
error=error if error and not isinstance(error, KeyboardInterrupt) else None,
|
||||
)
|
||||
|
||||
def should_exit(self) -> bool:
|
||||
"""Check if worker should exit due to idle timeout."""
|
||||
@@ -161,6 +196,15 @@ class Worker:
|
||||
"""Main worker loop - polls queue, processes items."""
|
||||
self.on_startup()
|
||||
|
||||
# Determine worker type for logging
|
||||
worker_type_name = self.__class__.__name__
|
||||
indent_level = 1
|
||||
|
||||
if 'Snapshot' in worker_type_name:
|
||||
indent_level = 2
|
||||
elif 'ArchiveResult' in worker_type_name:
|
||||
indent_level = 3
|
||||
|
||||
try:
|
||||
while True:
|
||||
# Try to claim and process an item
|
||||
@@ -168,25 +212,62 @@ class Worker:
|
||||
|
||||
if obj is not None:
|
||||
self.idle_count = 0
|
||||
print(f'[blue]{self} processing:[/blue] {obj.pk}')
|
||||
|
||||
# Build metadata for task start
|
||||
start_metadata = {'task_id': str(obj.pk)}
|
||||
if hasattr(obj, 'url'):
|
||||
# SnapshotWorker
|
||||
url = str(obj.url) if obj.url else None
|
||||
else:
|
||||
url = None
|
||||
|
||||
extractor = None
|
||||
if hasattr(obj, 'extractor'):
|
||||
# ArchiveResultWorker
|
||||
extractor = obj.extractor
|
||||
start_metadata['extractor'] = extractor
|
||||
|
||||
log_worker_event(
|
||||
worker_type=worker_type_name,
|
||||
event='Processing...',
|
||||
indent_level=indent_level,
|
||||
pid=self.pid,
|
||||
worker_id=str(self.worker_id),
|
||||
url=url,
|
||||
extractor=extractor,
|
||||
metadata=start_metadata,
|
||||
)
|
||||
|
||||
start_time = time.time()
|
||||
success = self.process_item(obj)
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
if success:
|
||||
print(f'[green]{self} completed ({elapsed:.1f}s):[/green] {obj.pk}')
|
||||
else:
|
||||
print(f'[red]{self} failed ({elapsed:.1f}s):[/red] {obj.pk}')
|
||||
# Build metadata for task completion
|
||||
complete_metadata = {
|
||||
'task_id': str(obj.pk),
|
||||
'duration': elapsed,
|
||||
'status': 'success' if success else 'failed',
|
||||
}
|
||||
if hasattr(obj, 'status'):
|
||||
complete_metadata['final_status'] = str(obj.status)
|
||||
|
||||
log_worker_event(
|
||||
worker_type=worker_type_name,
|
||||
event='Completed' if success else 'Failed',
|
||||
indent_level=indent_level,
|
||||
pid=self.pid,
|
||||
worker_id=str(self.worker_id),
|
||||
url=url,
|
||||
extractor=extractor,
|
||||
metadata=complete_metadata,
|
||||
)
|
||||
else:
|
||||
# No work available
|
||||
# No work available - idle logging suppressed
|
||||
self.idle_count += 1
|
||||
if self.idle_count == 1:
|
||||
print(f'[grey53]{self} idle, waiting for work...[/grey53]')
|
||||
|
||||
# Check if we should exit
|
||||
if self.should_exit():
|
||||
print(f'[grey53]{self} idle timeout reached, exiting[/grey53]')
|
||||
# Exit logging suppressed - shutdown will be logged by on_shutdown()
|
||||
break
|
||||
|
||||
time.sleep(self.POLL_INTERVAL)
|
||||
@@ -293,7 +374,7 @@ class ArchiveResultWorker(Worker):
|
||||
obj.sm.tick()
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f'[red]{self} error processing {obj.pk}:[/red] {e}')
|
||||
# Error will be logged in runloop's completion event
|
||||
traceback.print_exc()
|
||||
return False
|
||||
|
||||
|
||||
Reference in New Issue
Block a user