Files
ArchiveBox/archivebox/cli/archivebox_worker.py
2025-12-29 04:02:11 -08:00

51 lines
1.6 KiB
Python

#!/usr/bin/env python3
# Pin the package name explicitly; Python's import machinery consults
# __package__ when resolving relative imports for this module.
__package__ = 'archivebox.cli'
# Human-readable command name for this module.
# NOTE(review): nothing in this file reads __command__ -- presumably it is
# consumed by the CLI loader elsewhere in the project; confirm before renaming.
__command__ = 'archivebox worker'
import sys
import rich_click as click
from archivebox.misc.util import docstring
def worker(worker_type: str, daemon: bool = False, plugin: str | None = None):
    """
    Start a worker process to process items from the queue.

    Worker types:
    - crawl: Process Crawl objects (parse seeds, create snapshots)
    - snapshot: Process Snapshot objects (create archive results)
    - archiveresult: Process ArchiveResult objects (run plugins)

    Workers poll the database for queued items, claim them atomically,
    and spawn subprocess tasks to handle each item.
    """
    # NOTE: the docstring above is passed to the @docstring decorator on the
    # click entry point in this file, so its wording is user-facing and is
    # intentionally kept unchanged.

    # Imported lazily, inside the function, rather than at module import time.
    from archivebox.workers.worker import get_worker_class

    # Look up the worker implementation registered for the requested type.
    worker_cls = get_worker_class(worker_type)

    # The plugin filter is only forwarded for archiveresult workers; for any
    # other worker type (or an empty/None plugin) it is dropped. The
    # constructor keyword is still named "extractor".
    forward_plugin = bool(plugin) and worker_type == 'archiveresult'
    init_kwargs = (
        {'daemon': daemon, 'extractor': plugin}
        if forward_plugin
        else {'daemon': daemon}
    )

    # Instantiate the worker and hand control to its run loop.
    worker_cls(**init_kwargs).runloop()
@click.command()
@click.argument(
    'worker_type',
    type=click.Choice(['crawl', 'snapshot', 'archiveresult']),
)
@click.option(
    '--daemon', '-d',
    is_flag=True,
    help="Run forever (don't exit on idle)",
)
@click.option(
    '--plugin', '-p',
    default=None,
    help='Filter by plugin (archiveresult only)',
)
@docstring(worker.__doc__)
def main(worker_type: str, daemon: bool, plugin: str | None):
    """Start an ArchiveBox worker process"""
    # Thin CLI shim: click parses and validates the arguments (worker_type is
    # restricted to the three choices above), then everything is delegated to
    # worker() unchanged.
    worker(worker_type=worker_type, daemon=daemon, plugin=plugin)
# Invoke the click command only when this file is executed directly as a
# script; importing the module does not start a worker.
if __name__ == '__main__':
    main()