mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-05 23:37:58 +10:00
wip major changes
This commit is contained in:
@@ -4,29 +4,46 @@ __package__ = 'archivebox.cli'
|
||||
__command__ = 'archivebox worker'
|
||||
|
||||
import sys
|
||||
import json
|
||||
|
||||
import rich_click as click
|
||||
|
||||
from archivebox.misc.util import docstring
|
||||
|
||||
|
||||
def worker(worker_type: str, daemon: bool = False, plugin: str | None = None):
    """
    Start a worker process to process items from the queue.

    Worker types:
        - crawl: Process Crawl objects (parse seeds, create snapshots)
        - snapshot: Process Snapshot objects (create archive results)
        - archiveresult: Process ArchiveResult objects (run plugins)

    Workers poll the database for queued items, claim them atomically,
    and spawn subprocess tasks to handle each item.
    """
    # NOTE: the docstring wording above is reused elsewhere in this file via
    # `worker.__doc__`, so it is kept free of an Args/Returns section.
    #
    # worker_type is passed straight to get_worker_class() to pick the class.
    # daemon is forwarded to the worker constructor unchanged.
    # plugin is only forwarded when worker_type == 'archiveresult'.

    # Imported lazily, inside the function, as in the original code.
    from workers.worker import get_worker_class

    # Resolve the class first so an unknown worker_type fails before any
    # constructor arguments are assembled.
    worker_cls = get_worker_class(worker_type)

    # The plugin filter is passed under the `extractor` keyword
    # (internal field still called extractor); for every other worker type
    # a supplied plugin is not forwarded at all.
    plugin_filter = {'extractor': plugin} if (plugin and worker_type == 'archiveresult') else {}

    # Instantiate the worker and hand control to its run loop.
    worker_cls(daemon=daemon, **plugin_filter).runloop()
|
||||
|
||||
|
||||
@click.command()
@click.argument('worker_type')
@click.option('--wait-for-first-event', is_flag=True)
@click.option('--exit-on-idle', is_flag=True)
def main(worker_type: str, wait_for_first_event: bool, exit_on_idle: bool):
    """Start an ArchiveBox worker process of the given type"""
    # CLI entry point: looks up the worker for `worker_type`, runs it, and
    # prints every event that the run yields.

    # Imported lazily, inside the function, as in the original code.
    from workers.worker import get_worker_type

    # allow piping in events to process from stdin
    # if not sys.stdin.isatty():
    #     for line in sys.stdin.readlines():
    #         Event.dispatch(event=json.loads(line), parent=None)

    # run the actor
    actor = get_worker_type(worker_type)
    event_stream = actor.run(
        wait_for_first_event=wait_for_first_event,
        exit_on_idle=exit_on_idle,
    )
    # Both CLI flags are forwarded verbatim; each yielded event goes to stdout.
    for emitted in event_stream:
        print(emitted)
|
||||
# NOTE(review): no `@click.command()` is visible directly above these
# decorators in this view -- confirm it is applied before relying on main()
# as a CLI entry point.
@click.argument(
    'worker_type',
    type=click.Choice(['crawl', 'snapshot', 'archiveresult']),
)
@click.option(
    '--daemon', '-d',
    is_flag=True,
    help="Run forever (don't exit on idle)",
)
@click.option(
    '--plugin', '-p',
    default=None,
    help='Filter by plugin (archiveresult only)',
)
@docstring(worker.__doc__)
def main(worker_type: str, daemon: bool, plugin: str | None):
    """Start an ArchiveBox worker process"""
    # Thin CLI wrapper: the parsed arguments are handed to worker() unchanged.
    worker(worker_type=worker_type, daemon=daemon, plugin=plugin)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user