mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
rename archive_org to archivedotorg, add BinaryWorker, fix config pass-through
This commit is contained in:
@@ -59,10 +59,11 @@ def process_stdin_records() -> int:
|
||||
"""
|
||||
from django.utils import timezone
|
||||
|
||||
from archivebox.misc.jsonl import read_stdin, write_record, TYPE_CRAWL, TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
|
||||
from archivebox.misc.jsonl import read_stdin, write_record, TYPE_CRAWL, TYPE_SNAPSHOT, TYPE_ARCHIVERESULT, TYPE_BINARY
|
||||
from archivebox.base_models.models import get_or_create_system_user_pk
|
||||
from archivebox.core.models import Snapshot, ArchiveResult
|
||||
from archivebox.crawls.models import Crawl
|
||||
from archivebox.machine.models import Binary
|
||||
from archivebox.workers.orchestrator import Orchestrator
|
||||
|
||||
records = list(read_stdin())
|
||||
@@ -137,6 +138,26 @@ def process_stdin_records() -> int:
|
||||
output_records.append(archiveresult.to_json())
|
||||
queued_count += 1
|
||||
|
||||
elif record_type == TYPE_BINARY:
|
||||
# Binary records - create or update and queue for installation
|
||||
if record_id:
|
||||
# Existing binary - re-queue
|
||||
try:
|
||||
binary = Binary.objects.get(id=record_id)
|
||||
except Binary.DoesNotExist:
|
||||
binary = Binary.from_json(record)
|
||||
else:
|
||||
# New binary - create it
|
||||
binary = Binary.from_json(record)
|
||||
|
||||
if binary:
|
||||
binary.retry_at = timezone.now()
|
||||
if binary.status != Binary.StatusChoices.INSTALLED:
|
||||
binary.status = Binary.StatusChoices.QUEUED
|
||||
binary.save()
|
||||
output_records.append(binary.to_json())
|
||||
queued_count += 1
|
||||
|
||||
else:
|
||||
# Unknown type - pass through
|
||||
output_records.append(record)
|
||||
@@ -222,7 +243,8 @@ def run_snapshot_worker(snapshot_id: str) -> int:
|
||||
@click.option('--daemon', '-d', is_flag=True, help="Run forever (don't exit on idle)")
|
||||
@click.option('--crawl-id', help="Run orchestrator for specific crawl only")
|
||||
@click.option('--snapshot-id', help="Run worker for specific snapshot only")
|
||||
def main(daemon: bool, crawl_id: str, snapshot_id: str):
|
||||
@click.option('--binary-id', help="Run worker for specific binary only")
|
||||
def main(daemon: bool, crawl_id: str, snapshot_id: str, binary_id: str):
|
||||
"""
|
||||
Process queued work.
|
||||
|
||||
@@ -231,11 +253,27 @@ def main(daemon: bool, crawl_id: str, snapshot_id: str):
|
||||
- No args + TTY: Run orchestrator for all work
|
||||
- --crawl-id: Run orchestrator for that crawl only
|
||||
- --snapshot-id: Run worker for that snapshot only
|
||||
- --binary-id: Run worker for that binary only
|
||||
"""
|
||||
# Snapshot worker mode
|
||||
if snapshot_id:
|
||||
sys.exit(run_snapshot_worker(snapshot_id))
|
||||
|
||||
# Binary worker mode
|
||||
if binary_id:
|
||||
from archivebox.workers.worker import BinaryWorker
|
||||
try:
|
||||
worker = BinaryWorker(binary_id=binary_id, worker_id=0)
|
||||
worker.runloop()
|
||||
sys.exit(0)
|
||||
except KeyboardInterrupt:
|
||||
sys.exit(0)
|
||||
except Exception as e:
|
||||
rprint(f'[red]Worker error: {type(e).__name__}: {e}[/red]', file=sys.stderr)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
# Crawl worker mode
|
||||
if crawl_id:
|
||||
from archivebox.workers.worker import CrawlWorker
|
||||
|
||||
Reference in New Issue
Block a user