mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-02 17:05:38 +10:00
Phase 1: Model Prerequisites - Add ArchiveResult.from_json() and from_jsonl() methods - Fix Snapshot.to_json() to use tags_str (consistent with Crawl) Phase 2: Shared Utilities - Create archivebox/cli/cli_utils.py with shared apply_filters() - Update 7 CLI files to import from cli_utils.py instead of duplicating Phase 3: Pass-Through Behavior - Add pass-through to crawl create (non-Crawl records pass unchanged) - Add pass-through to snapshot create (Crawl records + others pass through) - Add pass-through to archiveresult create (Snapshot records + others) - Add create-or-update behavior to run command: - Records WITHOUT id: Create via Model.from_json() - Records WITH id: Lookup existing, re-queue - Outputs JSONL of all processed records for chaining Phase 4: Test Infrastructure - Create archivebox/tests/conftest.py with pytest-django fixtures - Include CLI helpers, output assertions, database assertions Phase 6: Config Update - Update supervisord_util.py: orchestrator -> run command This enables Unix-style piping: archivebox crawl create URL | archivebox run archivebox archiveresult list --status=failed | archivebox run curl API | jq transform | archivebox crawl create | archivebox run
100 lines
2.6 KiB
Python
100 lines
2.6 KiB
Python
#!/usr/bin/env python3

"""
archivebox machine <action> [--filters]

Manage Machine records (system-managed, mostly read-only).

Machine records track the host machines where ArchiveBox runs.
They are created automatically by the system and are primarily for debugging.

Actions:
    list    - List Machines as JSONL (with optional filters)

Output:
    When stdout is a terminal, `list` prints one human-readable row per
    machine (hostname, OS platform, id). When stdout is piped or redirected,
    it writes one JSONL record per machine instead. A "Listed N machines"
    summary is always written to stderr.

Examples:
    # List all machines
    archivebox machine list

    # List machines by hostname
    archivebox machine list --hostname__icontains=myserver

    # List at most 5 machines
    archivebox machine list --limit=5
"""

# NOTE(review): these dunders are presumably read by the ArchiveBox CLI loader
# to place this module in the `archivebox.cli` package and to label the
# subcommand -- confirm against the loader before renaming.
__package__ = 'archivebox.cli'
__command__ = 'archivebox machine'
|
|
|
|
import sys
|
|
from typing import Optional
|
|
|
|
import rich_click as click
|
|
from rich import print as rprint
|
|
|
|
from archivebox.cli.cli_utils import apply_filters
|
|
|
|
|
|
# =============================================================================
|
|
# LIST
|
|
# =============================================================================
|
|
|
|
def list_machines(
    hostname__icontains: Optional[str] = None,
    os_platform: Optional[str] = None,
    limit: Optional[int] = None,
) -> int:
    """
    Print the Machine records that match the given filters.

    The base queryset is every Machine ordered by descending ``created_at``;
    it is then handed to ``apply_filters`` together with the filter values
    and ``limit``. Each resulting machine is emitted in one of two forms:

    * stdout is a terminal: a coloured ``hostname  os_platform  id`` row.
    * stdout is not a terminal: one JSONL record built from
      ``machine.to_json()``.

    A ``Listed N machines`` summary is always printed to stderr.

    Args:
        hostname__icontains: Value for the ``hostname__icontains`` filter,
            or None.
        os_platform: Value for the ``os_platform`` filter, or None.
        limit: Passed to ``apply_filters`` as ``limit``.

    Returns:
        Always 0, including when no machines were listed.
    """
    # Project imports are resolved at call time rather than at module import.
    # NOTE(review): presumably so the Django models are only loaded when the
    # command actually runs -- confirm.
    from archivebox.misc.jsonl import write_record
    from archivebox.machine.models import Machine

    interactive = sys.stdout.isatty()

    # None-valued filters are passed through as-is; how they are treated is
    # up to apply_filters.
    machines = apply_filters(
        Machine.objects.all().order_by('-created_at'),
        {
            'hostname__icontains': hostname__icontains,
            'os_platform': os_platform,
        },
        limit=limit,
    )

    # `total` stays 0 when the queryset yields nothing.
    total = 0
    for total, machine in enumerate(machines, start=1):
        if not interactive:
            # Piped/redirected output: machine-readable JSONL on stdout.
            write_record(machine.to_json())
            continue
        rprint(f'[cyan]{machine.hostname:30}[/cyan] [dim]{machine.os_platform:10}[/dim] {machine.id}')

    # The summary goes to stderr so it never mixes into the JSONL stream.
    rprint(f'[dim]Listed {total} machines[/dim]', file=sys.stderr)
    return 0
|
|
|
|
|
|
# =============================================================================
|
|
# CLI Commands
|
|
# =============================================================================
|
|
|
|
@click.group()
def main():
    """Manage Machine records (read-only, system-managed)."""
    # Intentionally empty: this group only serves as the parent that
    # subcommands attach to. The docstring above is shown by Click as the
    # group's help text, so treat it as user-facing output.
|
|
|
|
|
|
@main.command('list')
@click.option('--hostname__icontains', help='Filter by hostname contains')
@click.option('--os-platform', help='Filter by OS platform')
@click.option('--limit', '-n', type=int, help='Limit number of results')
def list_cmd(hostname__icontains: Optional[str], os_platform: Optional[str], limit: Optional[int]):
    """List Machines as JSONL."""
    # The docstring above doubles as the `list` subcommand's help text.
    # Forward the parsed options unchanged and use the returned status as the
    # process exit code.
    exit_code = list_machines(
        hostname__icontains=hostname__icontains,
        os_platform=os_platform,
        limit=limit,
    )
    sys.exit(exit_code)
|
|
|
|
|
|
# Allow running this module directly as a script: invoke the Click group.
if __name__ == '__main__':
    main()
|