fix: rename --plugin to --plugins for consistency

Changed from singular --plugin to plural --plugins in both the snapshot and extract
commands to match the pattern used by the archivebox add command. The option now accepts
comma-separated plugin names (e.g., --plugins=screenshot,singlefile,title).

- Updated CLI option from --plugin to --plugins
- Added parsing for comma-separated plugin names
- Updated function signatures and logic to handle multiple plugins
- Updated help text, docstrings, and examples

Co-authored-by: Nick Sweeting <pirate@users.noreply.github.com>
This commit is contained in:
claude[bot]
2025-12-30 20:20:29 +00:00
parent 64db6deab3
commit 251fe33e49
2 changed files with 60 additions and 50 deletions

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
archivebox extract [snapshot_ids...] [--plugin=NAME] archivebox extract [snapshot_ids...] [--plugins=NAMES]
Run plugins on Snapshots. Accepts snapshot IDs as arguments, from stdin, or via JSONL. Run plugins on Snapshots. Accepts snapshot IDs as arguments, from stdin, or via JSONL.
@@ -20,8 +20,8 @@ Examples:
# Pipe from snapshot command # Pipe from snapshot command
archivebox snapshot https://example.com | archivebox extract archivebox snapshot https://example.com | archivebox extract
# Run specific plugin only # Run specific plugins only
archivebox extract --plugin=screenshot 01234567-89ab-cdef-0123-456789abcdef archivebox extract --plugins=screenshot,singlefile 01234567-89ab-cdef-0123-456789abcdef
# Chain commands # Chain commands
archivebox crawl https://example.com | archivebox snapshot | archivebox extract archivebox crawl https://example.com | archivebox snapshot | archivebox extract
@@ -76,7 +76,7 @@ def process_archiveresult_by_id(archiveresult_id: str) -> int:
def run_plugins( def run_plugins(
args: tuple, args: tuple,
plugin: str = '', plugins: str = '',
wait: bool = True, wait: bool = True,
) -> int: ) -> int:
""" """
@@ -147,21 +147,25 @@ def run_plugins(
continue continue
# Create pending ArchiveResults if needed # Create pending ArchiveResults if needed
if plugin: if plugins:
# Only create for specific plugin # Parse comma-separated plugins list
result, created = ArchiveResult.objects.get_or_create( plugins_list = [p.strip() for p in plugins.split(',') if p.strip()]
snapshot=snapshot,
plugin=plugin, # Only create for specific plugins
defaults={ for plugin_name in plugins_list:
'status': ArchiveResult.StatusChoices.QUEUED, result, created = ArchiveResult.objects.get_or_create(
'retry_at': timezone.now(), snapshot=snapshot,
} plugin=plugin_name,
) defaults={
if not created and result.status in [ArchiveResult.StatusChoices.FAILED, ArchiveResult.StatusChoices.SKIPPED]: 'status': ArchiveResult.StatusChoices.QUEUED,
# Reset for retry 'retry_at': timezone.now(),
result.status = ArchiveResult.StatusChoices.QUEUED }
result.retry_at = timezone.now() )
result.save() if not created and result.status in [ArchiveResult.StatusChoices.FAILED, ArchiveResult.StatusChoices.SKIPPED]:
# Reset for retry
result.status = ArchiveResult.StatusChoices.QUEUED
result.retry_at = timezone.now()
result.save()
else: else:
# Create all pending plugins # Create all pending plugins
snapshot.create_pending_archiveresults() snapshot.create_pending_archiveresults()
@@ -191,8 +195,10 @@ def run_plugins(
try: try:
snapshot = Snapshot.objects.get(id=snapshot_id) snapshot = Snapshot.objects.get(id=snapshot_id)
results = snapshot.archiveresult_set.all() results = snapshot.archiveresult_set.all()
if plugin: if plugins:
results = results.filter(plugin=plugin) # Parse comma-separated plugins list
plugins_list = [p.strip() for p in plugins.split(',') if p.strip()]
results = results.filter(plugin__in=plugins_list)
for result in results: for result in results:
if is_tty: if is_tty:
@@ -222,10 +228,10 @@ def is_archiveresult_id(value: str) -> bool:
@click.command() @click.command()
@click.option('--plugin', '-p', default='', help='Run only this plugin (e.g., screenshot, singlefile)') @click.option('--plugins', '-p', default='', help='Comma-separated list of plugins to run (e.g., screenshot,singlefile)')
@click.option('--wait/--no-wait', default=True, help='Wait for plugins to complete (default: wait)') @click.option('--wait/--no-wait', default=True, help='Wait for plugins to complete (default: wait)')
@click.argument('args', nargs=-1) @click.argument('args', nargs=-1)
def main(plugin: str, wait: bool, args: tuple): def main(plugins: str, wait: bool, args: tuple):
"""Run plugins on Snapshots, or process existing ArchiveResults by ID""" """Run plugins on Snapshots, or process existing ArchiveResults by ID"""
from archivebox.misc.jsonl import read_args_or_stdin from archivebox.misc.jsonl import read_args_or_stdin
@@ -254,7 +260,7 @@ def main(plugin: str, wait: bool, args: tuple):
sys.exit(exit_code) sys.exit(exit_code)
else: else:
# Default behavior: run plugins on Snapshots from input # Default behavior: run plugins on Snapshots from input
sys.exit(run_plugins(args, plugin=plugin, wait=wait)) sys.exit(run_plugins(args, plugins=plugins, wait=wait))
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
archivebox snapshot [urls_or_crawl_ids...] [--tag=TAG] [--plugin=NAME] archivebox snapshot [urls_or_crawl_ids...] [--tag=TAG] [--plugins=NAMES]
Create Snapshots from URLs or Crawl jobs. Accepts URLs, Crawl JSONL, or Crawl IDs. Create Snapshots from URLs or Crawl jobs. Accepts URLs, Crawl JSONL, or Crawl IDs.
@@ -24,8 +24,8 @@ Examples:
# Chain with extract # Chain with extract
archivebox crawl https://example.com | archivebox snapshot | archivebox extract archivebox crawl https://example.com | archivebox snapshot | archivebox extract
# Run specific plugin after creating snapshots # Run specific plugins after creating snapshots
archivebox snapshot --plugin=screenshot https://example.com archivebox snapshot --plugins=screenshot,singlefile https://example.com
# Process existing Snapshot by ID # Process existing Snapshot by ID
archivebox snapshot 01234567-89ab-cdef-0123-456789abcdef archivebox snapshot 01234567-89ab-cdef-0123-456789abcdef
@@ -74,14 +74,14 @@ def process_snapshot_by_id(snapshot_id: str) -> int:
def create_snapshots( def create_snapshots(
args: tuple, args: tuple,
tag: str = '', tag: str = '',
plugin: str = '', plugins: str = '',
created_by_id: Optional[int] = None, created_by_id: Optional[int] = None,
) -> int: ) -> int:
""" """
Create Snapshots from URLs, Crawl JSONL, or Crawl IDs. Create Snapshots from URLs, Crawl JSONL, or Crawl IDs.
Reads from args or stdin, creates Snapshot objects, outputs JSONL. Reads from args or stdin, creates Snapshot objects, outputs JSONL.
If --plugin is passed, also runs specified plugin (blocking). If --plugins is passed, also runs specified plugins (blocking).
Exit codes: Exit codes:
0: Success 0: Success
@@ -179,28 +179,32 @@ def create_snapshots(
for snapshot in created_snapshots: for snapshot in created_snapshots:
rprint(f' [dim]{snapshot.id}[/dim] {snapshot.url[:60]}', file=sys.stderr) rprint(f' [dim]{snapshot.id}[/dim] {snapshot.url[:60]}', file=sys.stderr)
# If --plugin is passed, create ArchiveResults and run the orchestrator # If --plugins is passed, create ArchiveResults and run the orchestrator
if plugin: if plugins:
from archivebox.core.models import ArchiveResult from archivebox.core.models import ArchiveResult
from archivebox.workers.orchestrator import Orchestrator from archivebox.workers.orchestrator import Orchestrator
# Create ArchiveResults for the specific plugin on each snapshot # Parse comma-separated plugins list
for snapshot in created_snapshots: plugins_list = [p.strip() for p in plugins.split(',') if p.strip()]
result, created = ArchiveResult.objects.get_or_create(
snapshot=snapshot,
plugin=plugin,
defaults={
'status': ArchiveResult.StatusChoices.QUEUED,
'retry_at': timezone.now(),
}
)
if not created and result.status in [ArchiveResult.StatusChoices.FAILED, ArchiveResult.StatusChoices.SKIPPED]:
# Reset for retry
result.status = ArchiveResult.StatusChoices.QUEUED
result.retry_at = timezone.now()
result.save()
rprint(f'[blue]Running plugin: {plugin}...[/blue]', file=sys.stderr) # Create ArchiveResults for the specific plugins on each snapshot
for snapshot in created_snapshots:
for plugin_name in plugins_list:
result, created = ArchiveResult.objects.get_or_create(
snapshot=snapshot,
plugin=plugin_name,
defaults={
'status': ArchiveResult.StatusChoices.QUEUED,
'retry_at': timezone.now(),
}
)
if not created and result.status in [ArchiveResult.StatusChoices.FAILED, ArchiveResult.StatusChoices.SKIPPED]:
# Reset for retry
result.status = ArchiveResult.StatusChoices.QUEUED
result.retry_at = timezone.now()
result.save()
rprint(f'[blue]Running plugins: {plugins}...[/blue]', file=sys.stderr)
orchestrator = Orchestrator(exit_on_idle=True) orchestrator = Orchestrator(exit_on_idle=True)
orchestrator.runloop() orchestrator.runloop()
@@ -220,9 +224,9 @@ def is_snapshot_id(value: str) -> bool:
@click.command() @click.command()
@click.option('--tag', '-t', default='', help='Comma-separated tags to add to each snapshot') @click.option('--tag', '-t', default='', help='Comma-separated tags to add to each snapshot')
@click.option('--plugin', '-p', default='', help='Run only this plugin after creating snapshots (e.g., screenshot, singlefile)') @click.option('--plugins', '-p', default='', help='Comma-separated list of plugins to run after creating snapshots (e.g., screenshot,singlefile)')
@click.argument('args', nargs=-1) @click.argument('args', nargs=-1)
def main(tag: str, plugin: str, args: tuple): def main(tag: str, plugins: str, args: tuple):
"""Create Snapshots from URLs/Crawls, or process existing Snapshots by ID""" """Create Snapshots from URLs/Crawls, or process existing Snapshots by ID"""
from archivebox.misc.jsonl import read_args_or_stdin from archivebox.misc.jsonl import read_args_or_stdin
@@ -256,7 +260,7 @@ def main(tag: str, plugin: str, args: tuple):
sys.exit(exit_code) sys.exit(exit_code)
else: else:
# Create new Snapshots from URLs or Crawls # Create new Snapshots from URLs or Crawls
sys.exit(create_snapshots(args, tag=tag, plugin=plugin)) sys.exit(create_snapshots(args, tag=tag, plugins=plugins))
if __name__ == '__main__': if __name__ == '__main__':