From 64db6deab3324c279b1847c3f4dc6ba21b230bf9 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Tue, 30 Dec 2025 20:15:48 +0000
Subject: [PATCH] fix: revert incorrect --extract renaming, restore --plugin
 parameter

The --plugins parameter was incorrectly renamed to --extract (a boolean
flag). This restores --plugin (singular, matching the extract command) with
the correct semantics: it specifies which plugin to run after creating
snapshots.

- Changed --extract/--no-extract back to --plugin (string parameter)
- Updated function signature and logic to use the plugin parameter
- Added ArchiveResult creation for the specified plugin when --plugin is passed
- Updated docstring and examples

Co-authored-by: Nick Sweeting
---
 archivebox/cli/archivebox_snapshot.py | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/archivebox/cli/archivebox_snapshot.py b/archivebox/cli/archivebox_snapshot.py
index b9876bb1..b104fb92 100644
--- a/archivebox/cli/archivebox_snapshot.py
+++ b/archivebox/cli/archivebox_snapshot.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
-archivebox snapshot [urls_or_crawl_ids...] [--tag=TAG] [--extract]
+archivebox snapshot [urls_or_crawl_ids...] [--tag=TAG] [--plugin=NAME]
 
 Create Snapshots from URLs or Crawl jobs.
 Accepts URLs, Crawl JSONL, or Crawl IDs.
 
@@ -24,6 +24,9 @@ Examples:
     # Chain with extract
     archivebox crawl https://example.com | archivebox snapshot | archivebox extract
 
+    # Run a specific plugin after creating snapshots
+    archivebox snapshot --plugin=screenshot https://example.com
+
     # Process existing Snapshot by ID
     archivebox snapshot 01234567-89ab-cdef-0123-456789abcdef
 """
@@ -71,14 +74,14 @@ def process_snapshot_by_id(snapshot_id: str) -> int:
 def create_snapshots(
     args: tuple,
     tag: str = '',
-    extract: bool = False,
+    plugin: str = '',
     created_by_id: Optional[int] = None,
 ) -> int:
     """
     Create Snapshots from URLs, Crawl JSONL, or Crawl IDs.
 
     Reads from args or stdin, creates Snapshot objects, outputs JSONL.
-    If input is Crawl JSONL, creates Snapshots for all URLs in the Crawl.
+    If --plugin is passed, also runs the specified plugin (blocking).
 
     Exit codes:
         0: Success
@@ -176,10 +179,28 @@ def create_snapshots(
     for snapshot in created_snapshots:
         rprint(f' [dim]{snapshot.id}[/dim] {snapshot.url[:60]}', file=sys.stderr)
 
-    # If --extract is passed, run the orchestrator
-    if extract:
+    # If --plugin is passed, create ArchiveResults and run the orchestrator
+    if plugin:
+        from archivebox.core.models import ArchiveResult
         from archivebox.workers.orchestrator import Orchestrator
-        rprint('[blue]Running extractors...[/blue]', file=sys.stderr)
+
+        # Create ArchiveResults for the specific plugin on each snapshot
+        for snapshot in created_snapshots:
+            result, created = ArchiveResult.objects.get_or_create(
+                snapshot=snapshot,
+                plugin=plugin,
+                defaults={
+                    'status': ArchiveResult.StatusChoices.QUEUED,
+                    'retry_at': timezone.now(),
+                }
+            )
+            if not created and result.status in [ArchiveResult.StatusChoices.FAILED, ArchiveResult.StatusChoices.SKIPPED]:
+                # Reset for retry
+                result.status = ArchiveResult.StatusChoices.QUEUED
+                result.retry_at = timezone.now()
+                result.save()
+
+        rprint(f'[blue]Running plugin: {plugin}...[/blue]', file=sys.stderr)
         orchestrator = Orchestrator(exit_on_idle=True)
         orchestrator.runloop()
 
@@ -199,9 +220,9 @@ def is_snapshot_id(value: str) -> bool:
 
 @click.command()
 @click.option('--tag', '-t', default='', help='Comma-separated tags to add to each snapshot')
-@click.option('--extract/--no-extract', default=False, help='Run extractors after creating snapshots')
+@click.option('--plugin', '-p', default='', help='Run only this plugin after creating snapshots (e.g., screenshot, singlefile)')
 @click.argument('args', nargs=-1)
-def main(tag: str, extract: bool, args: tuple):
+def main(tag: str, plugin: str, args: tuple):
     """Create Snapshots from URLs/Crawls, or process existing Snapshots by ID"""
     from archivebox.misc.jsonl import read_args_or_stdin
 
@@ -235,7 +256,7 @@ def main(tag: str, extract: bool, args: tuple):
         sys.exit(exit_code)
     else:
        # Create new Snapshots from URLs or Crawls
-        sys.exit(create_snapshots(args, tag=tag, extract=extract))
+        sys.exit(create_snapshots(args, tag=tag, plugin=plugin))
 
 
 if __name__ == '__main__':
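
Note (illustrative, not part of the patch): the queue-or-requeue pattern added
in the @@ -176,10 hunk, shown standalone. A minimal sketch assuming the Django
models used in the diff; `queue_plugin_run` is a hypothetical helper name, not
an existing ArchiveBox API:

    from django.utils import timezone
    from archivebox.core.models import ArchiveResult, Snapshot

    def queue_plugin_run(snapshot: Snapshot, plugin_name: str) -> ArchiveResult:
        """Ensure one pending ArchiveResult exists for (snapshot, plugin_name)."""
        result, created = ArchiveResult.objects.get_or_create(
            snapshot=snapshot,
            plugin=plugin_name,
            defaults={
                'status': ArchiveResult.StatusChoices.QUEUED,
                'retry_at': timezone.now(),
            },
        )
        # A previously FAILED or SKIPPED result is reset and re-queued instead
        # of duplicated; results in any other status are left untouched.
        if not created and result.status in (
            ArchiveResult.StatusChoices.FAILED,
            ArchiveResult.StatusChoices.SKIPPED,
        ):
            result.status = ArchiveResult.StatusChoices.QUEUED
            result.retry_at = timezone.now()
            result.save()
        return result

Keying get_or_create on (snapshot, plugin) makes repeated invocations of
`archivebox snapshot --plugin=...` idempotent for already-completed results
while still retrying failed or skipped ones.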