diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index 928aa990..403c441e 100755
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -1515,6 +1515,12 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
         parent_snapshot = overrides.get('snapshot')  # Parent snapshot
         created_by_id = overrides.get('created_by_id') or (parent_snapshot.created_by.pk if parent_snapshot else get_or_create_system_user_pk())
 
+        # DEBUG: Check if crawl_id in record matches overrides crawl
+        import sys
+        record_crawl_id = record.get('crawl_id')
+        if record_crawl_id and crawl and str(crawl.id) != str(record_crawl_id):
+            print(f"[yellow]⚠️ Snapshot.from_json crawl mismatch: record has crawl_id={record_crawl_id}, overrides has crawl={crawl.id}[/yellow]", file=sys.stderr)
+
         # If no crawl provided, inherit from parent or auto-create one
         if not crawl:
             if parent_snapshot:
@@ -1536,6 +1542,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
                     label=f'auto-created for {url[:50]}',
                     created_by_id=created_by_id,
                 )
+                print(f"[red]⚠️ Snapshot.from_json auto-created new crawl {crawl.id} for url={url}[/red]", file=sys.stderr)
 
         # Parse tags
         tags_str = record.get('tags', '')
@@ -1546,8 +1553,9 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
             if tag.strip()
         ))
 
-        # Get most recent snapshot with this URL (URLs can exist in multiple crawls)
-        snapshot = Snapshot.objects.filter(url=url).order_by('-created_at').first()
+        # Check for existing snapshot with same URL in same crawl
+        # (URLs can exist in multiple crawls, but should be unique within a crawl)
+        snapshot = Snapshot.objects.filter(url=url, crawl=crawl).order_by('-created_at').first()
 
         title = record.get('title')
         timestamp = record.get('timestamp')
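Review note: a minimal sketch (not part of the patch) of the lookup change above, assuming two crawls that both contain the same URL:

    # Old behavior: the newest Snapshot for a URL matched regardless of crawl, so
    # importing a record for crawl_b could silently reuse crawl_a's snapshot:
    snapshot = Snapshot.objects.filter(url=url).order_by('-created_at').first()

    # New behavior: the lookup is scoped to the crawl being imported, so a URL is
    # deduplicated only within its own crawl:
    snapshot = Snapshot.objects.filter(url=url, crawl=crawl).order_by('-created_at').first()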
diff --git a/archivebox/hooks.py b/archivebox/hooks.py
index f955974b..116671ac 100644
--- a/archivebox/hooks.py
+++ b/archivebox/hooks.py
@@ -892,15 +892,34 @@ def get_plugin_special_config(plugin_name: str, config: Dict[str, Any]) -> Dict[
     """
     plugin_upper = plugin_name.upper()
 
-    # 1. Enabled: PLUGINNAME_ENABLED (default True)
+    # 1. Enabled: Check PLUGINS whitelist first, then PLUGINNAME_ENABLED (default True)
     # Old names (USE_*, SAVE_*) are aliased in config.json via x-aliases
-    enabled_key = f'{plugin_upper}_ENABLED'
-    enabled = config.get(enabled_key)
-    if enabled is None:
-        enabled = True
-    elif isinstance(enabled, str):
-        # Handle string values from config file ("true"/"false")
-        enabled = enabled.lower() not in ('false', '0', 'no', '')
+
+    # Check if PLUGINS whitelist is specified (e.g., --plugins=wget,favicon)
+    plugins_whitelist = config.get('PLUGINS', '')
+    if plugins_whitelist:
+        # PLUGINS whitelist is specified - only enable plugins in the list
+        plugin_names = [p.strip().lower() for p in plugins_whitelist.split(',') if p.strip()]
+        if plugin_name.lower() not in plugin_names:
+            # Plugin not in whitelist - explicitly disabled
+            enabled = False
+        else:
+            # Plugin is in whitelist - check if explicitly disabled by PLUGINNAME_ENABLED
+            enabled_key = f'{plugin_upper}_ENABLED'
+            enabled = config.get(enabled_key)
+            if enabled is None:
+                enabled = True  # Default to enabled if in whitelist
+            elif isinstance(enabled, str):
+                enabled = enabled.lower() not in ('false', '0', 'no', '')
+    else:
+        # No PLUGINS whitelist - use PLUGINNAME_ENABLED (default True)
+        enabled_key = f'{plugin_upper}_ENABLED'
+        enabled = config.get(enabled_key)
+        if enabled is None:
+            enabled = True
+        elif isinstance(enabled, str):
+            # Handle string values from config file ("true"/"false")
+            enabled = enabled.lower() not in ('false', '0', 'no', '')
 
     # 2. Timeout: PLUGINNAME_TIMEOUT (fallback to TIMEOUT, default 300)
     timeout_key = f'{plugin_upper}_TIMEOUT'
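Review note: illustrative expectations for the whitelist logic above (key names are taken from this hunk; treating config as a plain dict is an assumption):

    config = {'PLUGINS': 'wget,favicon', 'WGET_ENABLED': 'false'}
    # 'wget':    whitelisted but explicitly disabled via WGET_ENABLED -> enabled is False
    # 'favicon': whitelisted, no *_ENABLED override                   -> enabled is True
    # 'title':   not in the whitelist                                 -> enabled is False
    # With no PLUGINS key at all, every plugin falls back to PLUGINNAME_ENABLED (default True).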
diff --git a/archivebox/plugins/accessibility/tests/test_accessibility.py b/archivebox/plugins/accessibility/tests/test_accessibility.py
index 0c85b145..4fc8a1fe 100644
--- a/archivebox/plugins/accessibility/tests/test_accessibility.py
+++ b/archivebox/plugins/accessibility/tests/test_accessibility.py
@@ -80,7 +80,8 @@ class TestAccessibilityWithChrome(TestCase):
         # Run accessibility hook with the active Chrome session
         result = subprocess.run(
             ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
             cwd=str(snapshot_chrome_dir),
+            env=get_test_env(),
             capture_output=True,
             text=True,
             timeout=60,
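Review note: this and the remaining test-suite hunks all make the same one-line change: pass the baseline test environment to subprocess.run() via its env keyword (this assumes each suite already imports a get_test_env() helper). The resulting call shape:

    result = subprocess.run(
        ['node', str(ACCESSIBILITY_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
        cwd=str(snapshot_chrome_dir),
        env=get_test_env(),   # env is an argument to subprocess.run(), not to str()
        capture_output=True,
        text=True,
        timeout=60,
    )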
diff --git a/archivebox/plugins/chrome/tests/test_chrome.py b/archivebox/plugins/chrome/tests/test_chrome.py
index d455ba41..6c801a5e 100644
--- a/archivebox/plugins/chrome/tests/test_chrome.py
+++ b/archivebox/plugins/chrome/tests/test_chrome.py
@@ -208,7 +208,8 @@ def test_chrome_launch_and_tab_creation():
     env['CRAWL_OUTPUT_DIR'] = str(crawl_dir)
     result = subprocess.run(
         ['node', str(CHROME_TAB_HOOK), '--url=https://example.com', '--snapshot-id=snap-123', '--crawl-id=test-crawl-123'],
         cwd=str(snapshot_chrome_dir),
+        env={**get_test_env(), **env},
         capture_output=True,
         text=True,
         timeout=60,
@@ -268,7 +269,8 @@ def test_chrome_navigation():
 
     result = subprocess.run(
         ['node', str(CHROME_TAB_HOOK), '--url=https://example.com', '--snapshot-id=snap-nav-123', '--crawl-id=test-crawl-nav'],
         cwd=str(snapshot_chrome_dir),
+        env=get_test_env(),
         capture_output=True,
         text=True,
         timeout=60,
@@ -279,7 +281,8 @@ def test_chrome_navigation():
     # Navigate to URL
     result = subprocess.run(
         ['node', str(CHROME_NAVIGATE_HOOK), '--url=https://example.com', '--snapshot-id=snap-nav-123'],
         cwd=str(snapshot_chrome_dir),
+        env=get_test_env(),
         capture_output=True,
         text=True,
         timeout=120,
@@ -414,7 +417,8 @@ def test_multiple_snapshots_share_chrome():
         # Create tab for this snapshot
         result = subprocess.run(
             ['node', str(CHROME_TAB_HOOK), f'--url=https://example.com/{snap_num}', f'--snapshot-id=snap-{snap_num}', '--crawl-id=test-multi-crawl'],
             cwd=str(snapshot_chrome_dir),
+            env=get_test_env(),
             capture_output=True,
             text=True,
             timeout=60,
diff --git a/archivebox/plugins/consolelog/tests/test_consolelog.py b/archivebox/plugins/consolelog/tests/test_consolelog.py
index 741776f0..ca75f130 100644
--- a/archivebox/plugins/consolelog/tests/test_consolelog.py
+++ b/archivebox/plugins/consolelog/tests/test_consolelog.py
@@ -80,7 +80,8 @@ class TestConsolelogWithChrome(TestCase):
         # Run consolelog hook with the active Chrome session
         result = subprocess.run(
             ['node', str(CONSOLELOG_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
             cwd=str(snapshot_chrome_dir),
+            env=get_test_env(),
             capture_output=True,
             text=True,
             timeout=120,  # Longer timeout as it waits for navigation
diff --git a/archivebox/plugins/dom/tests/test_dom.py b/archivebox/plugins/dom/tests/test_dom.py
index 7fe69d64..fea41b8d 100644
--- a/archivebox/plugins/dom/tests/test_dom.py
+++ b/archivebox/plugins/dom/tests/test_dom.py
@@ -68,7 +68,8 @@ def test_extracts_dom_from_example_com():
         capture_output=True,
         text=True,
-        timeout=120
-    )
+        timeout=120,
+        env=get_test_env(),
+    )
 
     assert result.returncode == 0, f"Extraction failed: {result.stderr}"
 
@@ -152,7 +153,8 @@ def test_staticfile_present_skips():
         capture_output=True,
         text=True,
-        timeout=30
-    )
+        timeout=30,
+        env=get_test_env(),
+    )
 
     assert result.returncode == 0, "Should exit 0 when permanently skipping"
diff --git a/archivebox/plugins/headers/tests/test_headers.py b/archivebox/plugins/headers/tests/test_headers.py
index e9fd1298..0930737c 100644
--- a/archivebox/plugins/headers/tests/test_headers.py
+++ b/archivebox/plugins/headers/tests/test_headers.py
@@ -50,7 +50,8 @@ def test_node_is_available():
         capture_output=True,
         text=True,
-        timeout=10
-    )
+        timeout=10,
+        env=get_test_env(),
+    )
 
     assert result.returncode == 0, f"node not executable: {result.stderr}"
     assert result.stdout.startswith('v'), f"Unexpected node version format: {result.stdout}"
@@ -72,7 +73,8 @@ def test_extracts_headers_from_example_com():
         capture_output=True,
         text=True,
-        timeout=60
-    )
+        timeout=60,
+        env=get_test_env(),
+    )
 
     assert result.returncode == 0, f"Extraction failed: {result.stderr}"
 
@@ -133,7 +135,8 @@ def test_headers_output_structure():
         capture_output=True,
         text=True,
-        timeout=60
-    )
+        timeout=60,
+        env=get_test_env(),
+    )
 
     assert result.returncode == 0, f"Extraction failed: {result.stderr}"
 
@@ -192,7 +195,8 @@ def test_falls_back_to_http_when_chrome_unavailable():
         capture_output=True,
         text=True,
-        timeout=60
-    )
+        timeout=60,
+        env=get_test_env(),
+    )
 
     assert result.returncode == 0, f"Extraction failed: {result.stderr}"
 
@@ -309,7 +313,8 @@ def test_handles_https_urls():
         capture_output=True,
         text=True,
-        timeout=60
-    )
+        timeout=60,
+        env=get_test_env(),
+    )
 
     if result.returncode == 0:
         output_headers_file = tmpdir / 'headers.json'
@@ -334,7 +339,8 @@ def test_handles_404_gracefully():
         capture_output=True,
         text=True,
-        timeout=60
-    )
+        timeout=60,
+        env=get_test_env(),
+    )
 
     # May succeed or fail depending on server behavior
     # If it succeeds, verify 404 status is captured
diff --git a/archivebox/plugins/infiniscroll/tests/test_infiniscroll.py b/archivebox/plugins/infiniscroll/tests/test_infiniscroll.py
index eee44ce4..16a7631d 100644
--- a/archivebox/plugins/infiniscroll/tests/test_infiniscroll.py
+++ b/archivebox/plugins/infiniscroll/tests/test_infiniscroll.py
@@ -123,7 +123,8 @@ def test_scrolls_page_and_outputs_stats():
 
     result = subprocess.run(
         ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-infiniscroll'],
         cwd=str(infiniscroll_dir),
+        env=get_test_env(),
         capture_output=True,
         text=True,
         timeout=60,
@@ -187,7 +188,8 @@ def test_config_scroll_limit_honored():
 
     result = subprocess.run(
         ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-limit'],
         cwd=str(infiniscroll_dir),
+        env=get_test_env(),
         capture_output=True,
         text=True,
         timeout=60,
@@ -246,7 +248,8 @@ def test_config_timeout_honored():
     start_time = time.time()
     result = subprocess.run(
         ['node', str(INFINISCROLL_HOOK), f'--url={TEST_URL}', '--snapshot-id=snap-timeout'],
         cwd=str(infiniscroll_dir),
+        env=get_test_env(),
         capture_output=True,
         text=True,
         timeout=30,
diff --git a/archivebox/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py b/archivebox/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py
index 13a62e58..a9525c89 100644
--- a/archivebox/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py
+++ b/archivebox/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py
@@ -154,7 +154,7 @@ def test_extension_loads_in_chromium():
     # Step 1: Install the extension
     result = subprocess.run(
         ['node', str(INSTALL_SCRIPT)],
         cwd=str(tmpdir),
         capture_output=True,
         text=True,
-        env=env,
+        env={**get_test_env(), **env},  # merge baseline test env; test-specific keys win
@@ -291,7 +291,7 @@ const puppeteer = require('puppeteer-core');
 
     result = subprocess.run(
         ['node', str(script_path)],
         cwd=str(tmpdir),
         capture_output=True,
         text=True,
-        env=env,
+        env={**get_test_env(), **env},
@@ -443,7 +443,7 @@ const puppeteer = require('puppeteer-core');
 
     result = subprocess.run(
         ['node', str(script_path)],
         cwd=str(script_dir),
         capture_output=True,
         text=True,
-        env=env,
+        env={**get_test_env(), **env},
@@ -557,7 +557,7 @@ def test_hides_cookie_consent_on_filmin():
 
     result = subprocess.run(
         ['node', str(INSTALL_SCRIPT)],
         cwd=str(tmpdir),
         capture_output=True,
         text=True,
-        env=env_with_ext,
+        env={**get_test_env(), **env_with_ext},
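Review note: subprocess.run() cannot take the env keyword twice, so in suites that already build a test-specific env dict (the first chrome hunk above, which sets CRAWL_OUTPUT_DIR, plus the istilldontcareaboutcookies hunks above and the ublock hunks further down) the baseline is merged into the existing dict instead of added as a second keyword. This assumes get_test_env() returns a plain mapping and that test-specific keys should win on conflict:

    env = {**get_test_env(), **env}  # later (test-specific) keys override the baseline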
diff --git a/archivebox/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py b/archivebox/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py
index 7f519ea2..d87df28d 100644
--- a/archivebox/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py
+++ b/archivebox/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py
@@ -80,7 +80,8 @@ class TestParseDomOutlinksWithChrome(TestCase):
         # Run outlinks hook with the active Chrome session
         result = subprocess.run(
             ['node', str(OUTLINKS_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
             cwd=str(snapshot_chrome_dir),
+            env=get_test_env(),
             capture_output=True,
             text=True,
             timeout=60,
diff --git a/archivebox/plugins/pdf/tests/test_pdf.py b/archivebox/plugins/pdf/tests/test_pdf.py
index c160cfdc..8751faef 100644
--- a/archivebox/plugins/pdf/tests/test_pdf.py
+++ b/archivebox/plugins/pdf/tests/test_pdf.py
@@ -69,7 +69,8 @@ def test_extracts_pdf_from_example_com():
         capture_output=True,
         text=True,
-        timeout=120
-    )
+        timeout=120,
+        env=get_test_env(),
+    )
 
     # Parse clean JSONL output (hook might fail due to network issues)
     result_json = None
diff --git a/archivebox/plugins/redirects/tests/test_redirects.py b/archivebox/plugins/redirects/tests/test_redirects.py
index 06d95246..934b14c7 100644
--- a/archivebox/plugins/redirects/tests/test_redirects.py
+++ b/archivebox/plugins/redirects/tests/test_redirects.py
@@ -81,7 +81,8 @@ class TestRedirectsWithChrome(TestCase):
         # Run redirects hook with the active Chrome session
         result = subprocess.run(
             ['node', str(REDIRECTS_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
             cwd=str(snapshot_chrome_dir),
+            env=get_test_env(),
             capture_output=True,
             text=True,
             timeout=60,
diff --git a/archivebox/plugins/responses/tests/test_responses.py b/archivebox/plugins/responses/tests/test_responses.py
index 129d92a3..ea710c83 100644
--- a/archivebox/plugins/responses/tests/test_responses.py
+++ b/archivebox/plugins/responses/tests/test_responses.py
@@ -80,7 +80,8 @@ class TestResponsesWithChrome(TestCase):
         # Run responses hook with the active Chrome session
         result = subprocess.run(
             ['node', str(RESPONSES_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
             cwd=str(snapshot_chrome_dir),
+            env=get_test_env(),
             capture_output=True,
             text=True,
             timeout=120,  # Longer timeout as it waits for navigation
diff --git a/archivebox/plugins/screenshot/tests/test_screenshot.py b/archivebox/plugins/screenshot/tests/test_screenshot.py
index 24d4960d..1aed35e6 100644
--- a/archivebox/plugins/screenshot/tests/test_screenshot.py
+++ b/archivebox/plugins/screenshot/tests/test_screenshot.py
@@ -65,7 +65,8 @@ def test_extracts_screenshot_from_example_com():
         cwd=tmpdir,
         capture_output=True,
         text=True,
-        timeout=120
+        timeout=120,
+        env=get_test_env()
     )
 
     assert result.returncode == 0, f"Extraction failed: {result.stderr}"
diff --git a/archivebox/plugins/seo/tests/test_seo.py b/archivebox/plugins/seo/tests/test_seo.py
index 2b01a356..23beaa76 100644
--- a/archivebox/plugins/seo/tests/test_seo.py
+++ b/archivebox/plugins/seo/tests/test_seo.py
@@ -80,7 +80,8 @@ class TestSEOWithChrome(TestCase):
         # Run SEO hook with the active Chrome session
         result = subprocess.run(
             ['node', str(SEO_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
             cwd=str(snapshot_chrome_dir),
+            env=get_test_env(),
             capture_output=True,
             text=True,
             timeout=60,
diff --git a/archivebox/plugins/ssl/tests/test_ssl.py b/archivebox/plugins/ssl/tests/test_ssl.py
index cf131d9b..48ec0a6c 100644
--- a/archivebox/plugins/ssl/tests/test_ssl.py
+++ b/archivebox/plugins/ssl/tests/test_ssl.py
@@ -80,7 +80,8 @@ class TestSSLWithChrome(TestCase):
         # Run SSL hook with the active Chrome session
         result = subprocess.run(
             ['node', str(SSL_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
             cwd=str(snapshot_chrome_dir),
+            env=get_test_env(),
             capture_output=True,
             text=True,
             timeout=60,
diff --git a/archivebox/plugins/staticfile/tests/test_staticfile.py b/archivebox/plugins/staticfile/tests/test_staticfile.py
index 05af3a02..f80d0839 100644
--- a/archivebox/plugins/staticfile/tests/test_staticfile.py
+++ b/archivebox/plugins/staticfile/tests/test_staticfile.py
@@ -80,7 +80,8 @@ class TestStaticfileWithChrome(TestCase):
         # Run staticfile hook with the active Chrome session
         result = subprocess.run(
             ['node', str(STATICFILE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
             cwd=str(snapshot_chrome_dir),
+            env=get_test_env(),
             capture_output=True,
             text=True,
             timeout=120,  # Longer timeout as it waits for navigation
diff --git a/archivebox/plugins/title/tests/test_title.py b/archivebox/plugins/title/tests/test_title.py
index 285f7309..91b548d6 100644
--- a/archivebox/plugins/title/tests/test_title.py
+++ b/archivebox/plugins/title/tests/test_title.py
@@ -53,7 +53,8 @@ def test_extracts_title_from_example_com():
         capture_output=True,
         text=True,
-        timeout=60
-    )
+        timeout=60,
+        env=get_test_env(),
+    )
 
     assert result.returncode == 0, f"Extraction failed: {result.stderr}"
 
@@ -105,7 +106,8 @@ def test_falls_back_to_http_when_chrome_unavailable():
         capture_output=True,
         text=True,
-        timeout=60
-    )
+        timeout=60,
+        env=get_test_env(),
+    )
 
     assert result.returncode == 0, f"Extraction failed: {result.stderr}"
 
@@ -219,7 +221,8 @@ def test_handles_https_urls():
         capture_output=True,
         text=True,
-        timeout=60
-    )
+        timeout=60,
+        env=get_test_env(),
+    )
 
     if result.returncode == 0:
         # Hook writes to current directory
@@ -249,7 +252,8 @@ def test_handles_404_gracefully():
         capture_output=True,
         text=True,
-        timeout=60
-    )
+        timeout=60,
+        env=get_test_env(),
+    )
 
     # May succeed or fail depending on server behavior
     # example.com returns "Example Domain" even for 404s
@@ -272,7 +276,8 @@ def test_handles_redirects():
         capture_output=True,
         text=True,
-        timeout=60
-    )
+        timeout=60,
+        env=get_test_env(),
+    )
 
     # Should succeed and follow redirect
     if result.returncode == 0:
diff --git a/archivebox/plugins/ublock/tests/test_ublock.py b/archivebox/plugins/ublock/tests/test_ublock.py
index 91492d4e..5489739d 100644
--- a/archivebox/plugins/ublock/tests/test_ublock.py
+++ b/archivebox/plugins/ublock/tests/test_ublock.py
@@ -283,7 +283,7 @@ const puppeteer = require('puppeteer-core');
 
     result = subprocess.run(
         ['node', str(script_path)],
         cwd=str(script_dir),
         capture_output=True,
         text=True,
-        env=env,
+        env={**get_test_env(), **env},
@@ -482,7 +482,7 @@ const puppeteer = require('puppeteer-core');
 
     result = subprocess.run(
         ['node', str(script_path)],
         cwd=str(tmpdir),
         capture_output=True,
         text=True,
-        env=env,
+        env={**get_test_env(), **env},
diff --git a/archivebox/workers/orchestrator.py b/archivebox/workers/orchestrator.py
index b074d529..99d4e27e 100644
--- a/archivebox/workers/orchestrator.py
+++ b/archivebox/workers/orchestrator.py
@@ -30,6 +30,7 @@ __package__ = 'archivebox.workers'
 import os
 import time
 from typing import Type
+from datetime import timedelta
 from multiprocessing import Process as MPProcess
 
 from django.utils import timezone
@@ -67,12 +68,19 @@ class Orchestrator:
     MAX_WORKERS_PER_TYPE: int = 8   # Max workers per model type
     MAX_TOTAL_WORKERS: int = 24     # Max workers across all types
 
-    def __init__(self, exit_on_idle: bool = True):
+    def __init__(self, exit_on_idle: bool = True, crawl_id: str | None = None):
         self.exit_on_idle = exit_on_idle
+        self.crawl_id = crawl_id  # If set, only process work for this crawl
         self.pid: int = os.getpid()
         self.pid_file = None
         self.idle_count: int = 0
         self._last_cleanup_time: float = 0.0  # For throttling cleanup_stale_running()
+
+        # CRITICAL: In foreground mode (exit_on_idle=True), use ONLY 1 worker
+        # to keep execution strictly sequential and deterministic
+        if self.exit_on_idle:
+            self.MAX_WORKERS_PER_TYPE = 1
+            self.MAX_TOTAL_WORKERS = 1
 
     def __repr__(self) -> str:
         return f'[underline]Orchestrator[/underline]\\[pid={self.pid}]'
@@ -315,15 +323,12 @@ class Orchestrator:
         # Enable progress bars only in TTY + foreground mode
         show_progress = IS_TTY and self.exit_on_idle
 
-        # Debug
-        print(f"[yellow]DEBUG: IS_TTY={IS_TTY}, exit_on_idle={self.exit_on_idle}, show_progress={show_progress}[/yellow]")
-
         self.on_startup()
         task_ids = {}
 
         if not show_progress:
             # No progress bars - just run normally
-            self._run_orchestrator_loop(None, task_ids, None, None)
+            self._run_orchestrator_loop(None, task_ids)
         else:
             # Redirect worker subprocess output to /dev/null
             devnull_fd = os.open(os.devnull, os.O_WRONLY)
@@ -356,7 +361,7 @@ class Orchestrator:
                 TaskProgressColumn(),
                 console=orchestrator_console,
             ) as progress:
-                self._run_orchestrator_loop(progress, task_ids, None, None)
+                self._run_orchestrator_loop(progress, task_ids)
 
             # Restore original console
             logging_module.CONSOLE = original_console
@@ -374,7 +379,7 @@ class Orchestrator:
             pass
         # stdout_for_console is closed by orchestrator_console
 
-    def _run_orchestrator_loop(self, progress, task_ids, read_fd, console):
+    def _run_orchestrator_loop(self, progress, task_ids):
         """Run the main orchestrator loop with optional progress display."""
         try:
             while True:
@@ -385,12 +390,28 @@ class Orchestrator:
 
                 if progress:
                     from archivebox.core.models import Snapshot
 
-                    # Get all started snapshots
-                    active_snapshots = list(Snapshot.objects.filter(status='started'))
+                    # Get all started snapshots (optionally filtered by crawl_id)
+                    snapshot_filter = {'status': 'started'}
+                    if self.crawl_id:
+                        snapshot_filter['crawl_id'] = self.crawl_id
+                    else:
+                        # When processing all crawls, filter by recent modified_at to avoid listing stale snapshots
+                        recent_cutoff = timezone.now() - timedelta(minutes=5)
+                        snapshot_filter['modified_at__gte'] = recent_cutoff
+
+                    active_snapshots = list(Snapshot.objects.filter(**snapshot_filter))
 
                     # Track which snapshots are still active
                     active_ids = set()
 
+                    # Check for duplicate URLs among active snapshots
+                    snapshot_urls = [s.url for s in active_snapshots]
+                    if len(active_snapshots) != len(set(snapshot_urls)):
+                        # Duplicate URLs present - disambiguate by showing snapshot ID
+                        show_id = True
+                    else:
+                        show_id = False
+
                     for snapshot in active_snapshots:
                         active_ids.add(snapshot.id)
@@ -421,7 +442,11 @@ class Orchestrator:
 
                         # Build description with URL + current plugin
                         url = snapshot.url[:50] + '...' if len(snapshot.url) > 50 else snapshot.url
-                        description = f"{url}{current_plugin}"
+                        if show_id:
+                            # Show snapshot ID if there are duplicate URLs
+                            description = f"[{str(snapshot.id)[:8]}] {url}{current_plugin}"
+                        else:
+                            description = f"{url}{current_plugin}"
 
                         # Create or update task
                         if snapshot.id not in task_ids:
diff --git a/archivebox/workers/worker.py b/archivebox/workers/worker.py
index 918b2bba..898c4210 100644
--- a/archivebox/workers/worker.py
+++ b/archivebox/workers/worker.py
@@ -63,9 +63,10 @@ class Worker:
     POLL_INTERVAL: ClassVar[float] = 0.2  # How often to check for new work (seconds)
     IDLE_TIMEOUT: ClassVar[int] = 50      # Exit after N idle iterations (10 sec at 0.2 poll interval)
 
-    def __init__(self, worker_id: int = 0, daemon: bool = False, **kwargs: Any):
+    def __init__(self, worker_id: int = 0, daemon: bool = False, crawl_id: str | None = None, **kwargs: Any):
         self.worker_id = worker_id
         self.daemon = daemon
+        self.crawl_id = crawl_id  # If set, only process work for this crawl
         self.pid: int = os.getpid()
         self.pid_file: Path | None = None
         self.idle_count: int = 0
@@ -346,6 +347,13 @@ class CrawlWorker(Worker):
         from archivebox.crawls.models import Crawl
         return Crawl
 
+    def get_queue(self) -> QuerySet:
+        """Get queue of Crawls ready for processing, optionally filtered by crawl_id."""
+        qs = super().get_queue()
+        if self.crawl_id:
+            qs = qs.filter(id=self.crawl_id)
+        return qs
+
 
 class SnapshotWorker(Worker):
     """Worker for processing Snapshot objects."""
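Review note: a minimal usage sketch of the new crawl-scoped foreground mode (constructor arguments are from this diff; the start() entrypoint name is an assumption):

    from archivebox.workers.orchestrator import Orchestrator

    # exit_on_idle=True forces MAX_WORKERS_PER_TYPE = MAX_TOTAL_WORKERS = 1, so snapshots
    # are processed strictly sequentially; crawl_id narrows the CrawlWorker queue and the
    # progress display to a single crawl.
    orchestrator = Orchestrator(exit_on_idle=True, crawl_id='1f0a2b3c')  # id value illustrative
    orchestrator.start()  # assumed entrypoint; exits once the queue goes idle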