Update abx dependencies and plugin test harness

2026-04-06 07:47:53 +10:00 · 2026-03-15 04:37:32 -07:00
parent ecb1764590
commit 4fa701fafe
14 changed files with 763 additions and 650 deletions
--- a/.github/workflows/test-parallel.yml
+++ b/.github/workflows/test-parallel.yml
@@ -28,13 +28,7 @@ jobs:
        id: set-matrix
        run: |
          # Find all main test files
-          main_tests=$(find tests -maxdepth 1 -name "test_*.py" -type f | sort)
-
-          # Find all plugin test files
-          plugin_tests=$(find archivebox/plugins -path "*/tests/test_*.py" -type f | sort)
-
-          # Combine and format as JSON array
-          all_tests=$(echo "$main_tests $plugin_tests" | tr ' ' '\n' | grep -v '^$')
+          all_tests=$(find archivebox/tests -maxdepth 1 -name "test_*.py" -type f | sort)

          # Create JSON array with test file info
          json_array="["
@@ -47,13 +41,7 @@ jobs:
            fi

            # Extract a display name for the test
-            if [[ $test_file == tests/* ]]; then
-              name="main/$(basename $test_file .py | sed 's/^test_//')"
-            else
-              plugin=$(echo $test_file | sed 's|archivebox/plugins/\([^/]*\)/.*|\1|')
-              test_name=$(basename $test_file .py | sed 's/^test_//')
-              name="plugin/$plugin/$test_name"
-            fi
+            name="main/$(basename $test_file .py | sed 's/^test_//')"

            json_array+="{\"path\":\"$test_file\",\"name\":\"$name\"}"
          done
@@ -111,9 +99,56 @@ jobs:

      - name: Install dependencies with uv
        run: |
-          uv sync --dev --all-extras
+          uv sync --dev --all-extras --no-sources

      - name: Run test - ${{ matrix.test.name }}
        run: |
          mkdir -p tests/out
          uv run pytest -xvs "${{ matrix.test.path }}" --basetemp=tests/out --ignore=archivebox/pkgs
+
+  plugin-tests:
+    name: Plugin tests
+    runs-on: ubuntu-22.04
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+          fetch-depth: 1
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.13"
+          architecture: x64
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
+      - name: Set up Node JS
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - name: Cache uv
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/uv
+          key: ${{ runner.os }}-3.13-uv-${{ hashFiles('pyproject.toml', 'uv.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-3.13-uv-
+
+      - uses: awalsh128/cache-apt-pkgs-action@latest
+        with:
+          packages: git ripgrep build-essential python3-dev python3-setuptools libssl-dev libldap2-dev libsasl2-dev zlib1g-dev libatomic1 python3-minimal gnupg2 curl wget python3-ldap python3-msgpack python3-mutagen python3-regex python3-pycryptodome procps
+          version: "1.1"  # quoted: keep the cache version a string, not a YAML float
+
+      - name: Install dependencies with uv
+        run: |
+          uv sync --dev --all-extras --no-sources
+
+      - name: Run plugin tests
+        run: |
+          uv run bash ./bin/test_plugins.sh --no-coverage
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -30,63 +30,33 @@ jobs:
          python-version: ${{ matrix.python }}
          architecture: x64

+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
      - name: Set up Node JS
        uses: actions/setup-node@v4
        with:
-          node-version: 20.10.0
-
-      - name: Setup PDM
-        uses: pdm-project/setup-pdm@v3
-        with:
-          python-version: '3.13'
-          cache: true
+          node-version: 22

      ### Install Python & JS Dependencies
-      - name: Get pip cache dir
-        id: pip-cache
-        run: |
-          echo "::set-output name=dir::$(pip cache dir)"
-
-      - name: Cache pip
+      - name: Cache uv
        uses: actions/cache@v3
-        id: cache-pip
        with:
-          path: ${{ steps.pip-cache.outputs.dir }}
-          key: ${{ runner.os }}-${{ matrix.python }}-venv-${{ hashFiles('setup.py') }}
+          path: ~/.cache/uv
+          key: ${{ runner.os }}-${{ matrix.python }}-uv-${{ hashFiles('pyproject.toml', 'uv.lock') }}
          restore-keys: |
-            ${{ runner.os }}-${{ matrix.python }}-venv-
+            ${{ runner.os }}-${{ matrix.python }}-uv-

      - uses: awalsh128/cache-apt-pkgs-action@latest
        with:
          packages: ripgrep build-essential python3-dev python3-setuptools libssl-dev libldap2-dev libsasl2-dev zlib1g-dev libatomic1 python3-minimal gnupg2 curl wget python3-ldap python3-msgpack python3-mutagen python3-regex python3-pycryptodome procps
          version: 1.0

-      - name: Install pip dependencies
+      - name: Install dependencies with uv
        run: |
-          python -m pip install --upgrade pip setuptools wheel pytest bottle build
-          python -m pip install -r requirements.txt
-          python -m pip install -e .[sonic,ldap]
-
-      - name: Get npm cache dir
-        id: npm-cache
-        run: |
-          echo "::set-output name=dir::$GITHUB_WORKSPACE/node_modules"
-
-      - name: Cache npm
-        uses: actions/cache@v3
-        id: cache-npm
-        with:
-          path: ${{ steps.npm-cache.outputs.dir }}
-          key: ${{ runner.os }}-node_modules-${{ hashFiles('package-lock.json') }}
-          restore-keys: |
-            ${{ runner.os }}-node_modules
-
-      - name: Install npm requirements
-        run: |
-          npm install
-          echo "SINGLEFILE_BINARY=$GITHUB_WORKSPACE/node_modules/.bin/single-file" >> $GITHUB_ENV
-          echo "READABILITY_BINARY=$GITHUB_WORKSPACE/node_modules/.bin/readability-extractor" >> $GITHUB_ENV
-          echo "MERCURY_BINARY=$GITHUB_WORKSPACE/node_modules/.bin/mercury-parser" >> $GITHUB_ENV
+          uv sync --dev --all-extras --no-sources

      ### Run the tests
      - name: Directory listing for debugging
@@ -96,13 +66,20 @@ jobs:

      - name: Archivebox version
        run: |
-          archivebox version
+          mkdir -p tests/out/data
+          DATA_DIR="$PWD/tests/out/data" uv run archivebox version

      - name: Test built package with pytest
        # TODO: remove this exception for windows once we get tests passing on that platform
        if: ${{ !contains(matrix.os, 'windows') }}
        run: |
-          python -m pytest -s --basetemp=tests/out --ignore=archivebox/pkgs
+          mkdir -p tests/out
+          uv run pytest -s archivebox/tests --basetemp=tests/out --ignore=archivebox/pkgs
+
+      - name: Run plugin tests
+        if: ${{ !contains(matrix.os, 'windows') }}
+        run: |
+          uv run bash ./bin/test_plugins.sh --no-coverage

  docker_tests:
    runs-on: ubuntu-latest
--- a/archivebox/core/forms.py
+++ b/archivebox/core/forms.py
@@ -147,8 +147,8 @@ class AddLinkForm(forms.Form):
            'screenshot', 'seo', 'singlefile', 'ssl', 'staticfile', 'title'
        }
        archiving = {
-            'archivedotorg', 'favicon', 'forumdl', 'gallerydl', 'git',
-            'htmltotext', 'media', 'mercury', 'papersdl', 'readability', 'wget'
+            'archivedotorg', 'defuddle', 'favicon', 'forumdl', 'gallerydl', 'git',
+            'htmltotext', 'mercury', 'papersdl', 'readability', 'trafilatura', 'wget', 'ytdlp'
        }
        parsing = {
            'parse_html_urls', 'parse_jsonl_urls',
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -2185,7 +2185,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
 # Snapshot State Machine
 # =============================================================================

-class SnapshotMachine(BaseStateMachine, strict_states=True):
+class SnapshotMachine(BaseStateMachine):
    """
    State machine for managing Snapshot lifecycle.

@@ -3074,7 +3074,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
 # ArchiveResult State Machine
 # =============================================================================

-class ArchiveResultMachine(BaseStateMachine, strict_states=True):
+class ArchiveResultMachine(BaseStateMachine):
    """
    State machine for managing ArchiveResult (single plugin execution) lifecycle.

--- a/archivebox/crawls/models.py
+++ b/archivebox/crawls/models.py
@@ -506,7 +506,7 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
 # State Machines
 # =============================================================================

-class CrawlMachine(BaseStateMachine, strict_states=True):
+class CrawlMachine(BaseStateMachine):
    """
    State machine for managing Crawl lifecycle.

--- a/archivebox/hooks.py
+++ b/archivebox/hooks.py
@@ -22,13 +22,13 @@ Execution order:
    - Failed extractors don't block subsequent extractors

 Hook Naming Convention:
-    on_{ModelName}__{run_order}_{description}[.bg].{ext}
+    on_{ModelName}__{run_order}_{description}[.finite.bg|.daemon.bg].{ext}

    Examples:
        on_Snapshot__00_setup.py         # runs first
-        on_Snapshot__10_chrome_tab.bg.js # background (doesn't block)
+        on_Snapshot__10_chrome_tab.daemon.bg.js # background (doesn't block)
        on_Snapshot__50_screenshot.js    # foreground (blocks)
-        on_Snapshot__63_media.bg.py      # background (long-running)
+        on_Snapshot__63_media.finite.bg.py      # background (long-running)

 Dependency handling:
    Extractor plugins that depend on other plugins' output should check at runtime:
@@ -108,19 +108,34 @@ def is_background_hook(hook_name: str) -> bool:
    Background hooks have '.bg.' in their filename before the extension.

    Args:
-        hook_name: Hook filename (e.g., 'on_Snapshot__10_chrome_tab.bg.js')
+        hook_name: Hook filename (e.g., 'on_Snapshot__10_chrome_tab.daemon.bg.js')

    Returns:
        True if background hook, False if foreground.

    Examples:
-        is_background_hook('on_Snapshot__10_chrome_tab.bg.js') -> True
+        is_background_hook('on_Snapshot__10_chrome_tab.daemon.bg.js') -> True
        is_background_hook('on_Snapshot__50_wget.py') -> False
-        is_background_hook('on_Snapshot__63_media.bg.py') -> True
+        is_background_hook('on_Snapshot__63_media.finite.bg.py') -> True
    """
    return '.bg.' in hook_name or '__background' in hook_name


+def iter_plugin_dirs() -> List[Path]:
+    """Return a list of every built-in and user plugin directory, skipping non-directories and names starting with '_'."""
+    plugin_dirs: List[Path] = []
+
+    for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
+        if not base_dir.exists():
+            continue
+
+        for plugin_dir in base_dir.iterdir():
+            if plugin_dir.is_dir() and not plugin_dir.name.startswith('_'):
+                plugin_dirs.append(plugin_dir)
+
+    return plugin_dirs
+
+
 class HookResult(TypedDict, total=False):
    """Raw result from run_hook()."""
    returncode: int
@@ -420,7 +435,7 @@ def run_hook(
    output_dir.mkdir(parents=True, exist_ok=True)

    # Detect if this is a background hook (long-running daemon)
-    # New convention: .bg. suffix (e.g., on_Snapshot__21_consolelog.bg.js)
+    # Background hooks use the .daemon.bg. or .finite.bg. filename convention.
    # Old convention: __background in stem (for backwards compatibility)
    is_background = '.bg.' in script.name or '__background' in script.stem

@@ -581,28 +596,20 @@ def run_hooks(
@lru_cache(maxsize=1)
 def get_plugins() -> List[str]:
    """
-    Get list of available plugins by discovering Snapshot hooks.
+    Get list of available plugins by discovering plugin directories.

-    Returns plugin names (directory names) that contain on_Snapshot hooks.
-    The plugin name is the plugin directory name, not the hook script name.
-
-    Example:
-    abx_plugins/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js
-    -> plugin = 'chrome'
-
-    Sorted alphabetically (plugins control their hook order via numeric prefixes in hook names).
+    Returns plugin directory names for any plugin that exposes hooks, config.json,
+    or a standardized templates/icon.html asset. This includes non-extractor
+    plugins such as binary providers and shared base plugins.
    """
    plugins = []

-    for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
-        if not base_dir.exists():
-            continue
-
-        for ext in ('sh', 'py', 'js'):
-            for hook_path in base_dir.glob(f'*/on_Snapshot__*.{ext}'):
-                # Use plugin directory name as plugin name
-                plugin_name = hook_path.parent.name
-                plugins.append(plugin_name)
+    for plugin_dir in iter_plugin_dirs():
+        has_hooks = any(plugin_dir.glob('on_*__*.*'))
+        has_config = (plugin_dir / 'config.json').exists()
+        has_icon = (plugin_dir / 'templates' / 'icon.html').exists()
+        if has_hooks or has_config or has_icon:
+            plugins.append(plugin_dir.name)

    return sorted(set(plugins))

@@ -808,37 +815,31 @@ def discover_plugin_configs() -> Dict[str, Dict[str, Any]]:
    """
    configs = {}

-    for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
-        if not base_dir.exists():
+    for plugin_dir in iter_plugin_dirs():
+
+        config_path = plugin_dir / 'config.json'
+        if not config_path.exists():
            continue

-        for plugin_dir in base_dir.iterdir():
-            if not plugin_dir.is_dir():
+        try:
+            with open(config_path, 'r') as f:
+                schema = json.load(f)
+
+            # Basic validation: must be an object with properties
+            if not isinstance(schema, dict):
+                continue
+            if schema.get('type') != 'object':
+                continue
+            if 'properties' not in schema:
                continue

-            config_path = plugin_dir / 'config.json'
-            if not config_path.exists():
-                continue
+            configs[plugin_dir.name] = schema

-            try:
-                with open(config_path, 'r') as f:
-                    schema = json.load(f)
-
-                # Basic validation: must be an object with properties
-                if not isinstance(schema, dict):
-                    continue
-                if schema.get('type') != 'object':
-                    continue
-                if 'properties' not in schema:
-                    continue
-
-                configs[plugin_dir.name] = schema
-
-            except (json.JSONDecodeError, OSError) as e:
-                # Log warning but continue - malformed config shouldn't break discovery
-                import sys
-                print(f"Warning: Failed to load config.json from {plugin_dir.name}: {e}", file=sys.stderr)
-                continue
+        except (json.JSONDecodeError, OSError) as e:
+            # Log warning but continue - malformed config shouldn't break discovery
+            import sys
+            print(f"Warning: Failed to load config.json from {plugin_dir.name}: {e}", file=sys.stderr)
+            continue

    return configs

@@ -1002,20 +1003,13 @@ def get_plugin_template(plugin: str, template_name: str, fallback: bool = True)
    if base_name in ('yt-dlp', 'youtube-dl'):
        base_name = 'ytdlp'

-    for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
-        if not base_dir.exists():
-            continue
+    for plugin_dir in iter_plugin_dirs():

-        # Look for plugin directory matching plugin name
-        for plugin_dir in base_dir.iterdir():
-            if not plugin_dir.is_dir():
-                continue
-
-            # Match by directory name (exact or partial)
-            if plugin_dir.name == base_name or plugin_dir.name.endswith(f'_{base_name}'):
-                template_path = plugin_dir / 'templates' / f'{template_name}.html'
-                if template_path.exists():
-                    return template_path.read_text()
+        # Match by directory name (exact or partial)
+        if plugin_dir.name == base_name or plugin_dir.name.endswith(f'_{base_name}'):
+            template_path = plugin_dir / 'templates' / f'{template_name}.html'
+            if template_path.exists():
+                return template_path.read_text()

    # Fall back to default template if requested
    if fallback:
@@ -1068,25 +1062,19 @@ def discover_plugin_templates() -> Dict[str, Dict[str, str]]:
    """
    templates: Dict[str, Dict[str, str]] = {}

-    for base_dir in (BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR):
-        if not base_dir.exists():
+    for plugin_dir in iter_plugin_dirs():
+
+        templates_dir = plugin_dir / 'templates'
+        if not templates_dir.exists():
            continue

-        for plugin_dir in base_dir.iterdir():
-            if not plugin_dir.is_dir():
-                continue
+        plugin_templates = {}
+        for template_file in templates_dir.glob('*.html'):
+            template_name = template_file.stem  # icon, card, full
+            plugin_templates[template_name] = str(template_file)

-            templates_dir = plugin_dir / 'templates'
-            if not templates_dir.exists():
-                continue
-
-            plugin_templates = {}
-            for template_file in templates_dir.glob('*.html'):
-                template_name = template_file.stem  # icon, card, full
-                plugin_templates[template_name] = str(template_file)
-
-            if plugin_templates:
-                templates[plugin_dir.name] = plugin_templates
+        if plugin_templates:
+            templates[plugin_dir.name] = plugin_templates

    return templates

--- a/archivebox/machine/migrations/0001_initial.py
+++ b/archivebox/machine/migrations/0001_initial.py
@@ -169,7 +169,7 @@ class Migration(migrations.Migration):
                        ('modified_at', models.DateTimeField(auto_now=True)),
                        ('name', models.CharField(blank=True, db_index=True, default='', max_length=63)),
                        ('binproviders', models.CharField(blank=True, default='env', help_text='Comma-separated list of allowed providers: apt,brew,pip,npm,env', max_length=127)),
-                        ('overrides', models.JSONField(blank=True, default=dict, help_text="Provider-specific overrides: {'apt': {'packages': ['pkg']}, ...}")),
+                        ('overrides', models.JSONField(blank=True, default=dict, help_text="Provider-specific overrides: {'apt': {'install_args': ['pkg']}, ...}")),
                        ('binprovider', models.CharField(blank=True, default='', help_text='Provider that successfully installed this binary', max_length=31)),
                        ('abspath', models.CharField(blank=True, default='', max_length=255)),
                        ('version', models.CharField(blank=True, default='', max_length=32)),
--- a/archivebox/machine/models.py
+++ b/archivebox/machine/models.py
@@ -227,7 +227,7 @@ class Binary(ModelWithHealthStats, ModelWithStateMachine):
    binproviders = models.CharField(max_length=127, default='env', null=False, blank=True,
        help_text="Comma-separated list of allowed providers: apt,brew,pip,npm,env")
    overrides = models.JSONField(default=dict, blank=True,
-        help_text="Provider-specific overrides: {'apt': {'packages': ['pkg']}, ...}")
+        help_text="Provider-specific overrides: {'apt': {'install_args': ['pkg']}, ...}")

    # Installation results (populated after installation)
    binprovider = models.CharField(max_length=31, default='', null=False, blank=True,
@@ -2042,7 +2042,7 @@ class Process(models.Model):
 # Binary State Machine
 # =============================================================================

-class BinaryMachine(BaseStateMachine, strict_states=True):
+class BinaryMachine(BaseStateMachine):
    """
    State machine for managing Binary installation lifecycle.

@@ -2133,7 +2133,7 @@ class BinaryMachine(BaseStateMachine, strict_states=True):
 # Process State Machine
 # =============================================================================

-class ProcessMachine(BaseStateMachine, strict_states=True):
+class ProcessMachine(BaseStateMachine):
    """
    State machine for managing Process (OS subprocess) lifecycle.

--- a/archivebox/tests/test_cli_run_binary_worker.py
+++ b/archivebox/tests/test_cli_run_binary_worker.py
@@ -129,7 +129,7 @@ class TestBinaryWorkerHooks:
    """Tests for specific Binary hook providers."""

    def test_env_provider_hook_detects_system_binary(self, initialized_archive):
-        """on_Binary__15_env_install.py hook detects system binaries."""
+        """on_Binary__15_env_discover.py hook detects system binaries."""
        binary_record = {
            'type': 'Binary',
            'name': 'python3',
--- a/archivebox/tests/test_hooks.py
+++ b/archivebox/tests/test_hooks.py
@@ -27,39 +27,33 @@ class TestBackgroundHookDetection(unittest.TestCase):

    def test_bg_js_suffix_detected(self):
        """Hooks with .bg.js suffix should be detected as background."""
-        script = Path('/path/to/on_Snapshot__21_consolelog.bg.js')
-        is_background = '.bg.' in script.name or '__background' in script.stem
-        self.assertTrue(is_background)
+        from archivebox.hooks import is_background_hook
+        self.assertTrue(is_background_hook('on_Snapshot__21_consolelog.daemon.bg.js'))

    def test_bg_py_suffix_detected(self):
        """Hooks with .bg.py suffix should be detected as background."""
-        script = Path('/path/to/on_Snapshot__24_responses.bg.py')
-        is_background = '.bg.' in script.name or '__background' in script.stem
-        self.assertTrue(is_background)
+        from archivebox.hooks import is_background_hook
+        self.assertTrue(is_background_hook('on_Snapshot__24_responses.finite.bg.py'))

    def test_bg_sh_suffix_detected(self):
        """Hooks with .bg.sh suffix should be detected as background."""
-        script = Path('/path/to/on_Snapshot__23_ssl.bg.sh')
-        is_background = '.bg.' in script.name or '__background' in script.stem
-        self.assertTrue(is_background)
+        from archivebox.hooks import is_background_hook
+        self.assertTrue(is_background_hook('on_Snapshot__23_ssl.daemon.bg.sh'))

    def test_legacy_background_suffix_detected(self):
        """Hooks with __background in stem should be detected (backwards compat)."""
-        script = Path('/path/to/on_Snapshot__21_consolelog__background.js')
-        is_background = '.bg.' in script.name or '__background' in script.stem
-        self.assertTrue(is_background)
+        from archivebox.hooks import is_background_hook
+        self.assertTrue(is_background_hook('on_Snapshot__21_consolelog__background.js'))

    def test_foreground_hook_not_detected(self):
        """Hooks without .bg. or __background should NOT be detected as background."""
-        script = Path('/path/to/on_Snapshot__11_favicon.js')
-        is_background = '.bg.' in script.name or '__background' in script.stem
-        self.assertFalse(is_background)
+        from archivebox.hooks import is_background_hook
+        self.assertFalse(is_background_hook('on_Snapshot__11_favicon.js'))

    def test_foreground_py_hook_not_detected(self):
        """Python hooks without .bg. should NOT be detected as background."""
-        script = Path('/path/to/on_Snapshot__50_wget.py')
-        is_background = '.bg.' in script.name or '__background' in script.stem
-        self.assertFalse(is_background)
+        from archivebox.hooks import is_background_hook
+        self.assertFalse(is_background_hook('on_Snapshot__50_wget.py'))


 class TestJSONLParsing(unittest.TestCase):
@@ -182,15 +176,15 @@ class TestHookDiscovery(unittest.TestCase):
        wget_dir = self.plugins_dir / 'wget'
        wget_dir.mkdir()
        (wget_dir / 'on_Snapshot__50_wget.py').write_text('# test hook')
-        (wget_dir / 'on_Crawl__00_install_wget.py').write_text('# install hook')
+        (wget_dir / 'on_Crawl__10_wget_install.finite.bg.py').write_text('# install hook')

        chrome_dir = self.plugins_dir / 'chrome'
        chrome_dir.mkdir()
-        (chrome_dir / 'on_Snapshot__20_chrome_tab.bg.js').write_text('// background hook')
+        (chrome_dir / 'on_Snapshot__20_chrome_tab.daemon.bg.js').write_text('// background hook')

        consolelog_dir = self.plugins_dir / 'consolelog'
        consolelog_dir.mkdir()
-        (consolelog_dir / 'on_Snapshot__21_consolelog.bg.js').write_text('// background hook')
+        (consolelog_dir / 'on_Snapshot__21_consolelog.daemon.bg.js').write_text('// background hook')

    def tearDown(self):
        """Clean up test directory."""
@@ -208,8 +202,8 @@ class TestHookDiscovery(unittest.TestCase):

        self.assertEqual(len(hooks), 3)
        hook_names = [h.name for h in hooks]
-        self.assertIn('on_Snapshot__20_chrome_tab.bg.js', hook_names)
-        self.assertIn('on_Snapshot__21_consolelog.bg.js', hook_names)
+        self.assertIn('on_Snapshot__20_chrome_tab.daemon.bg.js', hook_names)
+        self.assertIn('on_Snapshot__21_consolelog.daemon.bg.js', hook_names)
        self.assertIn('on_Snapshot__50_wget.py', hook_names)

    def test_discover_hooks_sorted_by_name(self):
@@ -222,10 +216,25 @@ class TestHookDiscovery(unittest.TestCase):
        hooks = sorted(set(hooks), key=lambda p: p.name)

        # Check numeric ordering
-        self.assertEqual(hooks[0].name, 'on_Snapshot__20_chrome_tab.bg.js')
-        self.assertEqual(hooks[1].name, 'on_Snapshot__21_consolelog.bg.js')
+        self.assertEqual(hooks[0].name, 'on_Snapshot__20_chrome_tab.daemon.bg.js')
+        self.assertEqual(hooks[1].name, 'on_Snapshot__21_consolelog.daemon.bg.js')
        self.assertEqual(hooks[2].name, 'on_Snapshot__50_wget.py')

+    def test_get_plugins_includes_non_snapshot_plugin_dirs(self):
+        """get_plugins() should include binary-only plugins with standardized metadata."""
+        env_dir = self.plugins_dir / 'env'
+        env_dir.mkdir()
+        (env_dir / 'on_Binary__15_env_discover.py').write_text('# binary hook')
+        (env_dir / 'config.json').write_text('{"type": "object", "properties": {}}')
+
+        from archivebox import hooks as hooks_module
+        self.addCleanup(hooks_module.get_plugins.cache_clear)  # get_plugins is lru_cached: drop the patched result after the test
+        hooks_module.get_plugins.cache_clear()
+        with patch.object(hooks_module, 'BUILTIN_PLUGINS_DIR', self.plugins_dir), patch.object(hooks_module, 'USER_PLUGINS_DIR', self.test_dir / 'user_plugins'):
+            plugins = hooks_module.get_plugins()
+
+        self.assertIn('env', plugins)
+

 class TestGetExtractorName(unittest.TestCase):
    """Test get_extractor_name() function."""
--- a/archivebox/workers/models.py
+++ b/archivebox/workers/models.py
@@ -338,7 +338,7 @@ class BaseStateMachine(StateMachine):
    (e.g., 'snapshot', 'archiveresult', 'crawl', 'binary').

    Example usage:
-        class SnapshotMachine(BaseStateMachine, strict_states=True):
+        class SnapshotMachine(BaseStateMachine):
            model_attr_name = 'snapshot'

            # States and transitions...
--- a/bin/test_plugins.sh
+++ b/bin/test_plugins.sh
@@ -21,7 +21,7 @@
 #   coverage json
 #   ./bin/test_plugins.sh --coverage-report

-set -e
+set -euo pipefail

 # Color codes
 GREEN='\033[0;32m'
@@ -31,6 +31,7 @@ NC='\033[0m' # No Color

 # Save root directory first
 ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+PLUGINS_DIR="${ABX_PLUGINS_DIR:-$(python3 -c 'from abx_plugins import get_plugins_dir; print(get_plugins_dir())')}"

 # Parse arguments
 PLUGIN_FILTER=""
@@ -49,7 +50,8 @@ done

 # Function to show JS coverage report (inlined from convert_v8_coverage.js)
 show_js_coverage() {
-    local coverage_dir="$1"
+    local plugin_root="$1"
+    local coverage_dir="$2"

    if [ ! -d "$coverage_dir" ] || [ -z "$(ls -A "$coverage_dir" 2>/dev/null)" ]; then
        echo "No JavaScript coverage data collected"
@@ -57,10 +59,11 @@ show_js_coverage() {
        return
    fi

-    node - "$coverage_dir" << 'ENDJS'
+    node - "$plugin_root" "$coverage_dir" << 'ENDJS'
 const fs = require('fs');
 const path = require('path');
-const coverageDir = process.argv[2];
+const pluginRoot = path.resolve(process.argv[2]).replace(/\\/g, '/');
+const coverageDir = process.argv[3];

 const files = fs.readdirSync(coverageDir).filter(f => f.startsWith('coverage-') && f.endsWith('.json'));
 if (files.length === 0) {
@@ -90,8 +93,8 @@ files.forEach(file => {
 });

 const allFiles = Object.keys(coverageByFile).sort();
-const pluginFiles = allFiles.filter(url => url.includes('archivebox/plugins'));
-const otherFiles = allFiles.filter(url => !url.startsWith('node:') && !url.includes('archivebox/plugins'));
+const pluginFiles = allFiles.filter(url => url.replace(/\\/g, '/').includes(pluginRoot));
+const otherFiles = allFiles.filter(url => !url.startsWith('node:') && !url.replace(/\\/g, '/').includes(pluginRoot));

 console.log('Total files with coverage: ' + allFiles.length + '\n');
 console.log('Plugin files: ' + pluginFiles.length);
@@ -118,8 +121,8 @@ let totalRanges = 0, totalExecuted = 0;
 pluginFiles.forEach(url => {
    const cov = coverageByFile[url];
    const pct = cov.totalRanges > 0 ? (cov.executedRanges / cov.totalRanges * 100).toFixed(1) : '0.0';
-    const match = url.match(/archivebox\/plugins\/.+/);
-    const displayPath = match ? match[0] : url;
+    const normalizedUrl = url.replace(/\\/g, '/');
+    const displayPath = normalizedUrl.includes(pluginRoot) ? normalizedUrl.slice(normalizedUrl.indexOf(pluginRoot)) : url;
    console.log(displayPath + ': ' + pct + '% (' + cov.executedRanges + '/' + cov.totalRanges + ' ranges)');
    totalRanges += cov.totalRanges;
    totalExecuted += cov.executedRanges;
@@ -139,17 +142,17 @@ if [ "$COVERAGE_REPORT_ONLY" = true ]; then
    echo "Python Coverage Summary"
    echo "=========================================="
    coverage combine 2>/dev/null || true
-    coverage report --include="archivebox/plugins/*" --omit="*/tests/*"
+    coverage report --include="*/abx_plugins/plugins/*" --omit="*/tests/*"
    echo ""

    echo "=========================================="
    echo "JavaScript Coverage Summary"
    echo "=========================================="
-    show_js_coverage "$ROOT_DIR/coverage/js"
+    show_js_coverage "$PLUGINS_DIR" "$ROOT_DIR/coverage/js"
    echo ""

    echo "For detailed coverage reports:"
-    echo "  Python:     coverage report --show-missing --include='archivebox/plugins/*' --omit='*/tests/*'"
+    echo "  Python:     coverage report --show-missing --include='*/abx_plugins/plugins/*' --omit='*/tests/*'"
    echo "  Python:     coverage json  # LLM-friendly format"
    echo "  Python:     coverage html  # Interactive HTML report"
    exit 0
@@ -157,7 +160,7 @@ fi

 # Set DATA_DIR for tests (required by abx_pkg and plugins)
 # Use temp dir to isolate tests from project files
-if [ -z "$DATA_DIR" ]; then
+if [ -z "${DATA_DIR:-}" ]; then
    export DATA_DIR=$(mktemp -d -t archivebox_plugin_tests.XXXXXX)
    # Clean up on exit
    trap "rm -rf '$DATA_DIR'" EXIT
@@ -173,7 +176,7 @@ if [ "$ENABLE_COVERAGE" = true ]; then

    # Enable Python subprocess coverage
    export COVERAGE_PROCESS_START="$ROOT_DIR/pyproject.toml"
-    export PYTHONPATH="$ROOT_DIR:$PYTHONPATH"  # For sitecustomize.py
+    export PYTHONPATH="$ROOT_DIR${PYTHONPATH:+:$PYTHONPATH}"  # For sitecustomize.py

    # Enable Node.js V8 coverage (built-in, no packages needed)
    export NODE_V8_COVERAGE="$ROOT_DIR/coverage/js"
@@ -183,8 +186,7 @@ if [ "$ENABLE_COVERAGE" = true ]; then
    echo ""
 fi

-# Change to plugins directory
-cd "$ROOT_DIR/archivebox/plugins" || exit 1
+cd "$ROOT_DIR" || exit 1

 echo "=========================================="
 echo "ArchiveBox Plugin Tests"
@@ -212,10 +214,10 @@ FAILED_PLUGINS=0
 # Find and run plugin tests
 if [ -n "$PLUGIN_FILTER" ]; then
    # Run tests for specific plugin(s) matching pattern
-    TEST_DIRS=$(find . -maxdepth 2 -type d -path "./${PLUGIN_FILTER}*/tests" 2>/dev/null | sort)
+    TEST_DIRS=$(find "$PLUGINS_DIR" -maxdepth 2 -type d -path "$PLUGINS_DIR/${PLUGIN_FILTER}*/tests" 2>/dev/null | sort)
 else
    # Run all plugin tests
-    TEST_DIRS=$(find . -maxdepth 2 -type d -name "tests" -path "./*/tests" 2>/dev/null | sort)
+    TEST_DIRS=$(find "$PLUGINS_DIR" -maxdepth 2 -type d -name "tests" -path "$PLUGINS_DIR/*/tests" 2>/dev/null | sort)
 fi

 if [ -z "$TEST_DIRS" ]; then
@@ -230,26 +232,35 @@ for test_dir in $TEST_DIRS; do
        continue
    fi

-    plugin_name=$(basename $(dirname "$test_dir"))
+    plugin_name=$(basename "$(dirname "$test_dir")")
    TOTAL_PLUGINS=$((TOTAL_PLUGINS + 1))

    echo -e "${YELLOW}[RUNNING]${NC} $plugin_name"

    # Build pytest command with optional coverage
-    PYTEST_CMD="python -m pytest $test_dir -p no:django -v --tb=short"
+    PYTEST_CMD=(python -m pytest "$test_dir" -p no:django -v --tb=short)
    if [ "$ENABLE_COVERAGE" = true ]; then
-        PYTEST_CMD="$PYTEST_CMD --cov=$plugin_name --cov-append --cov-branch"
+        PYTEST_CMD+=(--cov="$(dirname "$test_dir")" --cov-append --cov-branch)
        echo "[DEBUG] NODE_V8_COVERAGE before pytest: $NODE_V8_COVERAGE"
        python -c "import os; print('[DEBUG BASH->PYTHON] NODE_V8_COVERAGE:', os.environ.get('NODE_V8_COVERAGE', 'NOT_SET'))"
    fi

-    if eval "$PYTEST_CMD" 2>&1 | grep -v "^platform\|^cachedir\|^rootdir\|^configfile\|^plugins:" | tail -100; then
+    LOG_FILE=$(mktemp -t "archivebox_plugin_${plugin_name}.XXXXXX.log")
+    PLUGIN_TMPDIR=$(mktemp -d -t "archivebox_plugin_${plugin_name}.XXXXXX")
+    if (
+        cd "$PLUGIN_TMPDIR" || exit 1  # mark the plugin failed instead of running pytest from the wrong cwd
+        TMPDIR="$PLUGIN_TMPDIR" "${PYTEST_CMD[@]}"  # NOTE(review): with --cov, .coverage may be written to this cwd and deleted with it -- confirm COVERAGE_FILE is set
+    ) >"$LOG_FILE" 2>&1; then
+        grep -v "^platform\|^cachedir\|^rootdir\|^configfile\|^plugins:" "$LOG_FILE" | tail -100
        echo -e "${GREEN}[PASSED]${NC} $plugin_name"
        PASSED_PLUGINS=$((PASSED_PLUGINS + 1))
    else
+        grep -v "^platform\|^cachedir\|^rootdir\|^configfile\|^plugins:" "$LOG_FILE" | tail -100
        echo -e "${RED}[FAILED]${NC} $plugin_name"
        FAILED_PLUGINS=$((FAILED_PLUGINS + 1))
    fi
+    rm -f "$LOG_FILE"
+    rm -rf "$PLUGIN_TMPDIR"
    echo ""
 done

@@ -277,21 +288,18 @@ elif [ $FAILED_PLUGINS -eq 0 ]; then
        # Coverage data is in ROOT_DIR, combine and report from there
        cd "$ROOT_DIR" || exit 1
-        # Copy coverage data from plugins dir if it exists
+        # Merge any per-process coverage data files before reporting
-        if [ -f "$ROOT_DIR/archivebox/plugins/.coverage" ]; then
-            cp "$ROOT_DIR/archivebox/plugins/.coverage" "$ROOT_DIR/.coverage"
-        fi
        coverage combine 2>/dev/null || true
-        coverage report --include="archivebox/plugins/*" --omit="*/tests/*" 2>&1 | head -50
+        coverage report --include="*/abx_plugins/plugins/*" --omit="*/tests/*" 2>&1 | head -50
        echo ""

        echo "=========================================="
        echo "JavaScript Coverage Summary"
        echo "=========================================="
-        show_js_coverage "$ROOT_DIR/coverage/js"
+        show_js_coverage "$PLUGINS_DIR" "$ROOT_DIR/coverage/js"
        echo ""

        echo "For detailed coverage reports (from project root):"
-        echo "  Python:     coverage report --show-missing --include='archivebox/plugins/*' --omit='*/tests/*'"
+        echo "  Python:     coverage report --show-missing --include='*/abx_plugins/plugins/*' --omit='*/tests/*'"
        echo "  Python:     coverage json  # LLM-friendly format"
        echo "  Python:     coverage html  # Interactive HTML report"
        echo "  JavaScript: ./bin/test_plugins.sh --coverage-report"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -83,8 +83,8 @@ dependencies = [
    ### Extractor dependencies (optional binaries detected at runtime via shutil.which)
    "yt-dlp>=2024.1.0",      # for: media extractor
    ### Binary/Package Management
-    "abx-pkg>=0.1.0",        # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
-    "abx-plugins>=0.1.0",    # shared plugin package (sourced from uv workspace in local dev)
+    "abx-pkg>=0.7.0",        # for: detecting, versioning, and installing binaries via apt/brew/pip/npm
+    "abx-plugins>=0.9.5",    # shared ArchiveBox plugin package with install_args-only overrides
    "gallery-dl>=1.31.1",
    ### UUID7 backport for Python <3.14
    "uuid7>=0.1.0; python_version < '3.14'",  # for: uuid7 support on Python 3.13 (provides uuid_extensions module)
@@ -165,9 +165,6 @@ package = true
 python-version = "3.13"
 # compile-bytecode = true

-[tool.uv.sources]
-abx-plugins = { workspace = true }
-
 [build-system]
 requires = ["pdm-backend"]
 build-backend = "pdm.backend"
@@ -188,9 +185,9 @@ exclude = ["*.pyi", "typings/", "migrations/"]
 ignore = ["E731", "E303", "E266", "E241", "E222"]

 [tool.pytest.ini_options]
-testpaths = [ "tests" ]
+testpaths = [ "archivebox/tests" ]
 DJANGO_SETTINGS_MODULE = "archivebox.core.settings"
-# Note: Plugin tests under archivebox/plugins/ must NOT load Django
+# Note: Plugin tests under abx_plugins/plugins/ must NOT load Django
 # They use a conftest.py to disable Django automatically

 [tool.coverage.run]
--- a/uv.lock
+++ b/uv.lock